Commit bfef34d

xinjiez authored and Jon Wayne Parrott committed
Changing cloud speech code samples to work with v1beta1 (GoogleCloudPlatform#399)
1 parent 15b7063 commit bfef34d

6 files changed: 50 additions and 51 deletions

speech/api/grpc_auth.py

Whitespace-only changes.
speech/api/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 gcloud==0.17.0
 grpcio==0.14.0
 PyAudio==0.2.9
-grpc-google-cloud-speech==1.0.4
+grpc-google-cloud-speech-v1beta1==1.0.0
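
The renamed package also moves the generated stubs to a v1beta1 import path, which is what the sample changes below pick up. A minimal smoke test (not part of the commit; the RecognitionConfig arguments simply mirror the updated samples) that the pinned dependency exposes the expected module:

    # Import the v1beta1 generated stubs provided by the renamed dependency.
    from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech

    # Build a RecognitionConfig the way the updated samples do; the encoding
    # enum is passed by name, as speech_gcs.py does with its argparse value.
    config = cloud_speech.RecognitionConfig(encoding='FLAC', sample_rate=16000)
    print(config)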

speech/api/speech_gcs.py

Lines changed: 10 additions & 13 deletions
@@ -18,7 +18,7 @@
 import argparse
 
 from gcloud.credentials import get_credentials
-from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
+from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
 from grpc.beta import implementations
 
 # Keep the request alive for this many seconds
@@ -48,25 +48,23 @@ def make_channel(host, port):
     return implementations.secure_channel(host, port, composite_channel)
 
 
-def main(input_uri, output_uri, encoding, sample_rate):
+def main(input_uri, encoding, sample_rate):
     service = cloud_speech.beta_create_Speech_stub(
         make_channel('speech.googleapis.com', 443))
     # The method and parameters can be inferred from the proto from which the
     # grpc client lib was generated. See:
-    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto
-    response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest(
-        initial_request=cloud_speech.InitialRecognizeRequest(
+    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
+    response = service.SyncRecognize(cloud_speech.SyncRecognizeRequest(
+        config=cloud_speech.RecognitionConfig(
             encoding=encoding,
             sample_rate=sample_rate,
-            output_uri=output_uri,
         ),
-        audio_request=cloud_speech.AudioRequest(
+        audio=cloud_speech.RecognitionAudio(
            uri=input_uri,
        )
    ), DEADLINE_SECS)
-    # This shouldn't actually print anything, since the transcription is output
-    # to the GCS uri specified
-    print(response.responses)
+    # Print the recognition results.
+    print(response.results)
 
 
 def _gcs_uri(text):
@@ -77,16 +75,15 @@ def _gcs_uri(text):
 
 
 PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/'
-             'google/cloud/speech/v1/cloud_speech.proto')
+             'google/cloud/speech/v1beta1/cloud_speech.proto')
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('input_uri', type=_gcs_uri)
-    parser.add_argument('output_uri', type=_gcs_uri)
     parser.add_argument(
         '--encoding', default='FLAC', choices=[
             'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
         help='How the audio file is encoded. See {}#L67'.format(PROTO_URL))
     parser.add_argument('--sample_rate', default=16000)
 
     args = parser.parse_args()
-    main(args.input_uri, args.output_uri, args.encoding, args.sample_rate)
+    main(args.input_uri, args.encoding, args.sample_rate)
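
Taken together, the new synchronous flow looks like the condensed sketch below. The helper name transcribe_gcs and the 30-second deadline are illustrative; the request and response field names (config, audio, results, alternatives, transcript) follow the v1beta1 proto referenced above.

    from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech


    def transcribe_gcs(service, input_uri, encoding='FLAC', sample_rate=16000):
        """Returns transcripts for an audio file already stored in GCS.

        `service` is a Speech stub such as the one built above with
        cloud_speech.beta_create_Speech_stub(make_channel(...)).
        """
        response = service.SyncRecognize(cloud_speech.SyncRecognizeRequest(
            config=cloud_speech.RecognitionConfig(
                encoding=encoding,
                sample_rate=sample_rate,
            ),
            audio=cloud_speech.RecognitionAudio(uri=input_uri),
        ), 30)  # per-call deadline, in seconds
        # Each result can carry several alternatives, ranked by confidence.
        return [alternative.transcript
                for result in response.results
                for alternative in result.alternatives]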

speech/api/speech_gcs_test.py

Lines changed: 3 additions & 3 deletions
@@ -11,6 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import re
 import sys
 
 import pytest
@@ -24,12 +25,11 @@
             'https://github.com/grpc/grpc/issues/282'))
 def test_main(cloud_config, capsys):
     input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket)
-    output_uri = 'gs://{}/speech/audio.txt'.format(cloud_config.storage_bucket)
 
-    main(input_uri, output_uri, 'FLAC', 16000)
+    main(input_uri, 'FLAC', 16000)
 
     out, err = capsys.readouterr()
-    assert '[]\n' == out
+    assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
 
 
 def test_gcs_uri():

speech/api/speech_rest.py

Lines changed: 4 additions & 4 deletions
@@ -40,7 +40,7 @@ def get_speech_service():
     credentials.authorize(http)
 
     return discovery.build(
-        'speech', 'v1', http=http, discoveryServiceUrl=DISCOVERY_URL)
+        'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL)
 # [END authenticating]
 
 
@@ -57,13 +57,13 @@ def main(speech_file):
     speech_content = base64.b64encode(speech.read())
 
     service = get_speech_service()
-    service_request = service.speech().recognize(
+    service_request = service.speech().syncrecognize(
         body={
-            'initialRequest': {
+            'config': {
                 'encoding': 'LINEAR16',
                 'sampleRate': 16000
             },
-            'audioRequest': {
+            'audio': {
                 'content': speech_content.decode('UTF-8')
             }
         })
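
The hunk ends at the request body; what typically follows is executing the request and reading the v1beta1 JSON response, whose shape is results[].alternatives[].transcript. A hedged sketch, where only service_request comes from the sample above and the rest is illustrative:

    # Send the syncrecognize request and print each transcript alternative.
    response = service_request.execute()
    for result in response.get('results', []):
        for alternative in result.get('alternatives', []):
            print(alternative.get('transcript'))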

speech/api/speech_streaming.py

Lines changed: 32 additions & 30 deletions
@@ -14,20 +14,22 @@
 # limitations under the License.
 """Sample that streams audio to the Google Cloud Speech API via GRPC."""
 
+from __future__ import division
+
 import contextlib
 import re
 import threading
 
 from gcloud.credentials import get_credentials
-from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
+from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
 from google.rpc import code_pb2
 from grpc.beta import implementations
 import pyaudio
 
 # Audio recording parameters
 RATE = 16000
 CHANNELS = 1
-CHUNK = RATE // 10  # 100ms
+CHUNK = int(RATE / 10)  # 100ms
 
 # Keep the request alive for this many seconds
 DEADLINE_SECS = 8 * 60 * 60
@@ -43,15 +45,15 @@ def make_channel(host, port):
     creds = get_credentials().create_scoped([SPEECH_SCOPE])
     # Add a plugin to inject the creds into the header
     auth_header = (
-        'Authorization',
-        'Bearer ' + creds.get_access_token().access_token)
+            'Authorization',
+            'Bearer ' + creds.get_access_token().access_token)
     auth_plugin = implementations.metadata_call_credentials(
-        lambda _, cb: cb([auth_header], None),
-        name='google_creds')
+            lambda _, cb: cb([auth_header], None),
+            name='google_creds')
 
     # compose the two together for both ssl and google auth
     composite_channel = implementations.composite_channel_credentials(
-        ssl_channel, auth_plugin)
+            ssl_channel, auth_plugin)
 
     return implementations.secure_channel(host, port, composite_channel)
 
@@ -75,41 +77,40 @@ def record_audio(channels, rate, chunk):
 
 
 def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
-    """Yields `RecognizeRequest`s constructed from a recording audio stream.
+    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
+    stream.
 
     Args:
         stop_audio: A threading.Event object stops the recording when set.
         channels: How many audio channels to record.
         rate: The sampling rate.
         chunk: Buffer audio into chunks of this size before sending to the api.
     """
-    with record_audio(channels, rate, chunk) as audio_stream:
-        # The initial request must contain metadata about the stream, so the
-        # server knows how to interpret it.
-        metadata = cloud_speech.InitialRecognizeRequest(
-            encoding='LINEAR16', sample_rate=rate,
-            # Note that setting interim_results to True means that you'll
-            # likely get multiple results for the same bit of audio, as the
-            # system re-interprets audio in the context of subsequent audio.
-            # However, this will give us quick results without having to tell
-            # the server when to finalize a piece of audio.
-            interim_results=True, continuous=False,
-        )
-        data = audio_stream.read(chunk)
-        audio_request = cloud_speech.AudioRequest(content=data)
-
-        yield cloud_speech.RecognizeRequest(
-            initial_request=metadata,
-            audio_request=audio_request)
+    # The initial request must contain metadata about the stream, so the
+    # server knows how to interpret it.
+    recognition_config = cloud_speech.RecognitionConfig(
+        encoding='LINEAR16', sample_rate=rate)
+    streaming_config = cloud_speech.StreamingRecognitionConfig(
+        config=recognition_config,
+        # Note that setting interim_results to True means that you'll likely
+        # get multiple results for the same bit of audio, as the system
+        # re-interprets audio in the context of subsequent audio. However, this
+        # will give us quick results without having to tell the server when to
+        # finalize a piece of audio.
+        interim_results=True, single_utterance=True
+    )
+
+    yield cloud_speech.StreamingRecognizeRequest(
+        streaming_config=streaming_config)
 
+    with record_audio(channels, rate, chunk) as audio_stream:
         while not stop_audio.is_set():
             data = audio_stream.read(chunk)
             if not data:
                 raise StopIteration()
-            # Subsequent requests can all just have the content
-            audio_request = cloud_speech.AudioRequest(content=data)
 
-            yield cloud_speech.RecognizeRequest(audio_request=audio_request)
+            # Subsequent requests can all just have the content
+            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
 
 
 def listen_print_loop(recognize_stream):
@@ -136,7 +137,8 @@ def main():
             make_channel('speech.googleapis.com', 443)) as service:
         try:
             listen_print_loop(
-                service.Recognize(request_stream(stop_audio), DEADLINE_SECS))
+                service.StreamingRecognize(
+                    request_stream(stop_audio), DEADLINE_SECS))
         finally:
             # Stop the request stream once we're done with the loop - otherwise
             # it'll keep going in the thread that the grpc lib makes for it..
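
listen_print_loop itself is untouched by this commit. The sketch below is only an illustration (helper name and printing behavior assumed, not taken from the sample) of the v1beta1 StreamingRecognizeResponse shape it now consumes: interim hypotheses arrive with is_final unset and may be revised, and with single_utterance=True a single final result ends the utterance.

    import sys


    def print_first_utterance(recognize_stream):
        """Prints interim hypotheses and returns the first final transcript.

        `recognize_stream` is the response iterator returned by
        service.StreamingRecognize(...) in main().
        """
        for response in recognize_stream:
            if not response.results or not response.results[0].alternatives:
                continue
            result = response.results[0]
            transcript = result.alternatives[0].transcript
            if result.is_final:
                print(transcript)
                return transcript
            # Interim results are provisional; overwrite them in place.
            sys.stdout.write(transcript + '\r')
            sys.stdout.flush()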
