14
14
# limitations under the License.
15
15
"""Sample that streams audio to the Google Cloud Speech API via GRPC."""
16
16
17
+ from __future__ import division
18
+
17
19
import contextlib
18
20
import re
19
21
import threading
20
22
21
23
from gcloud .credentials import get_credentials
22
- from google .cloud .speech .v1 import cloud_speech_pb2 as cloud_speech
24
+ from google .cloud .speech .v1beta1 import cloud_speech_pb2 as cloud_speech
23
25
from google .rpc import code_pb2
24
26
from grpc .beta import implementations
25
27
import pyaudio
26
28
27
29
# Audio recording parameters
28
30
RATE = 16000
29
31
CHANNELS = 1
30
- CHUNK = RATE // 10 # 100ms
32
+ CHUNK = int ( RATE / 10 ) # 100ms
31
33
32
34
# Keep the request alive for this many seconds
33
35
DEADLINE_SECS = 8 * 60 * 60
@@ -43,15 +45,15 @@ def make_channel(host, port):
43
45
creds = get_credentials ().create_scoped ([SPEECH_SCOPE ])
44
46
# Add a plugin to inject the creds into the header
45
47
auth_header = (
46
- 'Authorization' ,
47
- 'Bearer ' + creds .get_access_token ().access_token )
48
+ 'Authorization' ,
49
+ 'Bearer ' + creds .get_access_token ().access_token )
48
50
auth_plugin = implementations .metadata_call_credentials (
49
- lambda _ , cb : cb ([auth_header ], None ),
50
- name = 'google_creds' )
51
+ lambda _ , cb : cb ([auth_header ], None ),
52
+ name = 'google_creds' )
51
53
52
54
# compose the two together for both ssl and google auth
53
55
composite_channel = implementations .composite_channel_credentials (
54
- ssl_channel , auth_plugin )
56
+ ssl_channel , auth_plugin )
55
57
56
58
return implementations .secure_channel (host , port , composite_channel )
57
59
@@ -75,41 +77,40 @@ def record_audio(channels, rate, chunk):
75
77
76
78
77
79
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    The first request yielded carries only the streaming configuration; every
    later request carries only a chunk of recorded audio.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Buffer audio into chunks of this size before sending to the api.

    Yields:
        `cloud_speech.StreamingRecognizeRequest` messages: one configuration
        request, followed by one audio request per chunk read, until
        `stop_audio` is set or the audio stream returns no data.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(
        encoding='LINEAR16', sample_rate=rate)
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        # Note that setting interim_results to True means that you'll likely
        # get multiple results for the same bit of audio, as the system
        # re-interprets audio in the context of subsequent audio. However, this
        # will give us quick results without having to tell the server when to
        # finalize a piece of audio.
        interim_results=True, single_utterance=True
    )

    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # End the generator with a plain return. Raising
                # StopIteration inside a generator body is converted to a
                # RuntimeError under PEP 479 (Python 3.7+); `return` behaves
                # the same on Python 2 and 3 and still exits the `with` block
                # so the audio stream is cleaned up.
                return

            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
113
114
114
115
115
116
def listen_print_loop (recognize_stream ):
@@ -136,7 +137,8 @@ def main():
136
137
make_channel ('speech.googleapis.com' , 443 )) as service :
137
138
try :
138
139
listen_print_loop (
139
- service .Recognize (request_stream (stop_audio ), DEADLINE_SECS ))
140
+ service .StreamingRecognize (
141
+ request_stream (stop_audio ), DEADLINE_SECS ))
140
142
finally :
141
143
# Stop the request stream once we're done with the loop - otherwise
142
144
# it'll keep going in the thread that the grpc lib makes for it..
0 commit comments