Commit 8743fe3

Move recognize() methods to Sample.

1 parent 0291dd4 commit 8743fe3
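
In usage terms, recognition moves from methods on the client to methods on the Sample returned by the client.sample() factory. A minimal before/after sketch, assuming the library's speech.Client() constructor and the placeholder Cloud Storage URI used in the docs below:

    from google.cloud import speech

    client = speech.Client()
    sample = client.sample(source_uri='gs://my-bucket/recording.flac',
                           encoding=speech.Encoding.FLAC,
                           sample_rate=44100)

    # Before this commit, recognition went through the client, with the
    # sample passed in as an argument:
    #     alternatives = client.sync_recognize(sample, max_alternatives=2)

    # After this commit, the sample exposes the recognize methods itself:
    alternatives = sample.sync_recognize(max_alternatives=2)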

File tree

5 files changed: +229 -249 lines changed

docs/speech-usage.rst

Lines changed: 7 additions & 10 deletions

@@ -61,7 +61,7 @@ See: `Speech Asynchronous Recognize`_
     >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
     ...                        encoding=speech.Encoding.LINEAR16,
     ...                        sample_rate=44100)
-    >>> operation = client.async_recognize(sample, max_alternatives=2)
+    >>> operation = sample.async_recognize(max_alternatives=2)
     >>> retry_count = 100
     >>> while retry_count > 0 and not operation.complete:
     ...     retry_count -= 1
@@ -94,8 +94,7 @@ Great Britian.
     >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
     ...                        encoding=speech.Encoding.FLAC,
     ...                        sample_rate=44100)
-    >>> operation = client.async_recognize(sample, max_alternatives=2)
-    >>> alternatives = client.sync_recognize(
+    >>> alternatives = sample.sync_recognize(
     ...     speech.Encoding.FLAC, 16000,
     ...     source_uri='gs://my-bucket/recording.flac', language_code='en-GB',
     ...     max_alternatives=2)
@@ -119,7 +118,7 @@ Example of using the profanity filter.
     >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
     ...                        encoding=speech.Encoding.FLAC,
     ...                        sample_rate=44100)
-    >>> alternatives = client.sync_recognize(sample, max_alternatives=1,
+    >>> alternatives = sample.sync_recognize(max_alternatives=1,
     ...                                      profanity_filter=True)
     >>> for alternative in alternatives:
     ...     print('=' * 20)
@@ -141,7 +140,7 @@ words to the vocabulary of the recognizer.
     ...                        encoding=speech.Encoding.FLAC,
     ...                        sample_rate=44100)
     >>> hints = ['hi', 'good afternoon']
-    >>> alternatives = client.sync_recognize(sample, max_alternatives=2,
+    >>> alternatives = sample.sync_recognize(max_alternatives=2,
     ...                                      speech_context=hints)
     >>> for alternative in alternatives:
     ...     print('=' * 20)
@@ -171,7 +170,7 @@ speech data to possible text alternatives on the fly.
     ...     sample = client.sample(content=stream,
     ...                            encoding=speech.Encoding.LINEAR16,
     ...                            sample_rate=16000)
-    ...     results = list(client.streaming_recognize(sample))
+    ...     results = list(sample.streaming_recognize())
     >>> print(results[0].alternatives[0].transcript)
     'hello'
     >>> print(results[0].alternatives[0].confidence)
@@ -194,8 +193,7 @@ See: `Single Utterance`_
     ...     sample = client.sample(content=stream,
     ...                            encoding=speech.Encoding.LINEAR16,
     ...                            sample_rate=16000)
-    ...     responses = client.streaming_recognize(sample,
-    ...                                            single_utterance=True)
+    ...     responses = sample.streaming_recognize(single_utterance=True)
    ...     results = list(responses)
     >>> print(results[0].alternatives[0].transcript)
     hello
@@ -214,8 +212,7 @@ If ``interim_results`` is set to :data:`True`, interim results
     ...     sample = client.sample(content=stream,
     ...                            encoding=speech.Encoding.LINEAR16,
     ...                            sample_rate=16000)
-    ...     for results in client.streaming_recognize(sample,
-    ...                                               interim_results=True):
+    ...     for results in sample.streaming_recognize(interim_results=True):
     ...         print('=' * 20)
     ...         print(results[0].alternatives[0].transcript)
     ...         print(results[0].alternatives[0].confidence)
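
These doc examples only work if each Sample knows which client created it. The client.py diff below wires that up by passing client=self into the Sample factory; the corresponding changes to Sample itself live in speech/google/cloud/speech/sample.py, which is among the five changed files but not shown in this excerpt. A rough sketch of that side of the change, with attribute names such as self._client assumed rather than taken from the diff:

    class Sample(object):
        """Sketch of the Sample side of this commit (attribute names assumed)."""

        def __init__(self, content=None, source_uri=None, encoding=None,
                     sample_rate=None, client=None):
            # Client.sample(...) now passes client=self, so the sample can
            # reach the client's speech API on its own.
            self._client = client
            self.content = content
            self.source_uri = source_uri
            self.encoding = encoding
            self.sample_rate = sample_rate

        def sync_recognize(self, language_code=None, max_alternatives=None,
                           profanity_filter=None, speech_context=None):
            # Delegate to the bound client's speech API, passing this sample
            # where the removed Client.sync_recognize() passed its argument.
            api = self._client.speech_api
            return api.sync_recognize(self, language_code, max_alternatives,
                                      profanity_filter, speech_context)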

speech/google/cloud/speech/client.py

Lines changed: 2 additions & 194 deletions

@@ -25,9 +25,7 @@
 from google.cloud.speech._gax import GAPICSpeechAPI
 from google.cloud.speech.alternative import Alternative
 from google.cloud.speech.connection import Connection
-from google.cloud.speech.encoding import Encoding
 from google.cloud.speech.operation import Operation
-from google.cloud.speech.result import StreamingSpeechResult
 from google.cloud.speech.sample import Sample


@@ -65,58 +63,7 @@ def __init__(self, credentials=None, http=None, use_gax=None):
     _connection_class = Connection
     _speech_api = None

-    def async_recognize(self, sample, language_code=None,
-                        max_alternatives=None, profanity_filter=None,
-                        speech_context=None):
-        """Asychronous Recognize request to Google Speech API.
-
-        .. _async_recognize: https://cloud.google.com/speech/reference/\
-                             rest/v1beta1/speech/asyncrecognize
-
-        See `async_recognize`_.
-
-        :type sample: :class:`~google.cloud.speech.sample.Sample`
-        :param sample: Instance of ``Sample`` containing audio information.
-
-        :type language_code: str
-        :param language_code: (Optional) The language of the supplied audio as
-                              BCP-47 language tag. Example: ``'en-GB'``.
-                              If omitted, defaults to ``'en-US'``.
-
-        :type max_alternatives: int
-        :param max_alternatives: (Optional) Maximum number of recognition
-                                 hypotheses to be returned. The server may
-                                 return fewer than maxAlternatives.
-                                 Valid values are 0-30. A value of 0 or 1
-                                 will return a maximum of 1. Defaults to 1
-
-        :type profanity_filter: bool
-        :param profanity_filter: If True, the server will attempt to filter
-                                 out profanities, replacing all but the
-                                 initial character in each filtered word with
-                                 asterisks, e.g. ``'f***'``. If False or
-                                 omitted, profanities won't be filtered out.
-
-        :type speech_context: list
-        :param speech_context: A list of strings (max 50) containing words and
-                               phrases "hints" so that the speech recognition
-                               is more likely to recognize them. This can be
-                               used to improve the accuracy for specific words
-                               and phrases. This can also be used to add new
-                               words to the vocabulary of the recognizer.
-
-        :rtype: :class:`~google.cloud.speech.operation.Operation`
-        :returns: Operation for asynchronous request to Google Speech API.
-        """
-        if sample.encoding is not Encoding.LINEAR16:
-            raise ValueError('Only LINEAR16 encoding is supported by '
-                             'asynchronous speech requests.')
-        api = self.speech_api
-        return api.async_recognize(sample, language_code, max_alternatives,
-                                   profanity_filter, speech_context)
-
-    @staticmethod
-    def sample(content=None, source_uri=None, encoding=None,
+    def sample(self, content=None, source_uri=None, encoding=None,
                sample_rate=None):
         """Factory: construct Sample to use when making recognize requests.

@@ -148,7 +95,7 @@ def sample(content=None, source_uri=None, encoding=None,
         :returns: Instance of ``Sample``.
         """
         return Sample(content=content, source_uri=source_uri,
-                      encoding=encoding, sample_rate=sample_rate)
+                      encoding=encoding, sample_rate=sample_rate, client=self)

     @property
     def speech_api(self):
@@ -160,145 +107,6 @@ def speech_api(self):
             self._speech_api = _JSONSpeechAPI(self)
         return self._speech_api

-    def streaming_recognize(self, sample, language_code=None,
-                            max_alternatives=None, profanity_filter=None,
-                            speech_context=None, single_utterance=False,
-                            interim_results=False):
-        """Streaming speech recognition.
-
-        .. note::
-
-            Streaming recognition requests are limited to 1 minute of audio.
-            See: https://cloud.google.com/speech/limits#content
-
-        Yields: Instance of
-                :class:`~google.cloud.speech.result.StreamingSpeechResult`
-                containing results and metadata from the streaming request.
-
-        :type sample: :class:`~google.cloud.speech.sample.Sample`
-        :param sample: Instance of ``Sample`` containing audio information.
-
-        :type language_code: str
-        :param language_code: (Optional) The language of the supplied audio as
-                              BCP-47 language tag. Example: ``'en-GB'``.
-                              If omitted, defaults to ``'en-US'``.
-
-        :type max_alternatives: int
-        :param max_alternatives: (Optional) Maximum number of recognition
-                                 hypotheses to be returned. The server may
-                                 return fewer than maxAlternatives.
-                                 Valid values are 0-30. A value of 0 or 1
-                                 will return a maximum of 1. Defaults to 1
-
-        :type profanity_filter: bool
-        :param profanity_filter: If True, the server will attempt to filter
-                                 out profanities, replacing all but the
-                                 initial character in each filtered word with
-                                 asterisks, e.g. ``'f***'``. If False or
-                                 omitted, profanities won't be filtered out.
-
-        :type speech_context: list
-        :param speech_context: A list of strings (max 50) containing words and
-                               phrases "hints" so that the speech recognition
-                               is more likely to recognize them. This can be
-                               used to improve the accuracy for specific words
-                               and phrases. This can also be used to add new
-                               words to the vocabulary of the recognizer.
-
-        :type single_utterance: bool
-        :param single_utterance: (Optional) If false or omitted, the recognizer
-                                 will perform continuous recognition
-                                 (continuing to process audio even if the user
-                                 pauses speaking) until the client closes the
-                                 output stream (gRPC API) or when the maximum
-                                 time limit has been reached. Multiple
-                                 SpeechRecognitionResults with the is_final
-                                 flag set to true may be returned.
-                                 If true, the recognizer will detect a single
-                                 spoken utterance. When it detects that the
-                                 user has paused or stopped speaking, it will
-                                 return an END_OF_UTTERANCE event and cease
-                                 recognition. It will return no more than one
-                                 SpeechRecognitionResult with the is_final flag
-                                 set to true.
-
-        :type interim_results: bool
-        :param interim_results: (Optional) If true, interim results (tentative
-                                hypotheses) may be returned as they become
-                                available (these interim results are indicated
-                                with the ``is_final=False`` flag). If false or
-                                omitted, only is_final=true result(s) are
-                                returned.
-
-        :raises: EnvironmentError if gRPC is not available.
-        """
-        if not self._use_gax:
-            raise EnvironmentError('gRPC is required to use this API.')
-
-        responses = self.speech_api.streaming_recognize(sample, language_code,
-                                                        max_alternatives,
-                                                        profanity_filter,
-                                                        speech_context,
-                                                        single_utterance,
-                                                        interim_results)
-        for response in responses:
-            for result in response.results:
-                if result.is_final or interim_results:
-                    yield StreamingSpeechResult.from_pb(result)
-
-    def sync_recognize(self, sample, language_code=None,
-                       max_alternatives=None, profanity_filter=None,
-                       speech_context=None):
-        """Synchronous Speech Recognition.
-
-        .. _sync_recognize: https://cloud.google.com/speech/reference/\
-                            rest/v1beta1/speech/syncrecognize
-
-        See `sync_recognize`_.
-
-        :type sample: :class:`~google.cloud.speech.sample.Sample`
-        :param sample: Instance of ``Sample`` containing audio information.
-
-        :type language_code: str
-        :param language_code: (Optional) The language of the supplied audio as
-                              BCP-47 language tag. Example: ``'en-GB'``.
-                              If omitted, defaults to ``'en-US'``.
-
-        :type max_alternatives: int
-        :param max_alternatives: (Optional) Maximum number of recognition
-                                 hypotheses to be returned. The server may
-                                 return fewer than maxAlternatives.
-                                 Valid values are 0-30. A value of 0 or 1
-                                 will return a maximum of 1. Defaults to 1
-
-        :type profanity_filter: bool
-        :param profanity_filter: If True, the server will attempt to filter
-                                 out profanities, replacing all but the
-                                 initial character in each filtered word with
-                                 asterisks, e.g. ``'f***'``. If False or
-                                 omitted, profanities won't be filtered out.
-
-        :type speech_context: list
-        :param speech_context: A list of strings (max 50) containing words and
-                               phrases "hints" so that the speech recognition
-                               is more likely to recognize them. This can be
-                               used to improve the accuracy for specific words
-                               and phrases. This can also be used to add new
-                               words to the vocabulary of the recognizer.
-
-        :rtype: list
-        :returns: A list of dictionaries. One dict for each alternative. Each
-                  dictionary typically contains two keys (though not
-                  all will be present in all cases)
-
-                  * ``transcript``: The detected text from the audio recording.
-                  * ``confidence``: The confidence in language detection, float
-                    between 0 and 1.
-        """
-        api = self.speech_api
-        return api.sync_recognize(sample, language_code, max_alternatives,
-                                  profanity_filter, speech_context)
-

 class _JSONSpeechAPI(object):
     """Speech API for interacting with the JSON/REST version of the API.
