2525from google .cloud .speech ._gax import GAPICSpeechAPI
2626from google .cloud .speech .alternative import Alternative
2727from google .cloud .speech .connection import Connection
28- from google .cloud .speech .encoding import Encoding
2928from google .cloud .speech .operation import Operation
30- from google .cloud .speech .result import StreamingSpeechResult
3129from google .cloud .speech .sample import Sample
3230
3331
@@ -65,58 +63,7 @@ def __init__(self, credentials=None, http=None, use_gax=None):
6563 _connection_class = Connection
6664 _speech_api = None
6765
def async_recognize(self, sample, language_code=None,
                    max_alternatives=None, profanity_filter=None,
                    speech_context=None):
    """Asynchronous Recognize request to Google Speech API.

    .. _async_recognize: https://cloud.google.com/speech/reference/\
                         rest/v1beta1/speech/asyncrecognize

    See `async_recognize`_.

    :type sample: :class:`~google.cloud.speech.sample.Sample`
    :param sample: Instance of ``Sample`` containing audio information.

    :type language_code: str
    :param language_code: (Optional) The language of the supplied audio as
                          BCP-47 language tag. Example: ``'en-GB'``.
                          If omitted, defaults to ``'en-US'``.

    :type max_alternatives: int
    :param max_alternatives: (Optional) Maximum number of recognition
                             hypotheses to be returned. The server may
                             return fewer than maxAlternatives.
                             Valid values are 0-30. A value of 0 or 1
                             will return a maximum of 1. Defaults to 1

    :type profanity_filter: bool
    :param profanity_filter: If True, the server will attempt to filter
                             out profanities, replacing all but the
                             initial character in each filtered word with
                             asterisks, e.g. ``'f***'``. If False or
                             omitted, profanities won't be filtered out.

    :type speech_context: list
    :param speech_context: A list of strings (max 50) containing words and
                           phrases "hints" so that the speech recognition
                           is more likely to recognize them. This can be
                           used to improve the accuracy for specific words
                           and phrases. This can also be used to add new
                           words to the vocabulary of the recognizer.

    :rtype: :class:`~google.cloud.speech.operation.Operation`
    :returns: Operation for asynchronous request to Google Speech API.

    :raises: ValueError if ``sample.encoding`` does not equal
             ``Encoding.LINEAR16``.
    """
    # Compare by value rather than by identity: an encoding that is equal
    # to ``Encoding.LINEAR16`` but is not the very same object must still
    # be accepted, and ``is not`` would reject it.
    if sample.encoding != Encoding.LINEAR16:
        raise ValueError('Only LINEAR16 encoding is supported by '
                         'asynchronous speech requests.')
    api = self.speech_api
    # Arguments are forwarded positionally, in the order this method
    # receives them.
    return api.async_recognize(sample, language_code, max_alternatives,
                               profanity_filter, speech_context)
117-
118- @staticmethod
119- def sample (content = None , source_uri = None , encoding = None ,
66+ def sample (self , content = None , source_uri = None , encoding = None ,
12067 sample_rate = None ):
12168 """Factory: construct Sample to use when making recognize requests.
12269
@@ -148,7 +95,7 @@ def sample(content=None, source_uri=None, encoding=None,
14895 :returns: Instance of ``Sample``.
14996 """
15097 return Sample (content = content , source_uri = source_uri ,
151- encoding = encoding , sample_rate = sample_rate )
98+ encoding = encoding , sample_rate = sample_rate , client = self )
15299
153100 @property
154101 def speech_api (self ):
@@ -160,145 +107,6 @@ def speech_api(self):
160107 self ._speech_api = _JSONSpeechAPI (self )
161108 return self ._speech_api
162109
def streaming_recognize(self, sample, language_code=None,
                        max_alternatives=None, profanity_filter=None,
                        speech_context=None, single_utterance=False,
                        interim_results=False):
    """Stream audio to the Speech API and yield recognition results.

    .. note::

        Streaming recognition requests are limited to 1 minute of audio.
        See: https://cloud.google.com/speech/limits#content

    This is a generator: nothing (including the gRPC availability check)
    runs until the first item is requested.

    Yields: Instance of
            :class:`~google.cloud.speech.result.StreamingSpeechResult`
            built from each result that is final, or from every result
            when ``interim_results`` is true.

    :type sample: :class:`~google.cloud.speech.sample.Sample`
    :param sample: Instance of ``Sample`` containing audio information.

    :type language_code: str
    :param language_code: (Optional) BCP-47 language tag of the supplied
                          audio, e.g. ``'en-GB'``. If omitted, defaults
                          to ``'en-US'``.

    :type max_alternatives: int
    :param max_alternatives: (Optional) Maximum number of recognition
                             hypotheses to be returned; the server may
                             return fewer. Valid values are 0-30, and
                             0 or 1 both return at most 1. Defaults to 1.

    :type profanity_filter: bool
    :param profanity_filter: If True, the server will attempt to filter
                             out profanities, replacing all but the
                             initial character in each filtered word with
                             asterisks, e.g. ``'f***'``. If False or
                             omitted, profanities won't be filtered out.

    :type speech_context: list
    :param speech_context: A list of strings (max 50) containing word and
                           phrase "hints" that make the recognizer more
                           likely to recognize them. Can improve accuracy
                           for specific words and phrases, or add new
                           words to the recognizer's vocabulary.

    :type single_utterance: bool
    :param single_utterance: (Optional) If false or omitted, the
                             recognizer performs continuous recognition
                             (it keeps processing audio even if the user
                             pauses speaking) until the client closes the
                             output stream (gRPC API) or the maximum time
                             limit is reached; multiple
                             SpeechRecognitionResults with the is_final
                             flag set to true may be returned.
                             If true, the recognizer detects a single
                             spoken utterance: once the user pauses or
                             stops speaking it returns an
                             END_OF_UTTERANCE event, ceases recognition,
                             and returns no more than one
                             SpeechRecognitionResult with the is_final
                             flag set to true.

    :type interim_results: bool
    :param interim_results: (Optional) If true, interim results
                            (tentative hypotheses, flagged with
                            ``is_final=False``) may be returned as they
                            become available. If false or omitted, only
                            is_final=true result(s) are returned.

    :raises: EnvironmentError if gRPC is not available.
    """
    if not self._use_gax:
        raise EnvironmentError('gRPC is required to use this API.')

    stream = self.speech_api.streaming_recognize(
        sample, language_code, max_alternatives, profanity_filter,
        speech_context, single_utterance, interim_results)

    # Lazily flatten every response into its individual results.
    candidates = (candidate
                  for message in stream
                  for candidate in message.results)
    for candidate in candidates:
        # Non-final results are only surfaced when the caller opted in.
        if not (candidate.is_final or interim_results):
            continue
        yield StreamingSpeechResult.from_pb(candidate)
248-
def sync_recognize(self, sample, language_code=None,
                   max_alternatives=None, profanity_filter=None,
                   speech_context=None):
    """Run a synchronous speech recognition request.

    .. _sync_recognize: https://cloud.google.com/speech/reference/\
                        rest/v1beta1/speech/syncrecognize

    See `sync_recognize`_.

    The call is delegated to ``self.speech_api.sync_recognize`` with the
    arguments passed positionally in the order listed below.

    :type sample: :class:`~google.cloud.speech.sample.Sample`
    :param sample: Instance of ``Sample`` containing audio information.

    :type language_code: str
    :param language_code: (Optional) BCP-47 language tag of the supplied
                          audio, e.g. ``'en-GB'``. If omitted, defaults
                          to ``'en-US'``.

    :type max_alternatives: int
    :param max_alternatives: (Optional) Maximum number of recognition
                             hypotheses to be returned; the server may
                             return fewer. Valid values are 0-30, and
                             0 or 1 both return at most 1. Defaults to 1.

    :type profanity_filter: bool
    :param profanity_filter: If True, the server will attempt to filter
                             out profanities, replacing all but the
                             initial character in each filtered word with
                             asterisks, e.g. ``'f***'``. If False or
                             omitted, profanities won't be filtered out.

    :type speech_context: list
    :param speech_context: A list of strings (max 50) containing word and
                           phrase "hints" that make the recognizer more
                           likely to recognize them. Can improve accuracy
                           for specific words and phrases, or add new
                           words to the recognizer's vocabulary.

    :rtype: list
    :returns: A list of dictionaries. One dict for each alternative. Each
              dictionary typically contains two keys (though not
              all will be present in all cases)

              * ``transcript``: The detected text from the audio recording.
              * ``confidence``: The confidence in language detection, float
                between 0 and 1.
    """
    request_args = (sample, language_code, max_alternatives,
                    profanity_filter, speech_context)
    return self.speech_api.sync_recognize(*request_args)
301-
302110
303111class _JSONSpeechAPI (object ):
304112 """Speech API for interacting with the JSON/REST version of the API.
0 commit comments