diff --git a/speech/README.md b/speech/README.md new file mode 100644 index 00000000000..35856267a6a --- /dev/null +++ b/speech/README.md @@ -0,0 +1,14 @@ +# Google Cloud Speech API Samples + +These samples show how to use the [Google Cloud Speech API][speech-api] +to transcribe audio files, as well as live audio from your computer's +microphone. + +* [`api-client/`](api-client) contains samples that use the [Google API Client + Library for Python][rest-client] to make REST calls to the API. +* [`grpc/`](grpc) contains samples that use the more-efficient (though sometimes + more complex) [GRPC][grpc] API. The GRPC API also allows streaming requests. + +[speech-api]: http://cloud.google.com/speech +[rest-client]: https://developers.google.com/api-client-library/python/ +[grpc]: http://grpc.io diff --git a/speech/api-client/README.md b/speech/api-client/README.md new file mode 100644 index 00000000000..9c443f6a72a --- /dev/null +++ b/speech/api-client/README.md @@ -0,0 +1,85 @@ +# Google Cloud Speech REST API Samples + +These samples show how to use the [Google Cloud Speech API](http://cloud.google.com/speech) +to transcribe audio files, using the REST-based [Google API Client Library for +Python](https://developers.google.com/api-client-library/python/). + +For samples that use the more-efficient [GRPC](http://grpc.io)-based client +library (including a streaming sample that transcribes audio streamed from your +microphone), see [../grpc/](../grpc/). + +## Prerequisites + +### Enable the Speech API + +If you have not already done so, [enable the Google Cloud Speech +API][console-speech] for your project. + +[console-speech]: https://console.cloud.google.com/apis/api/speech.googleapis.com/overview?project=_ + +### Authentication + +These samples use service accounts for authentication. + +* Visit the [Cloud Console][cloud-console], and navigate to: + + `API Manager > Credentials > Create credentials > Service account key > New + service account`. 
+* Create a new service account, and download the json credentials file. +* Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to point to your + downloaded service account credentials: + + export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/credentials-key.json + + If you do not do this, the REST api will return a 403. + +See the [Cloud Platform Auth Guide][auth-guide] for more information. + +[cloud-console]: https://console.cloud.google.com +[auth-guide]: https://cloud.google.com/docs/authentication#developer_workflow + +### Setup + +* Clone this repo + + ```sh + git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + cd python-docs-samples/speech/api-client + ``` + +* Create a [virtualenv][virtualenv]. This isolates the python dependencies + you're about to install, to minimize conflicts with any existing libraries you + might already have. + + ```sh + virtualenv env + source env/bin/activate + ``` + +* Install the dependencies + + ```sh + pip install -r requirements.txt + ``` + +[pip]: https://pip.pypa.io/en/stable/installing/ +[virtualenv]: https://virtualenv.pypa.io/en/stable/installation/ + +## Run the sample + +Each of the samples takes the audio file to transcribe as the first argument. +For example: + +```sh +python transcribe.py resources/audio.raw +``` + +You should see a response with the transcription result. 
+ +### Deactivate virtualenv + +When you're done running the sample, you can exit your virtualenv: + +``` +deactivate +``` diff --git a/speech/api/requirements-speech_rest.txt b/speech/api-client/requirements.txt similarity index 100% rename from speech/api/requirements-speech_rest.txt rename to speech/api-client/requirements.txt diff --git a/speech/api/resources/audio.raw b/speech/api-client/resources/audio.raw similarity index 100% rename from speech/api/resources/audio.raw rename to speech/api-client/resources/audio.raw diff --git a/speech/api/resources/audio2.raw b/speech/api-client/resources/audio2.raw similarity index 100% rename from speech/api/resources/audio2.raw rename to speech/api-client/resources/audio2.raw diff --git a/speech/api/speech_rest.py b/speech/api-client/transcribe.py similarity index 100% rename from speech/api/speech_rest.py rename to speech/api-client/transcribe.py diff --git a/speech/api/speech_async_rest.py b/speech/api-client/transcribe_async.py similarity index 100% rename from speech/api/speech_async_rest.py rename to speech/api-client/transcribe_async.py diff --git a/speech/api/speech_async_rest_test.py b/speech/api-client/transcribe_async_test.py similarity index 95% rename from speech/api/speech_async_rest_test.py rename to speech/api-client/transcribe_async_test.py index d9f79e6aac5..d90f45608c8 100644 --- a/speech/api/speech_async_rest_test.py +++ b/speech/api-client/transcribe_async_test.py @@ -13,7 +13,7 @@ import re -from speech_async_rest import main +from transcribe_async import main def test_main(resource, capsys): diff --git a/speech/api/speech_rest_test.py b/speech/api-client/transcribe_test.py similarity index 96% rename from speech/api/speech_rest_test.py rename to speech/api-client/transcribe_test.py index 0204e565ab8..c8cb0a70333 100644 --- a/speech/api/speech_rest_test.py +++ b/speech/api-client/transcribe_test.py @@ -13,7 +13,7 @@ import re -from speech_rest import main +from transcribe import main def 
test_main(resource, capsys): diff --git a/speech/api/README.md b/speech/api/README.md deleted file mode 100644 index 00191eab38f..00000000000 --- a/speech/api/README.md +++ /dev/null @@ -1,133 +0,0 @@ - -# Google Cloud Speech API Samples - -These examples demo accessing the [Google Cloud Speech API](http://cloud.google.com/speech) -in streaming mode (via its gRPC API) and in non-streaming mode (via its REST -API). - -## Prerequisites - -### Enable the Speech API - -If you have not already done so, -[enable the Google Cloud Speech API for your project](https://console.cloud.google.com/apis/api/speech.googleapis.com/overview). -You must be whitelisted to do this. - - -### Set Up to Authenticate With Your Project's Credentials - -The example uses a service account for OAuth2 authentication. -So next, set up to authenticate with the Speech API using your project's -service account credentials. - -Visit the [Cloud Console](https://console.cloud.google.com), and navigate to: -`API Manager > Credentials > Create credentials > -Service account key > New service account`. -Create a new service account, and download the json credentials file. - -Then, set -the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to point to your -downloaded service account credentials before running this example: - - export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/credentials-key.json - -If you do not do this, the REST api will return a 403. The streaming sample will -just sort of hang silently. - -See the -[Cloud Platform Auth Guide](https://cloud.google.com/docs/authentication#developer_workflow) -for more information. 
- -### Setup - -Before running these samples perform the steps: - -* Clone this repo - ``` - git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git - cd python-docs-samples/speech/api - ``` - -* Create a [virtualenv][virtualenv] - ``` - virtualenv env - source env/bin/activate - ``` - -### Install the dependencies - -The sample uses the [PyAudio][pyaudio] library to stream audio from your computer's microphone. PyAudio depends on [PortAudio][portaudio], which may need to be installed separately, depending on your platform: - -* Install the [pyAudio dependencies][pyaudio-install]. - -* If you're running the `speech_rest.py` sample: - - ```sh - $ pip install -r requirements-speech_rest.txt - ``` - -* If you're running the `speech_streaming.py` sample: - - ```sh - $ pip install -r requirements-speech_grpc.txt - ``` - -[pyaudio]: https://people.csail.mit.edu/hubert/pyaudio/ -[portaudio]: http://www.portaudio.com/ -[pyaudio-install]: https://people.csail.mit.edu/hubert/pyaudio/#downloads -[pip]: https://pip.pypa.io/en/stable/installing/ -[virtualenv]: https://virtualenv.pypa.io/en/stable/installation/ -[home-page]: https://github.com/GoogleCloudPlatform/python-docs-samples -[virtualenv]: https://virtualenv.pypa.io/en/stable/installation/ - -### Troubleshooting - -#### PortAudio on OS X - -If you see the error - - fatal error: 'portaudio.h' file not found - -Try adding the following to your ~/.pydistutils.cfg file, -substituting in your appropriate brew Cellar directory: - - include_dirs=/usr/local/Cellar/portaudio/19.20140130/include/ - library_dirs=/usr/local/YourUsername/homebrew/Cellar/portaudio/19.20140130/lib/ - -## Run the example - -* To run the `speech_rest.py` sample: - - ```sh - $ python speech_rest.py resources/audio.raw - ``` - - You should see a response with the transcription result. 
- -* To run the `speech_async_rest.py` sample: - - ```sh - $ python speech_async_rest.py resources/audio.raw - ``` - - You should see a response with the transcription result. - -* To run the `speech_streaming.py` sample: - - ```sh - $ python speech_streaming.py - ``` - - The sample will run in a continuous loop, printing the data and metadata - it receives from the Speech API, which includes alternative transcriptions - of what it hears, and a confidence score. Say "exit" to exit the loop. - - Note that the `speech_streaming.py` sample does not yet support python 3, as - the upstream `grpcio` library's support is [not yet - complete](https://github.com/grpc/grpc/issues/282). - -### Deactivate virtualenv - -``` -deactivate -``` diff --git a/speech/api/resources/audio.flac b/speech/api/resources/audio.flac deleted file mode 100644 index 44d6f9ecd2a..00000000000 Binary files a/speech/api/resources/audio.flac and /dev/null differ diff --git a/speech/api/speech_async_grpc_test.py b/speech/api/speech_async_grpc_test.py deleted file mode 100644 index 56c268212c8..00000000000 --- a/speech/api/speech_async_grpc_test.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2016, Google, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import re - -import pytest - -from speech_async_grpc import _gcs_uri -from speech_async_grpc import main - - -def test_main(cloud_config, capsys): - input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket) - - main(input_uri, 'FLAC', 16000) - - out, err = capsys.readouterr() - assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) - - -def test_gcs_uri(): - _gcs_uri('gs://bucket/path') - with pytest.raises(argparse.ArgumentTypeError): - _gcs_uri('/local/path') diff --git a/speech/api/speech_grpc_test.py b/speech/api/speech_grpc_test.py deleted file mode 100644 index ef6cee19e94..00000000000 --- a/speech/api/speech_grpc_test.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2016, Google, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import re - -import pytest - -from speech_grpc import _gcs_uri -from speech_grpc import main - - -def test_main(cloud_config, capsys): - input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket) - - main(input_uri, 'FLAC', 16000) - - out, err = capsys.readouterr() - assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) - - -def test_gcs_uri(): - _gcs_uri('gs://bucket/path') - with pytest.raises(ValueError): - _gcs_uri('/local/path') diff --git a/speech/grpc/README.md b/speech/grpc/README.md new file mode 100644 index 00000000000..c61fc0899d3 --- /dev/null +++ b/speech/grpc/README.md @@ -0,0 +1,148 @@ +# Google Cloud Speech GRPC API Samples + +These samples show how to use the [Google Cloud Speech API][speech-api] +to transcribe audio files, as well as live audio from your computer's +microphone. + +[speech-api]: http://cloud.google.com/speech + +## Prerequisites + +### Enable the Speech API + +If you have not already done so, [enable the Google Cloud Speech +API][console-speech] for your project. + +[console-speech]: https://console.cloud.google.com/apis/api/speech.googleapis.com/overview?project=_ + +### Authentication + +These samples use service accounts for authentication. + +* Visit the [Cloud Console][cloud-console], and navigate to: + + `API Manager > Credentials > Create credentials > Service account key > New + service account`. +* Create a new service account, and download the json credentials file. +* Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to point to your + downloaded service account credentials: + + export GOOGLE_APPLICATION_CREDENTIALS=/path/to/your/credentials-key.json + + If you do not do this, the streaming sample will just sort of hang silently. + +See the [Cloud Platform Auth Guide][auth-guide] for more information. 
+ +[cloud-console]: https://console.cloud.google.com +[auth-guide]: https://cloud.google.com/docs/authentication#developer_workflow + +### Setup + +* Clone this repo + + ```sh + git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + cd python-docs-samples/speech/grpc + ``` + +* Create a [virtualenv][virtualenv]. This isolates the python dependencies + you're about to install, to minimize conflicts with any existing libraries you + might already have. + + ```sh + virtualenv env + source env/bin/activate + ``` + +* Install [PortAudio][portaudio]. The `transcribe_streaming.py` sample uses the + [PyAudio][pyaudio] library to stream audio from your computer's + microphone. PyAudio depends on PortAudio for cross-platform compatibility, and + is installed differently depending on the platform. For example: + + * For Mac OS X, you can use [Homebrew][brew]: + + ```sh + brew install portaudio + ``` + + * For Debian / Ubuntu Linux: + + ```sh + apt-get install portaudio19-dev python-all-dev + ``` + + * Windows may work without having to install PortAudio explicitly (it will get + installed with PyAudio, when you run `python -m pip install ...` below). + + * For more details, see the [PyAudio installation][pyaudio-install] page. 
+ +* Install the python dependencies: + + ```sh + pip install -r requirements.txt + ``` + +[pyaudio]: https://people.csail.mit.edu/hubert/pyaudio/ +[portaudio]: http://www.portaudio.com/ +[pyaudio-install]: https://people.csail.mit.edu/hubert/pyaudio/#downloads +[pip]: https://pip.pypa.io/en/stable/installing/ +[virtualenv]: https://virtualenv.pypa.io/en/stable/installation/ +[brew]: http://brew.sh + +### Troubleshooting + +#### PortAudio on OS X + +If you see the error + + fatal error: 'portaudio.h' file not found + +Try adding the following to your `~/.pydistutils.cfg` file, +substituting in your appropriate brew Cellar directory: + + include_dirs=/usr/local/Cellar/portaudio/19.20140130/include/ + library_dirs=/usr/local/$USER/homebrew/Cellar/portaudio/19.20140130/lib/ + +## Run the sample + +* To run the `transcribe_streaming.py` sample: + + ```sh + python transcribe_streaming.py + ``` + + The sample will run in a continuous loop, printing the data and metadata + it receives from the Speech API, which includes alternative transcriptions + of what it hears, and a confidence score. Say "exit" to exit the loop. + +* To run the `transcribe_async.py` sample: + + ```sh + $ python transcribe_async.py gs://python-docs-samples-tests/speech/audio.flac + ``` + + You should see a response with the transcription result. + +* To run the `transcribe.py` sample: + + ```sh + $ python transcribe.py gs://python-docs-samples-tests/speech/audio.flac + ``` + + You should see a response with the transcription result. + +* Note that `gs://python-docs-samples-tests/speech/audio.flac` is the path to a + sample audio file, and you can transcribe your own audio files using this + method by uploading them to [Google Cloud Storage][gcs]. (The [gsutil][gsutil] + tool is often used for this purpose.) 
+ +[gcs]: https://cloud.google.com/storage +[gsutil]: https://cloud.google.com/storage/docs/gsutil + +### Deactivate virtualenv + +When you're done running the sample, you can exit your virtualenv: + +``` +deactivate +``` diff --git a/speech/api/requirements-speech_grpc.txt b/speech/grpc/requirements.txt similarity index 100% rename from speech/api/requirements-speech_grpc.txt rename to speech/grpc/requirements.txt diff --git a/speech/api/resources/quit.raw b/speech/grpc/resources/quit.raw similarity index 100% rename from speech/api/resources/quit.raw rename to speech/grpc/resources/quit.raw diff --git a/speech/api/speech_grpc.py b/speech/grpc/transcribe.py similarity index 100% rename from speech/api/speech_grpc.py rename to speech/grpc/transcribe.py diff --git a/speech/api/speech_async_grpc.py b/speech/grpc/transcribe_async.py similarity index 100% rename from speech/api/speech_async_grpc.py rename to speech/grpc/transcribe_async.py diff --git a/speech/api/speech_streaming.py b/speech/grpc/transcribe_streaming.py similarity index 100% rename from speech/api/speech_streaming.py rename to speech/grpc/transcribe_streaming.py diff --git a/speech/api/speech_streaming_test.py b/speech/grpc/transcribe_streaming_test.py similarity index 85% rename from speech/api/speech_streaming_test.py rename to speech/grpc/transcribe_streaming_test.py index b81264fca8f..186c3888850 100644 --- a/speech/api/speech_streaming_test.py +++ b/speech/grpc/transcribe_streaming_test.py @@ -16,13 +16,13 @@ import re import time -import speech_streaming +import transcribe_streaming class MockAudioStream(object): def __init__(self, audio_filename, trailing_silence_secs=10): self.audio_filename = audio_filename - self.silence = io.BytesIO('\0\0' * speech_streaming.RATE * + self.silence = io.BytesIO('\0\0' * transcribe_streaming.RATE * trailing_silence_secs) def __enter__(self): @@ -38,7 +38,7 @@ def __call__(self, *args): def read(self, num_frames): # Approximate realtime by sleeping for the 
appropriate time for the # requested number of frames - time.sleep(num_frames / float(speech_streaming.RATE)) + time.sleep(num_frames / float(transcribe_streaming.RATE)) # audio is 16-bit samples, whereas python byte is 8-bit num_bytes = 2 * num_frames chunk = self.audio_file.read(num_bytes) or self.silence.read(num_bytes) @@ -56,11 +56,11 @@ def mock_audio_stream(channels, rate, chunk): def test_main(resource, monkeypatch, capsys): monkeypatch.setattr( - speech_streaming, 'record_audio', + transcribe_streaming, 'record_audio', mock_audio_stream(resource('quit.raw'))) - monkeypatch.setattr(speech_streaming, 'DEADLINE_SECS', 5) + monkeypatch.setattr(transcribe_streaming, 'DEADLINE_SECS', 30) - speech_streaming.main() + transcribe_streaming.main() out, err = capsys.readouterr() assert re.search(r'transcript.*"quit"', out, re.DOTALL | re.I)