From b5d4cebc62cbbcf1cac60007240f989003c07e9b Mon Sep 17 00:00:00 2001 From: happyhuman Date: Thu, 19 Jul 2018 15:09:06 -0700 Subject: [PATCH 01/12] Printing the last paragraph only. --- speech/cloud-client/README.rst | 2 +- speech/cloud-client/beta_snippets.py | 13 +++++-------- speech/cloud-client/beta_snippets_test.py | 4 ++-- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/speech/cloud-client/README.rst b/speech/cloud-client/README.rst index 394c97c81f2..fd5ce38bac6 100644 --- a/speech/cloud-client/README.rst +++ b/speech/cloud-client/README.rst @@ -221,7 +221,7 @@ To run this sample: $ python beta_snippets.py - usage: beta_snippets.py [-h] command path first second + usage: beta_snippets.py [-h] command path [first] [second] Google Cloud Speech API sample that demonstrates enhanced models and recognition metadata. diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 24e213be356..cd3579ee74b 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -156,7 +156,6 @@ def transcribe_file_with_diarization(speech_file): config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=16000, language_code='en-US', enable_speaker_diarization=True, diarization_speaker_count=2) @@ -164,13 +163,11 @@ def transcribe_file_with_diarization(speech_file): print('Waiting for operation to complete...') response = client.recognize(config, audio) - for i, result in enumerate(response.results): - alternative = result.alternatives[0] - print('-' * 20) - print('First alternative of result {}: {}' - .format(i, alternative.transcript)) - print('Speaker Tag for the first word: {}' - .format(alternative.words[0].speaker_tag)) + result = response.results[-1] + words_info = result.alternatives[0].words + pieces = ['%s (%s)' % (word_info.word, word_info.speaker_tag) + for word_info in words_info] + print ' '.join(pieces) # [END speech_transcribe_diarization] diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py index bbb6c75f674..1a86a17b544 100644 --- a/speech/cloud-client/beta_snippets_test.py +++ b/speech/cloud-client/beta_snippets_test.py @@ -51,10 +51,10 @@ def test_transcribe_file_with_auto_punctuation(capsys): def test_transcribe_diarization(capsys): transcribe_file_with_diarization( - os.path.join(RESOURCES, 'Google_Gnome.wav')) + os.path.join(RESOURCES, 'commercial_mono.wav')) out, err = capsys.readouterr() - assert 'OK Google stream stranger things from Netflix to my TV' in out + assert "I'm (1) here (1) hi (2)" in out def test_transcribe_multichannel_file(capsys): From a4c2ca46a4057cd1760db9810072c7d536d2e074 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Thu, 19 Jul 2018 15:15:42 -0700 Subject: [PATCH 02/12] Python3 print --- speech/cloud-client/beta_snippets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index cd3579ee74b..41b83105f5f 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -167,7 +167,7 @@ def transcribe_file_with_diarization(speech_file): words_info = result.alternatives[0].words pieces = ['%s (%s)' % (word_info.word, word_info.speaker_tag) for word_info in words_info] - print ' '.join(pieces) + print(' '.join(pieces)) # [END speech_transcribe_diarization] From f7e413122b80143694e7f5dbf67f2e3656a8c499 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Fri, 20 Jul 2018 08:57:36 -0700 Subject: [PATCH 03/12] Removing sample rate setting --- speech/cloud-client/beta_snippets.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 41b83105f5f..b8990091623 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -46,7 +46,6 @@ def transcribe_file_with_enhanced_model(speech_file): audio = speech.types.RecognitionAudio(content=content) config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=8000, language_code='en-US', # Enhanced models are only available to projects that # opt in for audio data collection. @@ -95,7 +94,6 @@ def transcribe_file_with_metadata(speech_file): audio = speech.types.RecognitionAudio(content=content) config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=8000, language_code='en-US', # Add this in the request to send metadata. metadata=metadata) @@ -125,7 +123,6 @@ def transcribe_file_with_auto_punctuation(speech_file): audio = speech.types.RecognitionAudio(content=content) config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=8000, language_code='en-US', # Enable automatic punctuation enable_automatic_punctuation=True) From f1662fe32599c7e385ff2c74af39c16f5fed5185 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Fri, 20 Jul 2018 10:36:47 -0700 Subject: [PATCH 04/12] Adding the missing output parameter in the example --- texttospeech/cloud-client/audio_profile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/texttospeech/cloud-client/audio_profile.py b/texttospeech/cloud-client/audio_profile.py index c616f710026..21a6515b4c5 100644 --- a/texttospeech/cloud-client/audio_profile.py +++ b/texttospeech/cloud-client/audio_profile.py @@ -18,7 +18,7 @@ Example usage: python audio_profile.py --text "hello" --effects_profile_id - "telephony-class-application" + "telephony-class-application" --output "output.mp3" """ import argparse From 4fbefa3d4492baadccf092b4ecc3fe8833c975e3 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Fri, 20 Jul 2018 12:01:29 -0700 Subject: [PATCH 05/12] Changes based on the comments --- speech/cloud-client/beta_snippets.py | 18 +++++++++++++++--- speech/cloud-client/beta_snippets_test.py | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index b8990091623..e570cdf4417 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -160,11 +160,23 @@ def transcribe_file_with_diarization(speech_file): print('Waiting for operation to complete...') response = client.recognize(config, audio) + # response.results contains partial results with the last item + # containing the entire result: result = response.results[-1] + words_info = result.alternatives[0].words - pieces = ['%s (%s)' % (word_info.word, word_info.speaker_tag) - for word_info in words_info] - print(' '.join(pieces)) + + # Separating the words by who said what: + speakers_words = [] + for word_info in words_info: + if speakers_words and speakers_words[-1][0] == word_info.speaker_tag: + speakers_words[-1][1].append(word_info.word) + else: + speakers_words.append((word_info.speaker_tag, [word_info.word, ])) + + # Printing the output based on who said what: + for speaker_tag, words in speakers_words: + print('Speaker #{}: {}'.format(speaker_tag, ' '.join(words))) # [END speech_transcribe_diarization] diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py index 1a86a17b544..b889fa5f092 100644 --- a/speech/cloud-client/beta_snippets_test.py +++ b/speech/cloud-client/beta_snippets_test.py @@ -54,7 +54,7 @@ def test_transcribe_diarization(capsys): os.path.join(RESOURCES, 'commercial_mono.wav')) out, err = capsys.readouterr() - assert "I'm (1) here (1) hi (2)" in out + assert "Speaker #1: I'm here" in out def test_transcribe_multichannel_file(capsys): From b105e2ae79da25b5a6840d188dc95a74b0e6dbee Mon Sep 17 00:00:00 2001 From: happyhuman Date: Fri, 20 Jul 2018 13:32:45 -0700 Subject: [PATCH 06/12] Removed filenames as input parameters --- speech/cloud-client/README.rst | 14 ++--- speech/cloud-client/beta_snippets.py | 72 +++++++++++------------ speech/cloud-client/beta_snippets_test.py | 21 +++---- 3 files changed, 49 insertions(+), 58 deletions(-) diff --git a/speech/cloud-client/README.rst b/speech/cloud-client/README.rst index fd5ce38bac6..f9ecec0fa0f 100644 --- a/speech/cloud-client/README.rst +++ b/speech/cloud-client/README.rst @@ -227,13 +227,13 @@ To run this sample: and recognition metadata. Example usage: - python beta_snippets.py enhanced-model resources/commercial_mono.wav - python beta_snippets.py metadata resources/commercial_mono.wav - python beta_snippets.py punctuation resources/commercial_mono.wav - python beta_snippets.py diarization resources/commercial_mono.wav - python beta_snippets.py multi-channel resources/commercial_mono.wav - python beta_snippets.py multi-language resources/multi.wav en-US es - python beta_snippets.py word-level-conf resources/commercial_mono.wav + python beta_snippets.py enhanced-model + python beta_snippets.py metadata + python beta_snippets.py punctuation + python beta_snippets.py diarization + python beta_snippets.py multi-channel + python beta_snippets.py multi-language + python beta_snippets.py word-level-conf positional arguments: command diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index e570cdf4417..56c7b2a6c91 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -18,27 +18,26 @@ and recognition metadata. Example usage: - python beta_snippets.py enhanced-model resources/commercial_mono.wav - python beta_snippets.py metadata resources/commercial_mono.wav - python beta_snippets.py punctuation resources/commercial_mono.wav - python beta_snippets.py diarization resources/commercial_mono.wav - python beta_snippets.py multi-channel resources/commercial_mono.wav - python beta_snippets.py multi-language resources/multi.wav en-US es - python beta_snippets.py word-level-conf resources/commercial_mono.wav + python beta_snippets.py enhanced-model + python beta_snippets.py metadata + python beta_snippets.py punctuation + python beta_snippets.py diarization + python beta_snippets.py multi-channel + python beta_snippets.py multi-language + python beta_snippets.py word-level-conf """ import argparse import io -def transcribe_file_with_enhanced_model(speech_file): +def transcribe_file_with_enhanced_model(): """Transcribe the given audio file using an enhanced model.""" # [START speech_transcribe_file_with_enhanced_model] from google.cloud import speech_v1p1beta1 as speech client = speech.SpeechClient() - # TODO(developer): Uncomment and set to a path to your audio file. - # speech_file = 'path/to/file.wav' + speech_file = 'resources/commercial_mono.wav' with io.open(speech_file, 'rb') as audio_file: content = audio_file.read() @@ -46,6 +45,7 @@ def transcribe_file_with_enhanced_model(speech_file): audio = speech.types.RecognitionAudio(content=content) config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=8000, language_code='en-US', # Enhanced models are only available to projects that # opt in for audio data collection. @@ -63,14 +63,13 @@ def transcribe_file_with_enhanced_model(speech_file): # [END speech_transcribe_file_with_enhanced_model] -def transcribe_file_with_metadata(speech_file): +def transcribe_file_with_metadata(): """Send a request that includes recognition metadata.""" # [START speech_transcribe_file_with_metadata] from google.cloud import speech_v1p1beta1 as speech client = speech.SpeechClient() - # TODO(developer): Uncomment and set to a path to your audio file. - # speech_file = 'path/to/file.wav' + speech_file = 'resources/commercial_mono.wav' with io.open(speech_file, 'rb') as audio_file: content = audio_file.read() @@ -94,6 +93,7 @@ def transcribe_file_with_metadata(speech_file): audio = speech.types.RecognitionAudio(content=content) config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=8000, language_code='en-US', # Add this in the request to send metadata. metadata=metadata) @@ -108,14 +108,13 @@ def transcribe_file_with_metadata(speech_file): # [END speech_transcribe_file_with_metadata] -def transcribe_file_with_auto_punctuation(speech_file): +def transcribe_file_with_auto_punctuation(): """Transcribe the given audio file with auto punctuation enabled.""" # [START speech_transcribe_file_with_auto_punctuation] from google.cloud import speech_v1p1beta1 as speech client = speech.SpeechClient() - # TODO(developer): Uncomment and set to a path to your audio file. - # speech_file = 'path/to/file.wav' + speech_file = 'resources/commercial_mono.wav' with io.open(speech_file, 'rb') as audio_file: content = audio_file.read() @@ -123,6 +122,7 @@ def transcribe_file_with_auto_punctuation(speech_file): audio = speech.types.RecognitionAudio(content=content) config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=8000, language_code='en-US', # Enable automatic punctuation enable_automatic_punctuation=True) @@ -137,14 +137,13 @@ def transcribe_file_with_auto_punctuation(speech_file): # [END speech_transcribe_file_with_auto_punctuation] -def transcribe_file_with_diarization(speech_file): +def transcribe_file_with_diarization(): """Transcribe the given audio file synchronously with diarization.""" # [START speech_transcribe_diarization] from google.cloud import speech_v1p1beta1 as speech client = speech.SpeechClient() - # TODO(developer): Uncomment and set to a path to your audio file. - # speech_file = 'path/to/file.wav' + speech_file = 'resources/commercial_mono.wav' with open(speech_file, 'rb') as audio_file: content = audio_file.read() @@ -153,6 +152,7 @@ def transcribe_file_with_diarization(speech_file): config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=8000, language_code='en-US', enable_speaker_diarization=True, diarization_speaker_count=2) @@ -180,15 +180,14 @@ def transcribe_file_with_diarization(speech_file): # [END speech_transcribe_diarization] -def transcribe_file_with_multichannel(speech_file): +def transcribe_file_with_multichannel(): """Transcribe the given audio file synchronously with multi channel.""" # [START speech_transcribe_multichannel] from google.cloud import speech_v1p1beta1 as speech client = speech.SpeechClient() - # TODO(developer): Uncomment and set to a path to your audio file. - # speech_file = 'path/to/file.wav' + speech_file = 'resources/Google_Gnome.wav' with open(speech_file, 'rb') as audio_file: content = audio_file.read() @@ -213,17 +212,16 @@ def transcribe_file_with_multichannel(speech_file): # [END speech_transcribe_multichannel] -def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang): +def transcribe_file_with_multilanguage(): """Transcribe the given audio file synchronously with multi language.""" # [START speech_transcribe_multilanguage] from google.cloud import speech_v1p1beta1 as speech client = speech.SpeechClient() - # TODO(developer): Uncomment and set to a path to your audio file. - # speech_file = 'path/to/file.wav' - # first_lang = first language code, e,g, 'en-US' - # second_lang = first language code, e,g, 'es' + speech_file = 'resources/multi.wav' + first_lang = 'en-US' + second_lang = 'es' with open(speech_file, 'rb') as audio_file: content = audio_file.read() @@ -232,6 +230,7 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang): config = speech.types.RecognitionConfig( encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=44100, audio_channel_count=2, language_code=first_lang, alternative_language_codes=[second_lang]) @@ -247,15 +246,14 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang): # [END speech_transcribe_multilanguage] -def transcribe_file_with_word_level_confidence(speech_file): +def transcribe_file_with_word_level_confidence(): """Transcribe the given audio file synchronously with word level confidence.""" # [START speech_transcribe_word_level_confidence] from google.cloud import speech_v1p1beta1 as speech client = speech.SpeechClient() - # TODO(developer): Uncomment and set to a path to your audio file. - # speech_file = 'path/to/file.wav' + speech_file = 'resources/Google_Gnome.wav' with open(speech_file, 'rb') as audio_file: content = audio_file.read() @@ -297,16 +295,16 @@ def transcribe_file_with_word_level_confidence(speech_file): args = parser.parse_args() if args.command == 'enhanced-model': - transcribe_file_with_enhanced_model(args.path) + transcribe_file_with_enhanced_model() elif args.command == 'metadata': - transcribe_file_with_metadata(args.path) + transcribe_file_with_metadata() elif args.command == 'punctuation': - transcribe_file_with_auto_punctuation(args.path) + transcribe_file_with_auto_punctuation() elif args.command == 'diarization': - transcribe_file_with_diarization(args.path) + transcribe_file_with_diarization() elif args.command == 'multi-channel': - transcribe_file_with_multichannel(args.path) + transcribe_file_with_multichannel() elif args.command == 'multi-language': - transcribe_file_with_multilanguage(args.path, args.first, args.second) + transcribe_file_with_multilanguage() elif args.command == 'word-level-conf': - transcribe_file_with_word_level_confidence(args.path) + transcribe_file_with_word_level_confidence() diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py index b889fa5f092..60c0c1caa93 100644 --- a/speech/cloud-client/beta_snippets_test.py +++ b/speech/cloud-client/beta_snippets_test.py @@ -26,56 +26,49 @@ def test_transcribe_file_with_enhanced_model(capsys): - transcribe_file_with_enhanced_model( - os.path.join(RESOURCES, 'commercial_mono.wav')) + transcribe_file_with_enhanced_model() out, _ = capsys.readouterr() assert 'Chrome' in out def test_transcribe_file_with_metadata(capsys): - transcribe_file_with_metadata( - os.path.join(RESOURCES, 'commercial_mono.wav')) + transcribe_file_with_metadata() out, _ = capsys.readouterr() assert 'Chrome' in out def test_transcribe_file_with_auto_punctuation(capsys): - transcribe_file_with_auto_punctuation( - os.path.join(RESOURCES, 'commercial_mono.wav')) + transcribe_file_with_auto_punctuation() out, _ = capsys.readouterr() assert 'Okay. Sure.' in out def test_transcribe_diarization(capsys): - transcribe_file_with_diarization( - os.path.join(RESOURCES, 'commercial_mono.wav')) + transcribe_file_with_diarization() out, err = capsys.readouterr() assert "Speaker #1: I'm here" in out def test_transcribe_multichannel_file(capsys): - transcribe_file_with_multichannel( - os.path.join(RESOURCES, 'Google_Gnome.wav')) + transcribe_file_with_multichannel() out, err = capsys.readouterr() assert 'OK Google stream stranger things from Netflix to my TV' in out def test_transcribe_multilanguage_file(capsys): - transcribe_file_with_multilanguage( - os.path.join(RESOURCES, 'multi.wav'), 'en-US', 'es') + transcribe_file_with_multilanguage() out, err = capsys.readouterr() assert 'how are you doing estoy bien e tu' in out def test_transcribe_word_level_confidence(capsys): - transcribe_file_with_word_level_confidence( - os.path.join(RESOURCES, 'Google_Gnome.wav')) + transcribe_file_with_word_level_confidence() out, err = capsys.readouterr() assert 'OK Google stream stranger things from Netflix to my TV' in out From b53296ae31f64d047ccbd3f028c26648013bb32b Mon Sep 17 00:00:00 2001 From: happyhuman Date: Fri, 20 Jul 2018 13:35:08 -0700 Subject: [PATCH 07/12] Removed unused args --- speech/cloud-client/beta_snippets.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 56c7b2a6c91..f6f1ff6dd1b 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -283,14 +283,6 @@ def transcribe_file_with_word_level_confidence(): description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('command') - parser.add_argument( - 'path', help='File for audio file to be recognized') - parser.add_argument( - 'first', help='First language in audio file to be recognized', - nargs='?') - parser.add_argument( - 'second', help='Second language in audio file to be recognized', - nargs='?') args = parser.parse_args() From 46c1f43b2b8656bc95e6fe6065f905eefaa8b147 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Fri, 20 Jul 2018 13:35:42 -0700 Subject: [PATCH 08/12] Updated README file --- speech/cloud-client/README.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/speech/cloud-client/README.rst b/speech/cloud-client/README.rst index f9ecec0fa0f..b6680c880f4 100644 --- a/speech/cloud-client/README.rst +++ b/speech/cloud-client/README.rst @@ -221,7 +221,7 @@ To run this sample: $ python beta_snippets.py - usage: beta_snippets.py [-h] command path [first] [second] + usage: beta_snippets.py [-h] command Google Cloud Speech API sample that demonstrates enhanced models and recognition metadata. @@ -237,9 +237,6 @@ To run this sample: positional arguments: command - path File for audio file to be recognized - first First language in audio file to be recognized - second Second language in audio file to be recognized optional arguments: -h, --help show this help message and exit From 99ed2898421531622e34da41a6c0c5ef28f7f6a0 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Fri, 20 Jul 2018 14:33:28 -0700 Subject: [PATCH 09/12] Updated the inline comment --- speech/cloud-client/beta_snippets.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index f6f1ff6dd1b..033d656e967 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -160,8 +160,11 @@ def transcribe_file_with_diarization(): print('Waiting for operation to complete...') response = client.recognize(config, audio) - # response.results contains partial results with the last item - # containing the entire result: + # The transcript within each result is separate and sequential per result. + # However, the words list within an alternative (for whatever reason) + # includes all the words from all the results thus far. Thus, to get all + # the words with speaker tags, you only have to take the words list from + # the last result: result = response.results[-1] words_info = result.alternatives[0].words From 3ef4a0db9745653f774bf3193ddf38c5eca6658c Mon Sep 17 00:00:00 2001 From: happyhuman Date: Fri, 20 Jul 2018 15:31:02 -0700 Subject: [PATCH 10/12] Modified code to make it more readable --- speech/cloud-client/beta_snippets.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 033d656e967..1f8401e9695 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -170,16 +170,17 @@ def transcribe_file_with_diarization(): words_info = result.alternatives[0].words # Separating the words by who said what: - speakers_words = [] + speakers = [] + words = [] for word_info in words_info: - if speakers_words and speakers_words[-1][0] == word_info.speaker_tag: - speakers_words[-1][1].append(word_info.word) - else: - speakers_words.append((word_info.speaker_tag, [word_info.word, ])) + if (not speakers) or speakers[-1] != word_info.speaker_tag: + speakers.append(word_info.speaker_tag) + words.append([]) + words[-1].append(word_info.word) # Printing the output based on who said what: - for speaker_tag, words in speakers_words: - print('Speaker #{}: {}'.format(speaker_tag, ' '.join(words))) + for speaker, words in zip(speakers, words): + print('Speaker #{}: {}'.format(speaker, ' '.join(words))) # [END speech_transcribe_diarization] From 146a1808d4bffb410bd76c7053d8f79b6ea5d380 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Fri, 20 Jul 2018 16:05:21 -0700 Subject: [PATCH 11/12] Simplified the response object processing. --- speech/cloud-client/beta_snippets.py | 20 +++++--------------- speech/cloud-client/beta_snippets_test.py | 2 +- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 1f8401e9695..58d4197a0be 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -161,26 +161,16 @@ def transcribe_file_with_diarization(): response = client.recognize(config, audio) # The transcript within each result is separate and sequential per result. - # However, the words list within an alternative (for whatever reason) - # includes all the words from all the results thus far. Thus, to get all - # the words with speaker tags, you only have to take the words list from - # the last result: + # However, the words list within an alternative includes all the words + # from all the results thus far. Thus, to get all the words with speaker + # tags, you only have to take the words list from the last result: result = response.results[-1] words_info = result.alternatives[0].words - # Separating the words by who said what: - speakers = [] - words = [] + # Printing out the output: for word_info in words_info: - if (not speakers) or speakers[-1] != word_info.speaker_tag: - speakers.append(word_info.speaker_tag) - words.append([]) - words[-1].append(word_info.word) - - # Printing the output based on who said what: - for speaker, words in zip(speakers, words): - print('Speaker #{}: {}'.format(speaker, ' '.join(words))) + print("word: '{}', speaker_tag: {}".format(word_info.word, word_info.speaker_tag)) # [END speech_transcribe_diarization] diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py index 60c0c1caa93..5720da420d5 100644 --- a/speech/cloud-client/beta_snippets_test.py +++ b/speech/cloud-client/beta_snippets_test.py @@ -50,7 +50,7 @@ def test_transcribe_diarization(capsys): transcribe_file_with_diarization() out, err = capsys.readouterr() - assert "Speaker #1: I'm here" in out + assert "word: 'here', speaker_tag: 1" in out def test_transcribe_multichannel_file(capsys): From 597dc0aaa70b2af8f1b8b6403d0cca7a28297f11 Mon Sep 17 00:00:00 2001 From: happyhuman Date: Fri, 20 Jul 2018 16:10:03 -0700 Subject: [PATCH 12/12] Fixing the long line issue. --- speech/cloud-client/beta_snippets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py index 58d4197a0be..30ca9cde84b 100644 --- a/speech/cloud-client/beta_snippets.py +++ b/speech/cloud-client/beta_snippets.py @@ -170,7 +170,8 @@ def transcribe_file_with_diarization(): # Printing out the output: for word_info in words_info: - print("word: '{}', speaker_tag: {}".format(word_info.word, word_info.speaker_tag)) + print("word: '{}', speaker_tag: {}".format(word_info.word, + word_info.speaker_tag)) # [END speech_transcribe_diarization]