From b5d4cebc62cbbcf1cac60007240f989003c07e9b Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Thu, 19 Jul 2018 15:09:06 -0700
Subject: [PATCH 01/12] Printing the last paragraph only.

---
 speech/cloud-client/README.rst            |  2 +-
 speech/cloud-client/beta_snippets.py      | 13 +++++--------
 speech/cloud-client/beta_snippets_test.py |  4 ++--
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/speech/cloud-client/README.rst b/speech/cloud-client/README.rst
index 394c97c81f2..fd5ce38bac6 100644
--- a/speech/cloud-client/README.rst
+++ b/speech/cloud-client/README.rst
@@ -221,7 +221,7 @@ To run this sample:
 
     $ python beta_snippets.py
 
-    usage: beta_snippets.py [-h] command path first second
+    usage: beta_snippets.py [-h] command path [first] [second]
 
     Google Cloud Speech API sample that demonstrates enhanced models
     and recognition metadata.
diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py
index 24e213be356..cd3579ee74b 100644
--- a/speech/cloud-client/beta_snippets.py
+++ b/speech/cloud-client/beta_snippets.py
@@ -156,7 +156,6 @@ def transcribe_file_with_diarization(speech_file):
 
     config = speech.types.RecognitionConfig(
         encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
-        sample_rate_hertz=16000,
         language_code='en-US',
         enable_speaker_diarization=True,
         diarization_speaker_count=2)
@@ -164,13 +163,11 @@ def transcribe_file_with_diarization(speech_file):
     print('Waiting for operation to complete...')
     response = client.recognize(config, audio)
 
-    for i, result in enumerate(response.results):
-        alternative = result.alternatives[0]
-        print('-' * 20)
-        print('First alternative of result {}: {}'
-              .format(i, alternative.transcript))
-        print('Speaker Tag for the first word: {}'
-              .format(alternative.words[0].speaker_tag))
+    result = response.results[-1]
+    words_info = result.alternatives[0].words
+    pieces = ['%s (%s)' % (word_info.word, word_info.speaker_tag)
+              for word_info in words_info]
+    print ' '.join(pieces)
     # [END speech_transcribe_diarization]
 
 
diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py
index bbb6c75f674..1a86a17b544 100644
--- a/speech/cloud-client/beta_snippets_test.py
+++ b/speech/cloud-client/beta_snippets_test.py
@@ -51,10 +51,10 @@ def test_transcribe_file_with_auto_punctuation(capsys):
 
 def test_transcribe_diarization(capsys):
     transcribe_file_with_diarization(
-        os.path.join(RESOURCES, 'Google_Gnome.wav'))
+        os.path.join(RESOURCES, 'commercial_mono.wav'))
     out, err = capsys.readouterr()
 
-    assert 'OK Google stream stranger things from Netflix to my TV' in out
+    assert "I'm (1) here (1) hi (2)" in out
 
 
 def test_transcribe_multichannel_file(capsys):

From a4c2ca46a4057cd1760db9810072c7d536d2e074 Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Thu, 19 Jul 2018 15:15:42 -0700
Subject: [PATCH 02/12] Python3 print

---
 speech/cloud-client/beta_snippets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py
index cd3579ee74b..41b83105f5f 100644
--- a/speech/cloud-client/beta_snippets.py
+++ b/speech/cloud-client/beta_snippets.py
@@ -167,7 +167,7 @@ def transcribe_file_with_diarization(speech_file):
     words_info = result.alternatives[0].words
     pieces = ['%s (%s)' % (word_info.word, word_info.speaker_tag)
               for word_info in words_info]
-    print ' '.join(pieces)
+    print(' '.join(pieces))
     # [END speech_transcribe_diarization]
 
 

From f7e413122b80143694e7f5dbf67f2e3656a8c499 Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Fri, 20 Jul 2018 08:57:36 -0700
Subject: [PATCH 03/12] Removing sample rate setting

---
 speech/cloud-client/beta_snippets.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py
index 41b83105f5f..b8990091623 100644
--- a/speech/cloud-client/beta_snippets.py
+++ b/speech/cloud-client/beta_snippets.py
@@ -46,7 +46,6 @@ def transcribe_file_with_enhanced_model(speech_file):
     audio = speech.types.RecognitionAudio(content=content)
     config = speech.types.RecognitionConfig(
         encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
-        sample_rate_hertz=8000,
         language_code='en-US',
         # Enhanced models are only available to projects that
         # opt in for audio data collection.
@@ -95,7 +94,6 @@ def transcribe_file_with_metadata(speech_file):
     audio = speech.types.RecognitionAudio(content=content)
     config = speech.types.RecognitionConfig(
         encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
-        sample_rate_hertz=8000,
         language_code='en-US',
         # Add this in the request to send metadata.
         metadata=metadata)
@@ -125,7 +123,6 @@ def transcribe_file_with_auto_punctuation(speech_file):
     audio = speech.types.RecognitionAudio(content=content)
     config = speech.types.RecognitionConfig(
         encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
-        sample_rate_hertz=8000,
         language_code='en-US',
         # Enable automatic punctuation
         enable_automatic_punctuation=True)

From f1662fe32599c7e385ff2c74af39c16f5fed5185 Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Fri, 20 Jul 2018 10:36:47 -0700
Subject: [PATCH 04/12] Adding the missing output parameter in the example

---
 texttospeech/cloud-client/audio_profile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/texttospeech/cloud-client/audio_profile.py b/texttospeech/cloud-client/audio_profile.py
index c616f710026..21a6515b4c5 100644
--- a/texttospeech/cloud-client/audio_profile.py
+++ b/texttospeech/cloud-client/audio_profile.py
@@ -18,7 +18,7 @@
 
 Example usage:
     python audio_profile.py --text "hello" --effects_profile_id
-        "telephony-class-application"
+        "telephony-class-application" --output "output.mp3"
 """
 
 import argparse

From 4fbefa3d4492baadccf092b4ecc3fe8833c975e3 Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Fri, 20 Jul 2018 12:01:29 -0700
Subject: [PATCH 05/12] Changes based on the comments

---
 speech/cloud-client/beta_snippets.py      | 18 +++++++++++++++---
 speech/cloud-client/beta_snippets_test.py |  2 +-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py
index b8990091623..e570cdf4417 100644
--- a/speech/cloud-client/beta_snippets.py
+++ b/speech/cloud-client/beta_snippets.py
@@ -160,11 +160,23 @@ def transcribe_file_with_diarization(speech_file):
     print('Waiting for operation to complete...')
     response = client.recognize(config, audio)
 
+    # response.results contains partial results with the last item
+    # containing the entire result:
     result = response.results[-1]
+
     words_info = result.alternatives[0].words
-    pieces = ['%s (%s)' % (word_info.word, word_info.speaker_tag)
-              for word_info in words_info]
-    print(' '.join(pieces))
+
+    # Separating the words by who said what:
+    speakers_words = []
+    for word_info in words_info:
+        if speakers_words and speakers_words[-1][0] == word_info.speaker_tag:
+            speakers_words[-1][1].append(word_info.word)
+        else:
+            speakers_words.append((word_info.speaker_tag, [word_info.word, ]))
+
+    # Printing the output based on who said what:
+    for speaker_tag, words in speakers_words:
+        print('Speaker #{}: {}'.format(speaker_tag, ' '.join(words)))
     # [END speech_transcribe_diarization]
 
 
diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py
index 1a86a17b544..b889fa5f092 100644
--- a/speech/cloud-client/beta_snippets_test.py
+++ b/speech/cloud-client/beta_snippets_test.py
@@ -54,7 +54,7 @@ def test_transcribe_diarization(capsys):
         os.path.join(RESOURCES, 'commercial_mono.wav'))
     out, err = capsys.readouterr()
 
-    assert "I'm (1) here (1) hi (2)" in out
+    assert "Speaker #1: I'm here" in out
 
 
 def test_transcribe_multichannel_file(capsys):

From b105e2ae79da25b5a6840d188dc95a74b0e6dbee Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Fri, 20 Jul 2018 13:32:45 -0700
Subject: [PATCH 06/12] Removed filenames as input parameters

---
 speech/cloud-client/README.rst            | 14 ++---
 speech/cloud-client/beta_snippets.py      | 72 +++++++++++------------
 speech/cloud-client/beta_snippets_test.py | 21 +++----
 3 files changed, 49 insertions(+), 58 deletions(-)

diff --git a/speech/cloud-client/README.rst b/speech/cloud-client/README.rst
index fd5ce38bac6..f9ecec0fa0f 100644
--- a/speech/cloud-client/README.rst
+++ b/speech/cloud-client/README.rst
@@ -227,13 +227,13 @@ To run this sample:
     and recognition metadata.
 
     Example usage:
-        python beta_snippets.py enhanced-model resources/commercial_mono.wav
-        python beta_snippets.py metadata resources/commercial_mono.wav
-        python beta_snippets.py punctuation resources/commercial_mono.wav
-        python beta_snippets.py diarization resources/commercial_mono.wav
-        python beta_snippets.py multi-channel resources/commercial_mono.wav
-        python beta_snippets.py multi-language resources/multi.wav en-US es
-        python beta_snippets.py word-level-conf resources/commercial_mono.wav
+        python beta_snippets.py enhanced-model
+        python beta_snippets.py metadata
+        python beta_snippets.py punctuation
+        python beta_snippets.py diarization
+        python beta_snippets.py multi-channel
+        python beta_snippets.py multi-language
+        python beta_snippets.py word-level-conf
 
     positional arguments:
       command
diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py
index e570cdf4417..56c7b2a6c91 100644
--- a/speech/cloud-client/beta_snippets.py
+++ b/speech/cloud-client/beta_snippets.py
@@ -18,27 +18,26 @@
 and recognition metadata.
 
 Example usage:
-    python beta_snippets.py enhanced-model resources/commercial_mono.wav
-    python beta_snippets.py metadata resources/commercial_mono.wav
-    python beta_snippets.py punctuation resources/commercial_mono.wav
-    python beta_snippets.py diarization resources/commercial_mono.wav
-    python beta_snippets.py multi-channel resources/commercial_mono.wav
-    python beta_snippets.py multi-language resources/multi.wav en-US es
-    python beta_snippets.py word-level-conf resources/commercial_mono.wav
+    python beta_snippets.py enhanced-model
+    python beta_snippets.py metadata
+    python beta_snippets.py punctuation
+    python beta_snippets.py diarization
+    python beta_snippets.py multi-channel
+    python beta_snippets.py multi-language
+    python beta_snippets.py word-level-conf
 """
 
 import argparse
 import io
 
 
-def transcribe_file_with_enhanced_model(speech_file):
+def transcribe_file_with_enhanced_model():
     """Transcribe the given audio file using an enhanced model."""
     # [START speech_transcribe_file_with_enhanced_model]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()
 
-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/commercial_mono.wav'
 
     with io.open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -46,6 +45,7 @@ def transcribe_file_with_enhanced_model(speech_file):
     audio = speech.types.RecognitionAudio(content=content)
     config = speech.types.RecognitionConfig(
         encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=8000,
         language_code='en-US',
         # Enhanced models are only available to projects that
         # opt in for audio data collection.
@@ -63,14 +63,13 @@ def transcribe_file_with_enhanced_model(speech_file):
     # [END speech_transcribe_file_with_enhanced_model]
 
 
-def transcribe_file_with_metadata(speech_file):
+def transcribe_file_with_metadata():
     """Send a request that includes recognition metadata."""
     # [START speech_transcribe_file_with_metadata]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()
 
-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/commercial_mono.wav'
 
     with io.open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -94,6 +93,7 @@ def transcribe_file_with_metadata(speech_file):
     audio = speech.types.RecognitionAudio(content=content)
     config = speech.types.RecognitionConfig(
         encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=8000,
         language_code='en-US',
         # Add this in the request to send metadata.
         metadata=metadata)
@@ -108,14 +108,13 @@ def transcribe_file_with_metadata(speech_file):
     # [END speech_transcribe_file_with_metadata]
 
 
-def transcribe_file_with_auto_punctuation(speech_file):
+def transcribe_file_with_auto_punctuation():
     """Transcribe the given audio file with auto punctuation enabled."""
     # [START speech_transcribe_file_with_auto_punctuation]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()
 
-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/commercial_mono.wav'
 
     with io.open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -123,6 +122,7 @@ def transcribe_file_with_auto_punctuation(speech_file):
     audio = speech.types.RecognitionAudio(content=content)
     config = speech.types.RecognitionConfig(
         encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=8000,
         language_code='en-US',
         # Enable automatic punctuation
         enable_automatic_punctuation=True)
@@ -137,14 +137,13 @@ def transcribe_file_with_auto_punctuation(speech_file):
     # [END speech_transcribe_file_with_auto_punctuation]
 
 
-def transcribe_file_with_diarization(speech_file):
+def transcribe_file_with_diarization():
     """Transcribe the given audio file synchronously with diarization."""
     # [START speech_transcribe_diarization]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()
 
-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/commercial_mono.wav'
 
     with open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -153,6 +152,7 @@ def transcribe_file_with_diarization(speech_file):
 
     config = speech.types.RecognitionConfig(
         encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=8000,
         language_code='en-US',
         enable_speaker_diarization=True,
         diarization_speaker_count=2)
@@ -180,15 +180,14 @@ def transcribe_file_with_diarization(speech_file):
     # [END speech_transcribe_diarization]
 
 
-def transcribe_file_with_multichannel(speech_file):
+def transcribe_file_with_multichannel():
     """Transcribe the given audio file synchronously with
       multi channel."""
     # [START speech_transcribe_multichannel]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()
 
-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/Google_Gnome.wav'
 
     with open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -213,17 +212,16 @@ def transcribe_file_with_multichannel(speech_file):
     # [END speech_transcribe_multichannel]
 
 
-def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
+def transcribe_file_with_multilanguage():
     """Transcribe the given audio file synchronously with
       multi language."""
     # [START speech_transcribe_multilanguage]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()
 
-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
-    # first_lang = first language code, e,g, 'en-US'
-    # second_lang = first language code, e,g, 'es'
+    speech_file = 'resources/multi.wav'
+    first_lang = 'en-US'
+    second_lang = 'es'
 
     with open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -232,6 +230,7 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
 
     config = speech.types.RecognitionConfig(
         encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=44100,
         audio_channel_count=2,
         language_code=first_lang,
         alternative_language_codes=[second_lang])
@@ -247,15 +246,14 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
     # [END speech_transcribe_multilanguage]
 
 
-def transcribe_file_with_word_level_confidence(speech_file):
+def transcribe_file_with_word_level_confidence():
     """Transcribe the given audio file synchronously with
       word level confidence."""
     # [START speech_transcribe_word_level_confidence]
     from google.cloud import speech_v1p1beta1 as speech
     client = speech.SpeechClient()
 
-    # TODO(developer): Uncomment and set to a path to your audio file.
-    # speech_file = 'path/to/file.wav'
+    speech_file = 'resources/Google_Gnome.wav'
 
     with open(speech_file, 'rb') as audio_file:
         content = audio_file.read()
@@ -297,16 +295,16 @@ def transcribe_file_with_word_level_confidence(speech_file):
     args = parser.parse_args()
 
     if args.command == 'enhanced-model':
-        transcribe_file_with_enhanced_model(args.path)
+        transcribe_file_with_enhanced_model()
     elif args.command == 'metadata':
-        transcribe_file_with_metadata(args.path)
+        transcribe_file_with_metadata()
     elif args.command == 'punctuation':
-        transcribe_file_with_auto_punctuation(args.path)
+        transcribe_file_with_auto_punctuation()
     elif args.command == 'diarization':
-        transcribe_file_with_diarization(args.path)
+        transcribe_file_with_diarization()
     elif args.command == 'multi-channel':
-        transcribe_file_with_multichannel(args.path)
+        transcribe_file_with_multichannel()
     elif args.command == 'multi-language':
-        transcribe_file_with_multilanguage(args.path, args.first, args.second)
+        transcribe_file_with_multilanguage()
     elif args.command == 'word-level-conf':
-        transcribe_file_with_word_level_confidence(args.path)
+        transcribe_file_with_word_level_confidence()
diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py
index b889fa5f092..60c0c1caa93 100644
--- a/speech/cloud-client/beta_snippets_test.py
+++ b/speech/cloud-client/beta_snippets_test.py
@@ -26,56 +26,49 @@
 
 
 def test_transcribe_file_with_enhanced_model(capsys):
-    transcribe_file_with_enhanced_model(
-        os.path.join(RESOURCES, 'commercial_mono.wav'))
+    transcribe_file_with_enhanced_model()
     out, _ = capsys.readouterr()
 
     assert 'Chrome' in out
 
 
 def test_transcribe_file_with_metadata(capsys):
-    transcribe_file_with_metadata(
-        os.path.join(RESOURCES, 'commercial_mono.wav'))
+    transcribe_file_with_metadata()
     out, _ = capsys.readouterr()
 
     assert 'Chrome' in out
 
 
 def test_transcribe_file_with_auto_punctuation(capsys):
-    transcribe_file_with_auto_punctuation(
-        os.path.join(RESOURCES, 'commercial_mono.wav'))
+    transcribe_file_with_auto_punctuation()
     out, _ = capsys.readouterr()
 
     assert 'Okay. Sure.' in out
 
 
 def test_transcribe_diarization(capsys):
-    transcribe_file_with_diarization(
-        os.path.join(RESOURCES, 'commercial_mono.wav'))
+    transcribe_file_with_diarization()
     out, err = capsys.readouterr()
 
     assert "Speaker #1: I'm here" in out
 
 
 def test_transcribe_multichannel_file(capsys):
-    transcribe_file_with_multichannel(
-        os.path.join(RESOURCES, 'Google_Gnome.wav'))
+    transcribe_file_with_multichannel()
     out, err = capsys.readouterr()
 
     assert 'OK Google stream stranger things from Netflix to my TV' in out
 
 
 def test_transcribe_multilanguage_file(capsys):
-    transcribe_file_with_multilanguage(
-        os.path.join(RESOURCES, 'multi.wav'), 'en-US', 'es')
+    transcribe_file_with_multilanguage()
     out, err = capsys.readouterr()
 
     assert 'how are you doing estoy bien e tu' in out
 
 
 def test_transcribe_word_level_confidence(capsys):
-    transcribe_file_with_word_level_confidence(
-        os.path.join(RESOURCES, 'Google_Gnome.wav'))
+    transcribe_file_with_word_level_confidence()
     out, err = capsys.readouterr()
 
     assert 'OK Google stream stranger things from Netflix to my TV' in out

From b53296ae31f64d047ccbd3f028c26648013bb32b Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Fri, 20 Jul 2018 13:35:08 -0700
Subject: [PATCH 07/12] Removed unused args

---
 speech/cloud-client/beta_snippets.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py
index 56c7b2a6c91..f6f1ff6dd1b 100644
--- a/speech/cloud-client/beta_snippets.py
+++ b/speech/cloud-client/beta_snippets.py
@@ -283,14 +283,6 @@ def transcribe_file_with_word_level_confidence():
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter)
     parser.add_argument('command')
-    parser.add_argument(
-        'path', help='File for audio file to be recognized')
-    parser.add_argument(
-        'first', help='First language in audio file to be recognized',
-        nargs='?')
-    parser.add_argument(
-        'second', help='Second language in audio file to be recognized',
-        nargs='?')
 
     args = parser.parse_args()
 

From 46c1f43b2b8656bc95e6fe6065f905eefaa8b147 Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Fri, 20 Jul 2018 13:35:42 -0700
Subject: [PATCH 08/12] Updated README file

---
 speech/cloud-client/README.rst | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/speech/cloud-client/README.rst b/speech/cloud-client/README.rst
index f9ecec0fa0f..b6680c880f4 100644
--- a/speech/cloud-client/README.rst
+++ b/speech/cloud-client/README.rst
@@ -221,7 +221,7 @@ To run this sample:
 
     $ python beta_snippets.py
 
-    usage: beta_snippets.py [-h] command path [first] [second]
+    usage: beta_snippets.py [-h] command
 
     Google Cloud Speech API sample that demonstrates enhanced models
     and recognition metadata.
@@ -237,9 +237,6 @@ To run this sample:
 
     positional arguments:
       command
-      path        File for audio file to be recognized
-      first       First language in audio file to be recognized
-      second      Second language in audio file to be recognized
 
     optional arguments:
       -h, --help  show this help message and exit

From 99ed2898421531622e34da41a6c0c5ef28f7f6a0 Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Fri, 20 Jul 2018 14:33:28 -0700
Subject: [PATCH 09/12] Updated the inline comment

---
 speech/cloud-client/beta_snippets.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py
index f6f1ff6dd1b..033d656e967 100644
--- a/speech/cloud-client/beta_snippets.py
+++ b/speech/cloud-client/beta_snippets.py
@@ -160,8 +160,11 @@ def transcribe_file_with_diarization():
     print('Waiting for operation to complete...')
     response = client.recognize(config, audio)
 
-    # response.results contains partial results with the last item
-    # containing the entire result:
+    # The transcript within each result is separate and sequential per result.
+    # However, the words list within an alternative (for whatever reason)
+    # includes all the words from all the results thus far. Thus, to get all
+    # the words with speaker tags, you only have to take the words list from
+    # the last result:
     result = response.results[-1]
 
     words_info = result.alternatives[0].words

From 3ef4a0db9745653f774bf3193ddf38c5eca6658c Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Fri, 20 Jul 2018 15:31:02 -0700
Subject: [PATCH 10/12] Modified code to make it more readable

---
 speech/cloud-client/beta_snippets.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py
index 033d656e967..1f8401e9695 100644
--- a/speech/cloud-client/beta_snippets.py
+++ b/speech/cloud-client/beta_snippets.py
@@ -170,16 +170,17 @@ def transcribe_file_with_diarization():
     words_info = result.alternatives[0].words
 
     # Separating the words by who said what:
-    speakers_words = []
+    speakers = []
+    words = []
     for word_info in words_info:
-        if speakers_words and speakers_words[-1][0] == word_info.speaker_tag:
-            speakers_words[-1][1].append(word_info.word)
-        else:
-            speakers_words.append((word_info.speaker_tag, [word_info.word, ]))
+        if (not speakers) or speakers[-1] != word_info.speaker_tag:
+            speakers.append(word_info.speaker_tag)
+            words.append([])
+        words[-1].append(word_info.word)
 
     # Printing the output based on who said what:
-    for speaker_tag, words in speakers_words:
-        print('Speaker #{}: {}'.format(speaker_tag, ' '.join(words)))
+    for speaker, words in zip(speakers, words):
+        print('Speaker #{}: {}'.format(speaker, ' '.join(words)))
     # [END speech_transcribe_diarization]
 
 

From 146a1808d4bffb410bd76c7053d8f79b6ea5d380 Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Fri, 20 Jul 2018 16:05:21 -0700
Subject: [PATCH 11/12] Simplified the response object processing.

---
 speech/cloud-client/beta_snippets.py      | 20 +++++---------------
 speech/cloud-client/beta_snippets_test.py |  2 +-
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py
index 1f8401e9695..58d4197a0be 100644
--- a/speech/cloud-client/beta_snippets.py
+++ b/speech/cloud-client/beta_snippets.py
@@ -161,26 +161,16 @@ def transcribe_file_with_diarization():
     response = client.recognize(config, audio)
 
     # The transcript within each result is separate and sequential per result.
-    # However, the words list within an alternative (for whatever reason)
-    # includes all the words from all the results thus far. Thus, to get all
-    # the words with speaker tags, you only have to take the words list from
-    # the last result:
+    # However, the words list within an alternative is cumulative: it holds
+    # every word from all the results so far. To get all the words with
+    # speaker tags, you only need the words list from the last result:
     result = response.results[-1]
 
     words_info = result.alternatives[0].words
 
-    # Separating the words by who said what:
-    speakers = []
-    words = []
+    # Printing out the output:
     for word_info in words_info:
-        if (not speakers) or speakers[-1] != word_info.speaker_tag:
-            speakers.append(word_info.speaker_tag)
-            words.append([])
-        words[-1].append(word_info.word)
-
-    # Printing the output based on who said what:
-    for speaker, words in zip(speakers, words):
-        print('Speaker #{}: {}'.format(speaker, ' '.join(words)))
+        print("word: '{}', speaker_tag: {}".format(word_info.word, word_info.speaker_tag))
     # [END speech_transcribe_diarization]
 
 
diff --git a/speech/cloud-client/beta_snippets_test.py b/speech/cloud-client/beta_snippets_test.py
index 60c0c1caa93..5720da420d5 100644
--- a/speech/cloud-client/beta_snippets_test.py
+++ b/speech/cloud-client/beta_snippets_test.py
@@ -50,7 +50,7 @@ def test_transcribe_diarization(capsys):
     transcribe_file_with_diarization()
     out, err = capsys.readouterr()
 
-    assert "Speaker #1: I'm here" in out
+    assert "word: 'here', speaker_tag: 1" in out
 
 
 def test_transcribe_multichannel_file(capsys):

From 597dc0aaa70b2af8f1b8b6403d0cca7a28297f11 Mon Sep 17 00:00:00 2001
From: happyhuman <shahins@google.com>
Date: Fri, 20 Jul 2018 16:10:03 -0700
Subject: [PATCH 12/12] Fixing the long line issue.

---
 speech/cloud-client/beta_snippets.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/speech/cloud-client/beta_snippets.py b/speech/cloud-client/beta_snippets.py
index 58d4197a0be..30ca9cde84b 100644
--- a/speech/cloud-client/beta_snippets.py
+++ b/speech/cloud-client/beta_snippets.py
@@ -170,7 +170,8 @@ def transcribe_file_with_diarization():
 
     # Printing out the output:
     for word_info in words_info:
-        print("word: '{}', speaker_tag: {}".format(word_info.word, word_info.speaker_tag))
+        print("word: '{}', speaker_tag: {}".format(word_info.word,
+                                                   word_info.speaker_tag))
     # [END speech_transcribe_diarization]