In [13]:
def transcribe_gcs(gcs_uri, timeout=90, language_code='en-US'):
    """Transcribe an audio file stored in Cloud Storage and print the result.

    Submits a long-running (asynchronous) recognition request for the audio
    at ``gcs_uri``, blocks until the operation finishes, then prints the top
    transcript and its confidence for each consecutive portion of the audio.

    Args:
        gcs_uri: Cloud Storage URI of the audio file,
            e.g. ``gs://bucket/file.wav``.
        timeout: Maximum number of seconds to wait for the operation to
            complete. Defaults to 90.
        language_code: Language tag placed in the recognition config.
            Defaults to ``'en-US'``.

    Returns:
        None. Transcripts and confidences are written to stdout.

    Raises:
        Nothing is caught here: any error from the client call or from
        ``operation.result`` (including running past ``timeout``) propagates
        to the caller.
    """
    from google.cloud import speech
    from google.cloud.speech import types

    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    # encoding / sample_rate_hertz are intentionally left unset.
    # NOTE(review): presumably the service reads them from the WAV header --
    # confirm before using this with headerless (raw) audio.
    config = types.RecognitionConfig(language_code=language_code)

    # Keyword arguments make it explicit which object is the config and
    # which is the audio.
    operation = client.long_running_recognize(config=config, audio=audio)

    print('Waiting for operation to complete...')
    response = operation.result(timeout=timeout)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # Skip a portion that came back with no alternatives instead of
        # failing with IndexError on alternatives[0].
        if not result.alternatives:
            continue
        # The first alternative is the most likely one for this portion.
        best = result.alternatives[0]
        print('Transcript: {}'.format(best.transcript))
        print('Confidence: {}'.format(best.confidence))

In [4]:
# Cloud Storage URIs of the two WAV recordings passed to transcribe_gcs.
gcs_audio1, gcs_audio2 = (
    "gs://sppech_to_text_bucket/audio1_mono.wav",
    "gs://sppech_to_text_bucket/audio2.wav",
)

In [14]:
# Transcribe the first recording; the transcript is printed, nothing is returned.
transcribe_gcs(gcs_audio1)


Waiting for operation to complete...
Transcript: thanks for calling AT&T mini with Erica today
Confidence: 0.809312105178833
Transcript:  6539 wigwam q02
Confidence: 0.7498751878738403
Transcript:  I got this letter saying I can get to Blackbird Court
Confidence: 0.8784077763557434
Transcript:  can a warrant be more than happy to see if you're in town
Confidence: 0.8001092076301575
Transcript:  the first and last name of the account holder
Confidence: 0.8356142044067383

In [15]:
# Transcribe the second recording; the transcript is printed, nothing is returned.
transcribe_gcs(gcs_audio2)


Waiting for operation to complete...
Transcript: well
Confidence: 0.7508707046508789
Transcript:  Hello. Hello.
Confidence: 0.7192739844322205
Transcript:  This is hi Mom, Craig.
Confidence: 0.6247369050979614
Transcript:  Plantation city, so we are you
Confidence: 0.6794960498809814

In [ ]:
import io
import os

# Imports the Google Cloud client library
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types

# Instantiates a client
client = speech.SpeechClient()

# The name of the audio file to transcribe.
# __file__ is only defined when this code runs as a script/module; in a
# notebook cell it is missing and the lookup would raise NameError, so fall
# back to the current working directory there.
base_dir = os.path.dirname(__file__) if '__file__' in globals() else os.getcwd()
file_name = os.path.join(base_dir, 'resources', 'audio.raw')

# Loads the audio into memory; the file handle is closed before the request
# object is built.
with io.open(file_name, 'rb') as audio_file:
    content = audio_file.read()
audio = types.RecognitionAudio(content=content)

# The config declares the audio as LINEAR16 at 16 kHz, US English.
config = types.RecognitionConfig(
    encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code='en-US')

# Detects speech in the audio file (synchronous request). Keyword arguments
# make it explicit which object is the config and which is the audio.
response = client.recognize(config=config, audio=audio)

for result in response.results:
    # Skip a portion with no alternatives rather than failing on [0].
    if result.alternatives:
        print('Transcript: {}'.format(result.alternatives[0].transcript))

In [4]:
# Print the value of GOOGLE_APPLICATION_CREDENTIALS as seen by the shell.
# NOTE(review): the output is a machine-specific path to a key file; consider
# clearing this cell's output before sharing the notebook.
! echo $GOOGLE_APPLICATION_CREDENTIALS


/home/surya/Desktop/LearnLive-5a01c60522cf.json

In [16]:
import textract

In [17]:
# Run textract on the local WAV recording and keep its return value in `text`.
# NOTE(review): the path is absolute and machine-specific; it will not resolve
# on another machine -- confirm where the file should live.
text = textract.process("/home/surya/Desktop/audio1.wav")

In [23]:
# NOTE(review): `te` is not assigned in any cell shown here (possibly a
# truncated `text`), and the recorded output below is not a list -- confirm
# what this cell was meant to display before re-running on a fresh kernel.
list(te)


Out[23]:
"b'\\n'"