In [14]:
import json
import os
from os.path import join, dirname

from watson_developer_cloud import SpeechToTextV1

In [27]:
## INITIALIZE MODEL
# Build the Watson Speech to Text client.
#
# Credentials are read from the environment so they are never stored in the
# notebook. Export WATSON_STT_USERNAME and WATSON_STT_PASSWORD before starting
# the kernel; a missing variable raises KeyError here rather than failing later
# with an authentication error.
# NOTE(review): the username/password that used to be hardcoded in this cell
# have been exposed in the notebook history and should be revoked/rotated.
speech_to_text = SpeechToTextV1(
    url='https://stream.watsonplatform.net/speech-to-text/api',
    username=os.environ['WATSON_STT_USERNAME'],
    password=os.environ['WATSON_STT_PASSWORD']
)

# Target words for keyword spotting (the same list that is passed as
# `keywords` to recognize() in the transcription cell).
keyWords = ['BRICK', 'CLOCK', 'PANTS', 'GLASS', 'JUICE']

# Print the models the service offers, to pick a model name for recognize().
print(json.dumps(speech_to_text.models(), indent=2))


{
  "models": [
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/fr-FR_BroadbandModel", 
      "rate": 16000, 
      "name": "fr-FR_BroadbandModel", 
      "language": "fr-FR", 
      "description": "French broadband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/en-US_NarrowbandModel", 
      "rate": 8000, 
      "name": "en-US_NarrowbandModel", 
      "language": "en-US", 
      "description": "US English narrowband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/pt-BR_BroadbandModel", 
      "rate": 16000, 
      "name": "pt-BR_BroadbandModel", 
      "language": "pt-BR", 
      "description": "Brazilian Portuguese broadband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/en-UK_BroadbandModel", 
      "rate": 16000, 
      "name": "en-UK_BroadbandModel", 
      "language": "en-UK", 
      "description": "UK English broadband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/zh-CN_BroadbandModel", 
      "rate": 16000, 
      "name": "zh-CN_BroadbandModel", 
      "language": "zh-CN", 
      "description": "Mandarin broadband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/ja-JP_BroadbandModel", 
      "rate": 16000, 
      "name": "ja-JP_BroadbandModel", 
      "language": "ja-JP", 
      "description": "Japanese broadband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/pt-BR_NarrowbandModel", 
      "rate": 8000, 
      "name": "pt-BR_NarrowbandModel", 
      "language": "pt-BR", 
      "description": "Brazilian Portuguese narrowband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/es-ES_BroadbandModel", 
      "rate": 16000, 
      "name": "es-ES_BroadbandModel", 
      "language": "es-ES", 
      "description": "Spanish broadband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/ar-AR_BroadbandModel", 
      "rate": 16000, 
      "name": "ar-AR_BroadbandModel", 
      "language": "ar-AR", 
      "description": "Modern Standard Arabic broadband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/zh-CN_NarrowbandModel", 
      "rate": 8000, 
      "name": "zh-CN_NarrowbandModel", 
      "language": "zh-CN", 
      "description": "Mandarin narrowband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/ja-JP_NarrowbandModel", 
      "rate": 8000, 
      "name": "ja-JP_NarrowbandModel", 
      "language": "ja-JP", 
      "description": "Japanese narrowband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/es-ES_NarrowbandModel", 
      "rate": 8000, 
      "name": "es-ES_NarrowbandModel", 
      "language": "es-ES", 
      "description": "Spanish narrowband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/en-UK_NarrowbandModel", 
      "rate": 8000, 
      "name": "en-UK_NarrowbandModel", 
      "language": "en-UK", 
      "description": "UK English narrowband model."
    }, 
    {
      "url": "https://stream.watsonplatform.net/speech-to-text/api/v1/models/en-US_BroadbandModel", 
      "rate": 16000, 
      "name": "en-US_BroadbandModel", 
      "language": "en-US", 
      "description": "US English broadband model."
    }
  ]
}

In [34]:
## OUTPUT FILE
# Send one recording to the Speech to Text service and print the raw JSON
# response (transcript alternatives, word timestamps and keyword hits).
# NOTE(review): absolute path to a mounted volume -- only resolves on a machine
# where that volume is mounted; consider a configurable data directory.
AUDIOFILE = '/Volumes/NIL_PASS/NIH034/behavioral/paRemap/session_1/blk0(1)_144329462.wav'
with open(AUDIOFILE, 'rb') as audio_file:
    test = speech_to_text.recognize(
        audio=audio_file,
        content_type='audio/wav',
        model='en-US_NarrowbandModel',
        # Reuse the list defined in the initialization cell instead of
        # repeating the literal, so the two cannot drift apart.
        keywords=keyWords,
        keywords_threshold=0,
        timestamps=True
    )

# Printed after the `with` block so the audio file is already closed.
# The call form of print works on both Python 2 and Python 3 and matches the
# print(...) call used in the initialization cell.
print(json.dumps(test, indent=2))


{
  "results": [
    {
      "keywords_result": {}, 
      "alternatives": [
        {
          "timestamps": [
            [
              "clocks", 
              3.43, 
              4.18
            ]
          ], 
          "confidence": 0.681, 
          "transcript": "clocks "
        }
      ], 
      "final": true
    }
  ], 
  "result_index": 0
}

In [ ]: