In [1]:
# Embed a YouTube video (identified by its video id) as this cell's rich output.
from IPython.display import YouTubeVideo
YouTubeVideo('yZV-XCSmWQY')
Out[1]:
by: Sam.Gu@KudosData.com
May 2017 ========== Scan the QR code to add the trainer as a friend on WeChat ========>>
In [1]:
# Runtime-environment switch, read by KudosData_voice_to_text to choose the audio converter:
#   True  -> Datalab on Google Cloud Platform (converts audio with avconv)
#   False -> Kudos Data virtual machine or local machine (converts audio with ffmpeg)
parm_runtime_env_GCP = True
# parm_runtime_env_GCP = False
From the same API console, choose "Dashboard" on the left-hand menu and "Enable API".
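Enable the following APIs for your project (search for them) if they are not already enabled: the Google Cloud Speech API and the Google Cloud Translation API, which are the two services this notebook calls.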
Finally, because we are calling the APIs from Python (clients in many other languages are available), let's install the Python client package, `google-api-python-client` (it is not installed by default on Datalab).
In [2]:
# Copyright 2016 Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# import subprocess
# retcode = subprocess.call(['pip', 'install', '-U', 'google-api-python-client'])
# retcode = subprocess.call(['pip', 'install', '-U', 'gTTS'])
# Below is for GCP only: install audio conversion tool
# retcode = subprocess.call(['apt-get', 'update', '-y'])
# retcode = subprocess.call(['apt-get', 'install', 'libav-tools', '-y'])
In [3]:
import io, os, subprocess, sys, re, codecs, time, datetime, requests, itchat
from itchat.content import *
from googleapiclient.discovery import build
First, visit the API console and choose "Credentials" on the left-hand menu. Choose "Create Credentials" and generate an API key for your application. You should normally restrict the key by IP address to prevent abuse; for this demo you can leave that field blank, as long as you delete the API key after trying the demo out.
Copy-paste your API Key here:
In [4]:
# API key for the Google Cloud APIs used in this notebook.
# Do not commit a real key. Export it as the GOOGLE_API_KEY environment variable
# before starting the notebook, e.g.:  export GOOGLE_API_KEY='AIza...'
# Alternative kept from the original notebook: read the key from a private file,
# which is not shared in the Github repository:
# with io.open('../../API_KEY.txt') as fp:
#     for line in fp: APIKEY = line
# The placeholder below is used only when GOOGLE_API_KEY is unset or blank; it is
# not a working key, so set the variable (or replace the placeholder) before
# calling the APIs.
APIKEY = os.environ.get('GOOGLE_API_KEY', '').strip() or 'AIzaSyCvxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
In [5]:
# Google speech synthesis (text to voice), provided by the gTTS package.
from gtts import gTTS
# Client for the Google Speech recognition API, v1 (voice to text), built with APIKEY.
speech_service = build('speech', 'v1', developerKey=APIKEY)
# Client for the Google Language Translation API, v2, built with APIKEY.
service = build('translate', 'v2', developerKey=APIKEY)
In [6]:
# Import the base64 encoding library.
import base64
# Pass the image data to an encoding function.
def encode_image(image_file):
    """Return the base64 encoding of the file stored at path `image_file`.

    The file is read in binary mode. On Python 2 the value produced by
    base64.b64encode is returned unchanged; on Python 3 it is decoded as
    UTF-8 so that a text string is returned.
    """
    with io.open(image_file, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    running_python2 = sys.version_info[0] < 3
    return encoded if running_python2 else encoded.decode('utf-8')
# Pass the audio data to an encoding function.
def encode_audio(audio_file):
    """Read the audio file at path `audio_file` and return its base64 encoding.

    The file is opened in binary mode. Python 2 gets the raw result of
    base64.b64encode; Python 3 gets that result decoded as UTF-8 text.
    """
    with io.open(audio_file, 'rb') as stream:
        payload = stream.read()
    b64_payload = base64.b64encode(payload)
    if sys.version_info[0] >= 3:
        # Python 3: b64encode yields bytes, callers get text.
        return b64_payload.decode('utf-8')
    # Python 2: return the encoded value as produced.
    return b64_payload
In [7]:
# Tunable parameters for the API calls made in this notebook.
# API control parameter for Image API:
parm_image_maxResults = 10 # max objects or faces to be extracted from image analysis
# API control parameter for Language Translation API:
parm_translation_origin_language = 'zh' # original language in text: to be overwritten by TEXT_DETECTION
parm_translation_target_language = 'zh' # target language for translation: Chinese
# API control parameter for converting message text to speech (Speech synthesis: text to voice)
parm_speech_synthesis_language = 'zh' # speech synthesis API 'text to voice' language
# parm_speech_synthesis_language = 'zh-tw' # speech synthesis API 'text to voice' language
# parm_speech_synthesis_language = 'zh-yue' # speech synthesis API 'text to voice' language
# API control parameter for converting speech to message text (Speech recognition: voice to text)
# parm_speech_recognition_language = 'en' # speech API 'voice to text' language
parm_speech_recognition_language = 'cmn-Hans-CN' # speech API 'voice to text' language
https://pypi.python.org/pypi/gTTS
'af' : 'Afrikaans' 'sq' : 'Albanian' 'ar' : 'Arabic' 'hy' : 'Armenian' 'bn' : 'Bengali' 'ca' : 'Catalan' 'zh' : 'Chinese' 'zh-cn' : 'Chinese (Mandarin/China)' 'zh-tw' : 'Chinese (Mandarin/Taiwan)' 'zh-yue' : 'Chinese (Cantonese)' 'hr' : 'Croatian' 'cs' : 'Czech' 'da' : 'Danish' 'nl' : 'Dutch' 'en' : 'English' 'en-au' : 'English (Australia)' 'en-uk' : 'English (United Kingdom)' 'en-us' : 'English (United States)' 'eo' : 'Esperanto' 'fi' : 'Finnish' 'fr' : 'French' 'de' : 'German' 'el' : 'Greek' 'hi' : 'Hindi' 'hu' : 'Hungarian' 'is' : 'Icelandic' 'id' : 'Indonesian' 'it' : 'Italian' 'ja' : 'Japanese' 'km' : 'Khmer (Cambodian)' 'ko' : 'Korean' 'la' : 'Latin' 'lv' : 'Latvian' 'mk' : 'Macedonian' 'no' : 'Norwegian' 'pl' : 'Polish' 'pt' : 'Portuguese' 'ro' : 'Romanian' 'ru' : 'Russian' 'sr' : 'Serbian' 'si' : 'Sinhala' 'sk' : 'Slovak' 'es' : 'Spanish' 'es-es' : 'Spanish (Spain)' 'es-us' : 'Spanish (United States)' 'sw' : 'Swahili' 'sv' : 'Swedish' 'ta' : 'Tamil' 'th' : 'Thai' 'tr' : 'Turkish' 'uk' : 'Ukrainian' 'vi' : 'Vietnamese' 'cy' : 'Welsh'
In [8]:
# Running Speech API
def KudosData_text_to_voice(text2voice):
    """Synthesize `text2voice` into an MP3 file with gTTS and return the file name.

    The speech language is taken from the module-level
    `parm_speech_synthesis_language`. The audio is saved under the fixed name
    'Voice_For_You.mp3', so the same file name is reused on every call.
    """
    # Python 2 hands gTTS UTF-8 encoded bytes; Python 3 passes the text unchanged.
    on_python2 = sys.version_info[0] < 3
    spoken_text = text2voice.encode('utf-8') if on_python2 else text2voice
    synthesizer = gTTS(text=spoken_text, lang=parm_speech_synthesis_language, slow=False)

    mp3_name = 'Voice_For_You.mp3'
    synthesizer.save(mp3_name)
    print('\nCompeleted: Speech synthesis API ( Text -> Voice)')
    print(text2voice)
    return mp3_name
https://cloud.google.com/speech/docs/languages
'af-ZA' 'Afrikaans (South Africa)' 'id-ID' 'Indonesian (Indonesia)' 'ms-MY' 'Malay (Malaysia)' 'ca-ES' 'Catalan (Spain)' 'cs-CZ' 'Czech (Czech Republic)' 'da-DK' 'Danish (Denmark)' 'de-DE' 'German (Germany)' 'en-AU' 'English (Australia)' 'en-CA' 'English (Canada)' 'en-GB' 'English (United Kingdom)' 'en-IN' 'English (India)' 'en-IE' 'English (Ireland)' 'en-NZ' 'English (New Zealand)' 'en-PH' 'English (Philippines)' 'en-ZA' 'English (South Africa)' 'en-US' 'English (United States)' 'es-AR' 'Spanish (Argentina)' 'es-BO' 'Spanish (Bolivia)' 'es-CL' 'Spanish (Chile)' 'es-CO' 'Spanish (Colombia)' 'es-CR' 'Spanish (Costa Rica)' 'es-EC' 'Spanish (Ecuador)' 'es-SV' 'Spanish (El Salvador)' 'es-ES' 'Spanish (Spain)' 'es-US' 'Spanish (United States)' 'es-GT' 'Spanish (Guatemala)' 'es-HN' 'Spanish (Honduras)' 'es-MX' 'Spanish (Mexico)' 'es-NI' 'Spanish (Nicaragua)' 'es-PA' 'Spanish (Panama)' 'es-PY' 'Spanish (Paraguay)' 'es-PE' 'Spanish (Peru)' 'es-PR' 'Spanish (Puerto Rico)' 'es-DO' 'Spanish (Dominican Republic)' 'es-UY' 'Spanish (Uruguay)' 'es-VE' 'Spanish (Venezuela)' 'eu-ES' 'Basque (Spain)' 'fil-PH' 'Filipino (Philippines)' 'fr-CA' 'French (Canada)' 'fr-FR' 'French (France)' 'gl-ES' 'Galician (Spain)' 'hr-HR' 'Croatian (Croatia)' 'zu-ZA' 'Zulu (South Africa)' 'is-IS' 'Icelandic (Iceland)' 'it-IT' 'Italian (Italy)' 'lt-LT' 'Lithuanian (Lithuania)' 'hu-HU' 'Hungarian (Hungary)' 'nl-NL' 'Dutch (Netherlands)' 'nb-NO' 'Norwegian Bokmål (Norway)' 'pl-PL' 'Polish (Poland)' 'pt-BR' 'Portuguese (Brazil)' 'pt-PT' 'Portuguese (Portugal)' 'ro-RO' 'Romanian (Romania)' 'sk-SK' 'Slovak (Slovakia)' 'sl-SI' 'Slovenian (Slovenia)' 'fi-FI' 'Finnish (Finland)' 'sv-SE' 'Swedish (Sweden)' 'vi-VN' 'Vietnamese (Vietnam)' 'tr-TR' 'Turkish (Turkey)' 'el-GR' 'Greek (Greece)' 'bg-BG' 'Bulgarian (Bulgaria)' 'ru-RU' 'Russian (Russia)' 'sr-RS' 'Serbian (Serbia)' 'uk-UA' 'Ukrainian (Ukraine)' 'he-IL' 'Hebrew (Israel)' 'ar-IL' 'Arabic (Israel)' 'ar-JO' 'Arabic (Jordan)' 'ar-AE' 'Arabic (United Arab 
Emirates)' 'ar-BH' 'Arabic (Bahrain)' 'ar-DZ' 'Arabic (Algeria)' 'ar-SA' 'Arabic (Saudi Arabia)' 'ar-IQ' 'Arabic (Iraq)' 'ar-KW' 'Arabic (Kuwait)' 'ar-MA' 'Arabic (Morocco)' 'ar-TN' 'Arabic (Tunisia)' 'ar-OM' 'Arabic (Oman)' 'ar-PS' 'Arabic (State of Palestine)' 'ar-QA' 'Arabic (Qatar)' 'ar-LB' 'Arabic (Lebanon)' 'ar-EG' 'Arabic (Egypt)' 'fa-IR' 'Persian (Iran)' 'hi-IN' 'Hindi (India)' 'th-TH' 'Thai (Thailand)' 'ko-KR' 'Korean (South Korea)' 'cmn-Hant-TW' 'Chinese, Mandarin (Traditional, Taiwan)' 'yue-Hant-HK' 'Chinese, Cantonese (Traditional, Hong Kong)' 'ja-JP' 'Japanese (Japan)' 'cmn-Hans-HK' 'Chinese, Mandarin (Simplified, Hong Kong)' 'cmn-Hans-CN' 'Chinese, Mandarin (Simplified, China)'
In [9]:
# msg.download(msg.fileName)
# print('\nDownloaded image file name is: %s' % msg['FileName'])
# audio_file_input = msg['FileName']
# audio_type = ['flac', 'wav']
# Running Speech API
def KudosData_voice_to_text(audio_file_input, audio_type):
    """Convert an audio file to `audio_type` and send it to the Speech recognition API.

    Args:
        audio_file_input: path of the audio file to recognise.
        audio_type: extension of the converted file, appended to the input
            path (the notebook uses 'flac'; 'wav' is the noted alternative).

    Returns:
        The response returned by `speech_service.speech().recognize(...).execute()`.

    Raises:
        IOError: if the audio converter exits with a non-zero status.
    """
    audio_file_output = str(audio_file_input) + '.' + str(audio_type)
    print('Converted audio file for API: %s' % audio_file_output)

    # Remove any previous conversion result. Done in-process instead of shelling
    # out to `rm`, which is not portable and prints an error when the file is absent.
    if os.path.exists(audio_file_output):
        os.remove(audio_file_output)

    # GCP Datalab uses avconv; a Kudos Data VM or local machine uses ffmpeg.
    # Both take the same arguments here; '-ac 1' requests a single audio channel.
    converter = 'avconv' if parm_runtime_env_GCP else 'ffmpeg'
    retcode = subprocess.call([converter, '-i', audio_file_input, '-ac', '1', audio_file_output])
    if retcode != 0:
        # Fail with the real cause rather than a later "file not found" from encode_audio.
        raise IOError('Audio conversion failed: %s exited with status %d for %s'
                      % (converter, retcode, audio_file_input))

    # Call GCP Speech API (v1 `recognize`; the older name was `syncrecognize`).
    response = speech_service.speech().recognize(
        body={
            'config': {
                # 'encoding': 'LINEAR16',
                # 'sampleRateHertz': 16000,
                'languageCode': parm_speech_recognition_language
            },
            'audio': {
                # base64 of the converted audio file, for speech recognition
                'content': encode_audio(audio_file_output)
            }
        }).execute()
    print('Compeleted: Speech recognition API ( Voice -> Text )')
    return response
https://cloud.google.com/translate/docs/languages
'af' 'Afrikaans' 'sq' 'Albanian' 'am' 'Amharic' 'ar' 'Arabic' 'hy' 'Armenian' 'az' 'Azerbaijani' 'eu' 'Basque' 'be' 'Belarusian' 'bn' 'Bengali' 'bs' 'Bosnian' 'bg' 'Bulgarian' 'ca' 'Catalan' 'ceb (ISO-639-2)' 'Cebuano' 'ny' 'Chichewa' 'zh-CN (BCP-47)' 'Chinese (Simplified)' 'zh-TW (BCP-47)' 'Chinese (Traditional)' 'co' 'Corsican' 'hr' 'Croatian' 'cs' 'Czech' 'da' 'Danish' 'nl' 'Dutch' 'en' 'English' 'eo' 'Esperanto' 'et' 'Estonian' 'tl' 'Filipino' 'fi' 'Finnish' 'fr' 'French' 'fy' 'Frisian' 'gl' 'Galician' 'ka' 'Georgian' 'de' 'German' 'el' 'Greek' 'gu' 'Gujarati' 'ht' 'Haitian Creole' 'ha' 'Hausa' 'haw (ISO-639-2)' 'Hawaiian' 'iw' 'Hebrew' 'hi' 'Hindi' 'hmn (ISO-639-2)' 'Hmong' 'hu' 'Hungarian' 'is' 'Icelandic' 'ig' 'Igbo' 'id' 'Indonesian' 'ga' 'Irish' 'it' 'Italian' 'ja' 'Japanese' 'jw' 'Javanese' 'kn' 'Kannada' 'kk' 'Kazakh' 'km' 'Khmer' 'ko' 'Korean' 'ku' 'Kurdish' 'ky' 'Kyrgyz' 'lo' 'Lao' 'la' 'Latin' 'lv' 'Latvian' 'lt' 'Lithuanian' 'lb' 'Luxembourgish' 'mk' 'Macedonian' 'mg' 'Malagasy' 'ms' 'Malay' 'ml' 'Malayalam' 'mt' 'Maltese' 'mi' 'Maori' 'mr' 'Marathi' 'mn' 'Mongolian' 'my' 'Burmese' 'ne' 'Nepali' 'no' 'Norwegian' 'ps' 'Pashto' 'fa' 'Persian' 'pl' 'Polish' 'pt' 'Portuguese' 'pa' 'Punjabi' 'ro' 'Romanian' 'ru' 'Russian' 'sm' 'Samoan' 'gd' 'Scots Gaelic' 'sr' 'Serbian' 'st' 'Sesotho' 'sn' 'Shona' 'sd' 'Sindhi' 'si' 'Sinhala' 'sk' 'Slovak' 'sl' 'Slovenian' 'so' 'Somali' 'es' 'Spanish' 'su' 'Sundanese' 'sw' 'Swahili' 'sv' 'Swedish' 'tg' 'Tajik' 'ta' 'Tamil' 'te' 'Telugu' 'th' 'Thai' 'tr' 'Turkish' 'uk' 'Ukrainian' 'ur' 'Urdu' 'uz' 'Uzbek' 'vi' 'Vietnamese' 'cy' 'Welsh' 'xh' 'Xhosa' 'yi' 'Yiddish' 'yo' 'Yoruba' 'zu' 'Zulu'
In [10]:
def KudosData_TEXT_TRANSLATION(text, origin_language_code, target_language_code):
    """Translate `text` with the Translation API and wrap it in a small report.

    Args:
        text: the text to translate.
        origin_language_code: language code of `text` (e.g. 'zh').
        target_language_code: language code to translate into (e.g. 'en').

    Returns:
        `text` unchanged when both language codes are equal (no API call is
        made); otherwise a multi-line string holding a start banner, the two
        language codes, the translated text and an end banner.
    """
    # Nothing to translate when origin and target language are the same.
    if origin_language_code == target_language_code:
        return text

    # format='text': the v2 API treats input as HTML by default and then returns
    # HTML-escaped output (e.g. "&#39;" for an apostrophe); chat messages are
    # plain text, so request plain-text handling.
    outputs = service.translations().list(
        source=origin_language_code,
        target=target_language_code,
        q=text,
        format='text').execute()

    translated_text = ''
    translated_text += u'---- Start Translation ----\n'
    translated_text += u'( Origin Lang 原文: ' + origin_language_code + ' )\n'
    translated_text += u'( Target Lang 译文: ' + target_language_code + ' )\n'
    translated_text += outputs['translations'][0]['translatedText'] + '\n' + '----- End Translation -----\n'
    print('Compeleted: Translation API : From Language \'%s\' to \'%s\''
          % (origin_language_code, target_language_code))
    return translated_text
In [11]:
# Log in to WeChat through itchat.
# hotReload=True: keep the login state cached after the program exits, so that
# restarting within a certain time does not require scanning the QR code again.
itchat.auto_login(hotReload=True)
In [12]:
# Obtain my own Nick Name and build the regex used to strip the "@NickName"
# mention (plus any whitespace after it) from group messages.
MySelf = itchat.search_friends()
# re.escape: a nickname may contain regex metacharacters ('.', '(', '*', ...),
# which would otherwise match the wrong text or fail to compile.
# Raw string: '\s' inside a normal string literal is an invalid escape sequence.
NickName4RegEx = '@' + re.escape(MySelf['NickName']) + r'\s*'
In [ ]:
# 1. Convert message text to speech (Speech synthesis: text to voice)
# In a group chat, when a text message @-mentions me, the text is converted to
# speech automatically and sent back as an mp3 file:
@itchat.msg_register(TEXT, isGroupChat=True)
def text_to_voice_reply(msg):
    """Reply to a group text message that mentions me with a spoken mp3 of it.

    Messages whose 'isAt' flag is false are ignored. Nothing is returned; the
    mp3 is sent to msg['FromUserName'] with itchat.send.
    """
    if not msg['isAt']:
        return
    # Drop my own nickname mention so that only the actual message is spoken.
    spoken_text = re.sub(NickName4RegEx, '', msg['Content'])
    mp3_name = KudosData_text_to_voice(spoken_text)
    itchat.send('@%s@%s' % ('fil', mp3_name), msg['FromUserName'])
In [13]:
# 2. Convert speech to message text (Speech recognition: voice to text)
@itchat.msg_register([RECORDING], isGroupChat=True)
@itchat.msg_register([RECORDING])
def download_files(msg):
    """Download a voice message, transcribe it and translate the transcript.

    Registered twice for RECORDING messages: once with isGroupChat=True and
    once without.

    Returns:
        The reply text: a header, then (only when the Speech API returned at
        least one result) the transcript, its confidence, and the 'zh' -> 'en'
        translation report.
    """
    msg.download(msg.fileName)
    print('\nDownloaded audio file name is: %s' % msg['FileName'])

    # ---- call audio analysis APIs ----
    audio_analysis_reply = u'[ Audio Analysis 音频处理结果 ]\n'

    # Voice to Text:
    audio_analysis_reply += u'\n[ Voice -> Text 语音识别 ]\n'
    response = KudosData_voice_to_text(msg['FileName'], 'flac')
    # response = KudosData_voice_to_text(msg['FileName'], 'wav')

    # An empty response (or one without results) means nothing was recognised.
    results = response.get('results') if response else None
    if results:
        best = results[0]['alternatives'][0]
        transcript = best['transcript']
        # The API does not guarantee 'confidence'; 0.0 is its documented "not set" value.
        confidence = best.get('confidence', 0.0)

        print (transcript)
        print ('( confidence %f )' % confidence)
        audio_analysis_reply += transcript + '\n'
        audio_analysis_reply += '( confidence ' + str(confidence) + ' )\n'

        # Translate recognised text to another language:
        parm_translation_origin_language = 'zh'
        parm_translation_target_language = 'en'
        translated_text_reply = KudosData_TEXT_TRANSLATION(
            transcript, parm_translation_origin_language, parm_translation_target_language)
        print(translated_text_reply)
        audio_analysis_reply += translated_text_reply

    return audio_analysis_reply
In [ ]:
# 3. Multi-language translation of message text (Text based language translation)
# In a group chat, when a text message @-mentions me, the text is translated
# automatically and the translation is sent back as the reply:
@itchat.msg_register(TEXT, isGroupChat=True)
def text_to_translation_reply(msg):
    """Translate a group text message that mentions me from 'zh' to 'en'.

    Returns the translation report produced by KudosData_TEXT_TRANSLATION, or
    None when the message's 'isAt' flag is false.
    """
    if not msg['isAt']:
        return None
    # Strip my own nickname mention before translating.
    message_body = re.sub(NickName4RegEx, '', msg['Content'])
    source_lang, target_lang = 'zh', 'en'
    reply = KudosData_TEXT_TRANSLATION(message_body, source_lang, target_lang)
    print(reply)
    return reply
Combined:
In [14]:
@itchat.msg_register(TEXT, isGroupChat=True)
def text_reply(msg):
    """Combined handler: answer a group @-mention with speech and a translation.

    For a text message whose 'isAt' flag is true, the nickname mention is
    stripped, the remaining text is synthesized to an mp3 that is sent to
    msg['FromUserName'], and the text is translated from 'zh' to 'en'.

    Returns:
        The translation report, or None when the message does not mention me
        or contains nothing besides the mention.
    """
    if not msg['isAt']:
        return None

    # Remove my own Nick Name from message (done once, shared by both steps).
    message_text = re.sub(NickName4RegEx, '', msg['Content'])
    if not message_text.strip():
        # Only the mention was sent: there is nothing to speak or translate.
        return None

    # 1. Speech synthesis: text to voice
    text2voiceMP3name = KudosData_text_to_voice(message_text)
    itchat.send('@%s@%s' % ('fil', text2voiceMP3name), msg['FromUserName'])

    # 3. Text based language translation
    parm_translation_origin_language = 'zh'
    parm_translation_target_language = 'en'
    translated_text_reply = KudosData_TEXT_TRANSLATION(
        message_text, parm_translation_origin_language, parm_translation_target_language)
    print(translated_text_reply)
    return translated_text_reply
In [15]:
# Start itchat so that the message handlers registered above are invoked.
# NOTE(review): presumably this call blocks until the kernel is interrupted - confirm.
itchat.run()
In [42]:
# Interrupt the kernel first, then run this cell to log out of WeChat.
itchat.logout() # log out safely
Out[42]: