In [1]:
# Embed the companion YouTube tutorial video in the notebook output.
from IPython.display import YouTubeVideo
YouTubeVideo('4m44aPkLY2k')
Out[1]:
by: Sam.Gu@KudosData.com
May 2017 — Scan the QR code to become the trainer's friend on WeChat:
In [1]:
# Runtime-environment switch: True when running on Google Cloud Platform
# (Datalab), False when running locally. Used to decide GCP-only setup steps.
# parm_runtime_env_GCP = True
parm_runtime_env_GCP = False
From the same API console, choose "Dashboard" on the left-hand menu and "Enable API".
Enable the following APIs for your project (search for them) if they are not already enabled:
Finally, because we are calling the APIs from Python (clients in many other languages are available), let's install the Python package (it's not installed by default on Datalab)
In [2]:
# Copyright 2016 Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# import subprocess
# retcode = subprocess.call(['pip', 'install', '-U', 'google-api-python-client'])
# retcode = subprocess.call(['pip', 'install', '-U', 'gTTS'])
# Below is for GCP only: install audio conversion tool
# retcode = subprocess.call(['apt-get', 'update', '-y'])
# retcode = subprocess.call(['apt-get', 'install', 'libav-tools', '-y'])
In [3]:
import io, os, subprocess, sys, re, codecs, time, datetime, requests, itchat
from itchat.content import *
from googleapiclient.discovery import build
First, visit API console, choose "Credentials" on the left-hand menu. Choose "Create Credentials" and generate an API key for your application. You should probably restrict it by IP address to prevent abuse, but for now, just leave that field blank and delete the API key after trying out this demo.
Copy-paste your API Key here:
In [4]:
# Here I read in my own API_KEY from a file, which is not shared in Github repository:
# (keeping the credential out of the notebook avoids leaking it via version control)
with io.open('../../API_KEY.txt') as fp:
    for line in fp:
        # Strip the trailing newline/whitespace so the key is usable as-is;
        # the unstripped value would corrupt the developerKey passed to build().
        if line.strip():
            APIKEY = line.strip()
# You need to un-comment below line and replace 'APIKEY' variable with your own GCP API key:
# APIKEY='AIzaSyCvxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
In [5]:
# Below is for Google Speech synthesis: text to voice API
# from gtts import gTTS
# Below is for Google Speech recognition: voice to text API
# speech_service = build('speech', 'v1', developerKey=APIKEY)
# Below is for Google Language Translation API
# service = build('translate', 'v2', developerKey=APIKEY)
# Below is for Google Natural Language Processing API
# nlp_service = build('language', 'v1', developerKey=APIKEY)
# v1beta2 is used (instead of v1) so the extractEntitySentiment feature is available.
nlp_service = build('language', 'v1beta2', developerKey=APIKEY)
In [6]:
# Import the base64 encoding library.
import base64
# Pass the image data to an encoding function.
def encode_image(image_file):
    """Read a binary image file and return its Base64-encoded content.

    Parameters
    ----------
    image_file : str
        Path to the image file on disk.

    Returns
    -------
    Base64 encoding of the file content: ``str`` on Python 2,
    decoded ``str`` on Python 3 (API request bodies expect text).
    """
    # Use a distinct name for the handle so the path argument is not shadowed.
    with io.open(image_file, "rb") as fp:
        image_content = fp.read()
    encoded = base64.b64encode(image_content)
    # Python 2: b64encode already returns str; Python 3: decode bytes -> str.
    if sys.version_info[0] < 3:
        return encoded
    return encoded.decode('utf-8')
# Pass the audio data to an encoding function.
def encode_audio(audio_file):
    """Read a binary audio file and return its Base64-encoded content.

    Parameters
    ----------
    audio_file : str
        Path to the audio file on disk.

    Returns
    -------
    Base64 encoding of the file content: ``str`` on Python 2,
    decoded ``str`` on Python 3 (API request bodies expect text).
    """
    # Use a distinct name for the handle so the path argument is not shadowed.
    with io.open(audio_file, 'rb') as fp:
        audio_content = fp.read()
    encoded = base64.b64encode(audio_content)
    # Python 2: b64encode already returns str; Python 3: decode bytes -> str.
    if sys.version_info[0] < 3:
        return encoded
    return encoded.decode('utf-8')
In [7]:
# API control parameter for Image API:
parm_image_maxResults = 10 # max objects or faces to be extracted from image analysis
# API control parameter for Language Translation API:
parm_translation_origin_language = 'zh' # original language in text: to be overwritten by TEXT_DETECTION
parm_translation_target_language = 'zh' # target language for translation: Chinese
# API control parameter for speech synthesis (text to voice):
parm_speech_synthesis_language = 'zh' # speech synthesis API 'text to voice' language
# parm_speech_synthesis_language = 'zh-tw' # speech synthesis API 'text to voice' language
# parm_speech_synthesis_language = 'zh-yue' # speech synthesis API 'text to voice' language
# API control parameter for speech recognition (voice to text):
# parm_speech_recognition_language = 'en' # speech API 'voice to text' language
parm_speech_recognition_language = 'cmn-Hans-CN' # speech API 'voice to text' language
# API control parameters for Natural Language Processing (semantics & sentiment):
parm_nlp_extractDocumentSentiment = True # sentiment analysis (document & sentence level)
parm_nlp_extractEntities = True # named-entity detection in the message text
parm_nlp_extractEntitySentiment = False # Only available in v1beta2. But Chinese language zh is not supported yet.
parm_nlp_extractSyntax = True # syntax / grammar analysis of sentences
In [8]:
# Running the Natural Language Processing API
def KudosData_nlp(text, extractDocumentSentiment, extractEntities, extractEntitySentiment, extractSyntax):
    """Annotate ``text`` with the Google Cloud Natural Language API.

    Parameters
    ----------
    text : str
        The message text to analyse.
    extractDocumentSentiment : bool
        Run document/sentence-level sentiment analysis.
    extractEntities : bool
        Run named-entity detection.
    extractEntitySentiment : bool
        Run entity-level sentiment (only available in v1beta2; Chinese 'zh'
        is not supported yet).
    extractSyntax : bool
        Run syntax / grammar analysis.

    Returns
    -------
    dict
        Parsed JSON response of documents().annotateText().
    """
    request = nlp_service.documents().annotateText(body={
        "document": {
            "type": "PLAIN_TEXT",
            "content": text
        },
        "features": {
            "extractDocumentSentiment": extractDocumentSentiment,
            "extractEntities": extractEntities,
            "extractEntitySentiment": extractEntitySentiment,  # only available in v1beta2
            "extractSyntax": extractSyntax,
        },
        "encodingType": "UTF8"
    })
    # Retry transient API failures up to 3 times before raising.
    responses = request.execute(num_retries=3)
    print('\nCompleted: NLP analysis API')
    return responses
< Start of interactive demo >
In [9]:
# English sample text for the NLP demo (note: the next cell overwrites it
# with a Chinese sample before the API is called).
text4nlp = 'As a data science consultant and trainer with Kudos Data, Zhan GU (Sam) engages communities and schools ' \
           'to help organizations making sense of their data using advanced data science , machine learning and ' \
           'cloud computing technologies. Inspire next generation of artificial intelligence lovers and leaders.'
In [10]:
# Chinese sample text for the NLP demo (overwrites the English sample above).
text4nlp = '作为酷豆数据科学的顾问和培训师,Sam Gu (白黑) 善长联络社群和教育资源。' \
           '促进各大公司组织使用先进的数据科学、机器学习和云计算技术来获取数据洞见。激励下一代人工智能爱好者和领导者。'
In [11]:
# Run the NLP API over the sample text using the feature flags configured above.
responses = KudosData_nlp(text4nlp
                          , parm_nlp_extractDocumentSentiment
                          , parm_nlp_extractEntities
                          , parm_nlp_extractEntitySentiment
                          , parm_nlp_extractSyntax)
In [12]:
# print(responses)
In [13]:
# print(responses['entities'])
In [14]:
# Display each detected entity: name, type and salience, plus the optional
# entity-level sentiment and Wikipedia link when the API returned them.
for idx, entity in enumerate(responses['entities'], start=1):
    print('')
    print(u'[ 实体 {} : {} ]\n 实体类别 : {}\n 重要程度 : {}'.format(
        idx,
        entity['name'],
        entity['type'],
        entity['salience']))
    # Entity sentiment only appears when extractEntitySentiment was requested.
    if 'sentiment' in entity:
        print(u' 褒贬程度 : {}\n 语彩累积 : {}'.format(
            entity['sentiment']['score'],
            entity['sentiment']['magnitude']))
    if entity['metadata'] != {}:
        if 'wikipedia_url' in entity['metadata']:
            print(' ' + entity['metadata']['wikipedia_url'])
In [15]:
# print(responses['sentences'])
In [16]:
# Per-sentence sentiment: print each sentence with its score and magnitude.
for idx, sentence in enumerate(responses['sentences'], start=1):
    print('')
    print(u'[ 语句 {} : {} ]\n( 褒贬程度 : {} | 语彩累积 : {} )'.format(
        idx,
        sentence['text']['content'],
        sentence['sentiment']['score'],
        sentence['sentiment']['magnitude']))
https://cloud.google.com/natural-language/docs/basics
Sentiment | Sample Values |
---|---|
明显褒义 Clearly Positive | "score 褒贬程度": 0.8, "magnitude 语彩累积": 3.0 |
明显贬义 Clearly Negative | "score 褒贬程度": -0.6, "magnitude 语彩累积": 4.0 |
中性 Neutral | "score 褒贬程度": 0.1, "magnitude 语彩累积": 0.0 |
混合 Mixed | "score 褒贬程度": 0.0, "magnitude 语彩累积": 4.0 |
In [17]:
# print(responses['documentSentiment'])
In [18]:
# Document-level result: detected language plus overall sentiment.
doc_sentiment = responses['documentSentiment']
print(u'[ 整篇消息 语种 : {} ]\n( 褒贬程度 : {} | 语彩累积 : {} )'.format(
    responses['language'],
    doc_sentiment['score'],
    doc_sentiment['magnitude']))
In [19]:
# Syntax analysis dump: for every token print its surface text,
# part-of-speech tags and dependency-tree edge.
for token in responses['tokens']:
    print('')
    print(token['text']['content'])
    print(token['partOfSpeech'])
    print(token['dependencyEdge'])
< End of interactive demo >
In [20]:
def KudosData_nlp_generate_reply(responses):
    """Build a multi-line, Chinese-labelled report from a Natural Language
    API annotateText response.

    Parameters
    ----------
    responses : dict
        Parsed JSON response of documents().annotateText(). Sections whose
        feature flag was disabled (so the key is absent from the response)
        are skipped instead of raising KeyError.

    Returns
    -------
    str
        Report with document-level sentiment, per-sentence sentiment and
        detected entities (plus optional entity sentiment / Wikipedia link).
    """
    nlp_reply = u'[ NLP 自然语言处理结果 ]'
    # 1. Document-level sentiment (language, score, magnitude)
    nlp_reply += '\n'
    if 'documentSentiment' in responses:
        nlp_reply += '\n' + u'[ 整篇消息 语种 : {} ]\n( 褒贬程度 : {} | 语彩累积 : {} )'.format(
            responses.get('language'),
            responses['documentSentiment']['score'],
            responses['documentSentiment']['magnitude'])
    # 2. Sentence-level sentiment
    nlp_reply += '\n'
    for i, sentence in enumerate(responses.get('sentences', []), start=1):
        nlp_reply += '\n' + u'[ 语句 {} : {} ]\n( 褒贬程度 : {} | 语彩累积 : {} )'.format(
            i,
            sentence['text']['content'],
            sentence['sentiment']['score'],
            sentence['sentiment']['magnitude'])
    # 3. Named-entity detection
    nlp_reply += '\n'
    for i, entity in enumerate(responses.get('entities', []), start=1):
        nlp_reply += '\n' + u'[ 实体 {} : {} ]\n 实体类别 : {}\n 重要程度 : {}'.format(
            i,
            entity['name'],
            entity['type'],
            entity['salience'])
        # Entity-level sentiment is only present when extractEntitySentiment
        # was requested (v1beta2 only).
        if 'sentiment' in entity:
            nlp_reply += '\n' + u' 褒贬程度 : {}\n 语彩累积 : {}'.format(
                entity['sentiment']['score'],
                entity['sentiment']['magnitude'])
        wikipedia_url = entity.get('metadata', {}).get('wikipedia_url')
        if wikipedia_url is not None:
            nlp_reply += '\n ' + wikipedia_url
    # 4. Syntax / grammar tokens are too verbose for a chat reply; omitted.
    return nlp_reply
In [21]:
print(KudosData_nlp_generate_reply(responses))
In [ ]:
itchat.auto_login(hotReload=True) # hotReload=True: 退出程序后暂存登陆状态。即使程序关闭,一定时间内重新开启也可以不用重新扫码。
In [ ]:
# Obtain my own Nick Name
MySelf = itchat.search_friends()
# Escape the nickname so regex metacharacters in it (e.g. '+', '(', '*') do
# not break the @-mention pattern, and use a raw string for the '\s*' escape
# (a plain '\s' is an invalid escape sequence on modern Python).
NickName4RegEx = '@' + re.escape(MySelf['NickName']) + r'\s*'
In [ ]:
# Single-chat mode: run NLP analysis on every incoming text-like message and
# reply with the formatted result as text.
@itchat.msg_register([TEXT, MAP, CARD, NOTE, SHARING])
def text_reply(msg):
    incoming_text = msg['Content']
    # Run the Google NLP API over the message body:
    analysis = KudosData_nlp(incoming_text,
                             parm_nlp_extractDocumentSentiment,
                             parm_nlp_extractEntities,
                             parm_nlp_extractEntitySentiment,
                             parm_nlp_extractSyntax)
    # Turn the raw API response into a readable reply:
    reply_text = KudosData_nlp_generate_reply(analysis)
    print(reply_text)
    return reply_text
In [ ]:
# Group-chat mode: when this account is @-mentioned in a group, run NLP
# analysis on the mentioned text and reply with the formatted result.
@itchat.msg_register(TEXT, isGroupChat=True)
def group_text_reply(msg):
    """Group-chat handler: analyse @-mentions and reply with NLP results.

    Renamed from ``text_reply`` so it no longer shadows the single-chat
    handler defined above (itchat registers handlers via the decorator, so
    the function name itself is never called elsewhere).
    """
    if msg['isAt']:
        # Strip the '@NickName' mention before analysing the text:
        text4nlp = re.sub(NickName4RegEx, '', msg['Content'])
        # call NLP API:
        nlp_responses = KudosData_nlp(text4nlp,
                                      parm_nlp_extractDocumentSentiment,
                                      parm_nlp_extractEntities,
                                      parm_nlp_extractEntitySentiment,
                                      parm_nlp_extractSyntax)
        # Format NLP results:
        nlp_reply = KudosData_nlp_generate_reply(nlp_responses)
        print(nlp_reply)
        return nlp_reply
In [ ]:
itchat.run()
In [ ]:
# interrupt kernel, then logout
itchat.logout()  # safe logout