Class 11: Natural Language Processing and Speech Recognition
DBpedia exposes the data contained in Wikipedia in database form. The data in DBpedia can be queried with an SQL-like language named SPARQL (SPARQL Protocol and RDF Query Language).
For the text examples in this class we will use a sample of the DBpedia articles, each classified into one of 14 high-level document classes:
The data files can be found at this location.
TensorFlow makes available several operators designed for text classification.
In [7]:
# Classifying Text Documents
# Small demonstration of byte-level encoding on three short strings.
# NOTE(review): presumably ByteProcessor(5) turns every string into a
# fixed-length sequence of 5 byte values -- confirm against the skflow docs.
data = ["This is a test", "ABC", "abc"]
char_processor = skflow.preprocessing.ByteProcessor(5)
# fit_transform() is materialised with list() so it can be printed at once.
z = list(char_processor.fit_transform(data))
print(z)
In [4]:
import numpy as np
import os
from sklearn import metrics
import pandas
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell
from tensorflow.contrib import skflow
### Training data
# Download dbpedia_csv.tar.gz from
# https://drive.google.com/folderview?id=0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M
# Unpack: tar -xvf dbpedia_csv.tar.gz
path = "./data/"

# Both CSV files are read without a header row, so columns are addressed by
# position: column 0 is used as the label and column 2 as the input text.
train = pandas.read_csv(os.path.join(path, "train.csv"), header=None)
test = pandas.read_csv(os.path.join(path, "test.csv"), header=None)
X_train, y_train = train[2], train[0]
X_test, y_test = test[2], test[0]

### Process vocabulary
# Every document is encoded with a ByteProcessor configured with
# MAX_DOCUMENT_LENGTH; the processor is fitted on the training text only and
# then re-used unchanged for the test text.
MAX_DOCUMENT_LENGTH = 100
char_processor = skflow.preprocessing.ByteProcessor(MAX_DOCUMENT_LENGTH)
X_train = np.array(list(char_processor.fit_transform(X_train)))
X_test = np.array(list(char_processor.transform(X_test)))
### Models
HIDDEN_SIZE = 20


def char_rnn_model(X, y):
    """Build the character-level GRU classifier passed to the estimator.

    Args:
        X: encoded documents, as produced by the ByteProcessor step.
        y: target labels.

    Returns:
        Whatever ``skflow.models.logistic_regression`` returns for the final
        GRU state and ``y``.
    """
    # One-hot encode every position over 256 possible values.
    # NOTE(review): 256 presumably corresponds to the range of one byte.
    one_hot = skflow.ops.one_hot_matrix(X, 256)
    # Break the encoded batch into MAX_DOCUMENT_LENGTH pieces along dimension 1
    # so the RNN receives a list with one entry per position.
    steps = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, one_hot)
    gru = rnn_cell.GRUCell(HIDDEN_SIZE)
    # Only the second value returned by rnn.rnn is used; it is fed to the
    # logistic-regression layer as the document encoding.
    _outputs, encoding = rnn.rnn(gru, steps, dtype=tf.float32)
    return skflow.models.logistic_regression(encoding, y)
# Estimator wrapping char_rnn_model. continue_training=True is set so that
# repeated fit() calls can build on the previous ones.
# NOTE(review): n_classes is 15 although the text above mentions 14 classes --
# presumably because the labels are 1-based; confirm before changing.
classifier = skflow.TensorFlowEstimator(
    model_fn=char_rnn_model,
    n_classes=15,
    steps=1000,
    optimizer='Adam',
    learning_rate=0.01,
    continue_training=True,
)
In [5]:
# Continuously train for 1000 steps & predict on test set.
# Five rounds: each round fits the classifier again and then reports the
# accuracy of its predictions on the held-out test set.
for _round in range(5):
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    score = metrics.accuracy_score(y_test, predictions)
    print("Accuracy: %f" % score)
In [10]:
# Inspect the encoded training data: first its Python type, then its shape.
for detail in (type(X_train), X_train.shape):
    print(detail)
In [9]:
# Run the first two ops of char_rnn_model by hand on the training data and
# print what each one produces.
one_hot = skflow.ops.one_hot_matrix(X_train, 256)
print("1:{}".format(one_hot))
# `temp` ends up holding the sequence returned by split_squeeze.
temp = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, one_hot)
print(len(temp))
first_step = temp[0]
print("2:{}".format(first_step))
The code below interfaces with your computer's microphone and speakers. It will not run in Data Scientist Workbench.
A very common use of LSTMs and RNNs is speech recognition.
Google speech recognition makes use of LSTM and some other technologies.
See Google Speech Recognition in action.
In [11]:
# pip install SpeechRecognition
# see this for PyAudio
# pip install pyttsx
#!/usr/bin/env python3
# NOTE: this example requires PyAudio because it uses the Microphone class
import speech_recognition as sr
# Record one utterance from the microphone, transcribe it with Google Speech
# Recognition, print the transcript and speak it back with the `say` command.
import subprocess  # argument-list call: no shell ever parses the spoken text

# obtain audio from the microphone
r = sr.Recognizer()
with sr.Microphone() as source:
    print("Say something!")
    audio = r.listen(source)

# recognize speech using Google Speech Recognition
try:
    # for testing purposes, we're just using the default API key
    # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
    # instead of `r.recognize_google(audio)`
    # The transcript is bound to `text` rather than `str` so the builtin
    # `str` is not shadowed for the rest of the session.
    text = r.recognize_google(audio)
except sr.UnknownValueError:
    print("Google Speech Recognition could not understand audio")
except sr.RequestError as e:
    print("Could not request results from Google Speech Recognition service; {0}".format(e))
else:
    print("You said: {}".format(text))
    try:
        # The transcript is passed as a single argument, so apostrophes or
        # other shell metacharacters in it cannot break or alter the command.
        subprocess.call(["say", "I believe you said: {}".format(text)])
    except FileNotFoundError:
        print("Speech output unavailable: the 'say' command was not found.")
In [20]:
# The following code works on a Mac
import os
def say(s):
    """Speak the string *s* aloud using the ``say`` command-line tool.

    The text is handed to ``say`` as a single argument without going through
    a shell, so it is spoken exactly as given (apostrophes included) and
    cannot be interpreted as shell syntax.

    Args:
        s: the text to speak.

    Returns:
        None. If the ``say`` executable is not installed, a notice is printed
        instead of raising.
    """
    import subprocess  # local import keeps this cell self-contained

    try:
        subprocess.call(["say", s])
    except FileNotFoundError:
        print("Speech output unavailable: the 'say' command was not found.")


say("Shall we play a game?")
In [28]:
# pip install SpeechRecognition
# see this for PyAudio
# pip install pyttsx
#!/usr/bin/env python3
# NOTE: this example requires PyAudio because it uses the Microphone class
import speech_recognition as sr
import os
def say(s):
    """Speak the string *s* aloud using the ``say`` command-line tool.

    The text is handed to ``say`` as a single argument without going through
    a shell, so it is spoken exactly as given (apostrophes included) and
    cannot be interpreted as shell syntax.

    Args:
        s: the text to speak.

    Returns:
        None. If the ``say`` executable is not installed, a notice is printed
        instead of raising.
    """
    import subprocess  # local import keeps this cell self-contained

    try:
        subprocess.call(["say", s])
    except FileNotFoundError:
        print("Speech output unavailable: the 'say' command was not found.")
# Prompt the user by voice, record one utterance, transcribe it with Google
# Speech Recognition, then print the transcript and speak it back.

# obtain audio from the microphone
r = sr.Recognizer()
with sr.Microphone() as source:
    say("Hello there, please say something.")
    audio = r.listen(source)

# recognize speech using Google Speech Recognition
try:
    # for testing purposes, we're just using the default API key
    # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
    # instead of `r.recognize_google(audio)`
    # The transcript is bound to `text` rather than `str` so the builtin
    # `str` is not shadowed for the rest of the session.
    text = r.recognize_google(audio)
except sr.UnknownValueError:
    print("Google Speech Recognition could not understand audio")
except sr.RequestError as e:
    print("Could not request results from Google Speech Recognition service; {0}".format(e))
else:
    # Success path kept out of the try body so only the recognition call is
    # covered by the handlers above.
    print("You said: {}".format(text))
    say("I think you said {}".format(text))
ELIZA is an early natural language processing computer program created from 1964 to 1966 at the MIT Artificial Intelligence Laboratory by Joseph Weizenbaum. The following code is based on an Eliza Python implementation by SureSmallThing.
In [17]:
import re
import random
import speech_recognition as sr
import os
# Word-level substitutions applied by reflect(): each key that appears as a
# word in a captured fragment is replaced by its value, turning the speaker's
# point of view around ("my" -> "your", "you" -> "me", ...).
reflections = {
    # first person -> second person
    "am": "are",
    "was": "were",
    "i": "you",
    "i'd": "you would",
    "i've": "you have",
    "i'll": "you will",
    "my": "your",
    # second person -> first person
    "are": "am",
    "you've": "I have",
    "you'll": "I will",
    "your": "my",
    "yours": "mine",
    "you": "me",
    "me": "you",
}
# Eliza's rule table: an ordered list of [regex, candidate responses] pairs.
# analyze() walks the list from the top and answers with a random response of
# the first rule whose regex matches, so specific patterns must stay above the
# general ones and the final r'(.*)' rule acts as the catch-all.
# "{0}" in a response is filled with the (reflected) first captured group.
psychobabble = [
    [r'i need (.*)',
     ["Why do you need {0}?",
      "Would it really help you to get {0}?",
      "Are you sure you need {0}?"]],

    [r'why don\'?t you ([^\?]*)\??',
     ["Do you really think I don't {0}?",
      "Perhaps eventually I will {0}.",
      "Do you really want me to {0}?"]],

    [r'why can\'?t I ([^\?]*)\??',
     ["Do you think you should be able to {0}?",
      "If you could {0}, what would you do?",
      "I don't know -- why can't you {0}?",
      "Have you really tried?"]],

    [r'i can\'?t (.*)',
     ["How do you know you can't {0}?",
      "Perhaps you could {0} if you tried.",
      "What would it take for you to {0}?"]],

    [r'i am (.*)',
     ["Did you come to me because you are {0}?",
      "How long have you been {0}?",
      "How do you feel about being {0}?"]],

    [r'i\'?m (.*)',
     ["How does being {0} make you feel?",
      "Do you enjoy being {0}?",
      "Why do you tell me you're {0}?",
      "Why do you think you're {0}?"]],

    [r'are you ([^\?]*)\??',
     ["Why does it matter whether I am {0}?",
      "Would you prefer it if I were not {0}?",
      "Perhaps you believe I am {0}.",
      "I may be {0} -- what do you think?"]],

    [r'what (.*)',
     ["Why do you ask?",
      "How would an answer to that help you?",
      "What do you think?"]],

    [r'how (.*)',
     ["How do you suppose?",
      "Perhaps you can answer your own question.",
      "What is it you're really asking?"]],

    [r'because (.*)',
     ["Is that the real reason?",
      "What other reasons come to mind?",
      "Does that reason apply to anything else?",
      "If {0}, what else must be true?"]],

    [r'(.*) sorry (.*)',
     ["There are many times when no apology is needed.",
      "What feelings do you have when you apologize?"]],

    [r'hello(.*)',
     ["Hello... I'm glad you could drop by today.",
      "Hi there... how are you today?",
      "Hello, how are you feeling today?"]],

    [r'i think (.*)',
     ["Do you doubt {0}?",
      "Do you really think so?",
      "But you're not sure {0}?"]],

    [r'(.*) friend (.*)',
     ["Tell me more about your friends.",
      "When you think of a friend, what comes to mind?",
      "Why don't you tell me about a childhood friend?"]],

    [r'yes',
     ["You seem quite sure.",
      "OK, but can you elaborate a bit?"]],

    [r'(.*) computer(.*)',
     ["Are you really talking about me?",
      "Does it seem strange to talk to a computer?",
      "How do computers make you feel?",
      "Do you feel threatened by computers?"]],

    [r'is it (.*)',
     ["Do you think it is {0}?",
      "Perhaps it's {0} -- what do you think?",
      "If it were {0}, what would you do?",
      "It could well be that {0}."]],

    [r'it is (.*)',
     ["You seem very certain.",
      "If I told you that it probably isn't {0}, what would you feel?"]],

    [r'can you ([^\?]*)\??',
     ["What makes you think I can't {0}?",
      "If I could {0}, then what?",
      "Why do you ask if I can {0}?"]],

    [r'can I ([^\?]*)\??',
     ["Perhaps you don't want to {0}.",
      "Do you want to be able to {0}?",
      "If you could {0}, would you?"]],

    [r'you are (.*)',
     ["Why do you think I am {0}?",
      "Does it please you to think that I'm {0}?",
      "Perhaps you would like me to be {0}.",
      "Perhaps you're really talking about yourself?"]],

    [r'you\'?re (.*)',
     ["Why do you say I am {0}?",
      "Why do you think I am {0}?",
      "Are we talking about you, or me?"]],

    [r'i don\'?t (.*)',
     ["Don't you really {0}?",
      "Why don't you {0}?",
      "Do you want to {0}?"]],

    [r'i feel (.*)',
     ["Good, tell me more about these feelings.",
      "Do you often feel {0}?",
      "When do you usually feel {0}?",
      "When you feel {0}, what do you do?"]],

    [r'i have (.*)',
     ["Why do you tell me that you've {0}?",
      "Have you really {0}?",
      "Now that you have {0}, what will you do next?"]],

    [r'i would (.*)',
     ["Could you explain why you would {0}?",
      "Why would you {0}?",
      "Who else knows that you would {0}?"]],

    [r'is there (.*)',
     ["Do you think there is {0}?",
      "It's likely that there is {0}.",
      "Would you like there to be {0}?"]],

    [r'my (.*)',
     ["I see, your {0}.",
      "Why do you say that your {0}?",
      "When your {0}, how do you feel?"]],

    [r'you (.*)',
     ["We should be discussing you, not me.",
      "Why do you say that about me?",
      "Why do you care whether I {0}?"]],

    [r'why (.*)',
     ["Why don't you tell me the reason why {0}?",
      "Why do you think {0}?"]],

    [r'i want (.*)',
     ["What would it mean to you if you got {0}?",
      "Why do you want {0}?",
      "What would you do if you got {0}?",
      "If you got {0}, then what would you do?"]],

    [r'(.*) mother(.*)',
     ["Tell me more about your mother.",
      "What was your relationship with your mother like?",
      "How do you feel about your mother?",
      "How does this relate to your feelings today?",
      "Good family relations are important."]],

    [r'(.*) father(.*)',
     ["Tell me more about your father.",
      "How did your father make you feel?",
      "How do you feel about your father?",
      "Does your relationship with your father relate to your feelings today?",
      "Do you have trouble showing affection with your family?"]],

    [r'(.*) child(.*)',
     ["Did you have close friends as a child?",
      "What is your favorite childhood memory?",
      "Do you remember any dreams or nightmares from childhood?",
      "Did the other children sometimes tease you?",
      "How do you think your childhood experiences relate to your feelings today?"]],

    [r'(.*)\?',
     ["Why do you ask that?",
      "Please consider whether you can answer your own question.",
      "Perhaps the answer lies within yourself?",
      "Why don't you tell me?"]],

    [r'quit',
     ["Thank you for talking with me.",
      "Good-bye.",
      "Thank you, that will be $150. Have a good day!"]],

    # Catch-all: matches any statement not handled above.
    [r'(.*)',
     ["Please tell me more.",
      "Let's change focus a bit... Tell me about your family.",
      "Can you elaborate on that?",
      "Why do you say that {0}?",
      "I see.",
      "Very interesting.",
      "{0}.",
      "I see. And what does that tell you?",
      "How does that make you feel?",
      "How do you feel when you say that?"]]
]
def reflect(fragment):
    """Return *fragment* with its words swapped according to ``reflections``.

    The fragment is lower-cased and split on whitespace. Every word that is a
    key of ``reflections`` is replaced by the mapped value, all other words
    are kept, and the result is re-joined with single spaces.

    Args:
        fragment: text captured from the user's statement.

    Returns:
        The lower-cased, reflected text.
    """
    words = fragment.lower().split()
    return ' '.join(reflections.get(word, word) for word in words)
def analyze(statement):
    """Return Eliza's reply to the user's *statement*.

    The rules in ``psychobabble`` are tried in order. For the first rule
    whose pattern matches the start of the statement, one of its responses is
    picked at random and its ``{0}``-style placeholders are filled with the
    reflected capture groups.

    Matching is case-insensitive: the rule patterns are written almost
    entirely in lower case, so without this a statement such as
    "I need a break" or "Hello" would skip its intended rule and fall through
    to the catch-all.

    Args:
        statement: the raw text of what the user said.

    Returns:
        The response string, or None if no rule matches.
    """
    # Trailing "." and "!" are dropped once, up front, instead of per rule.
    cleaned = statement.rstrip(".!")
    for pattern, responses in psychobabble:
        match = re.match(pattern, cleaned, flags=re.IGNORECASE)
        if match:
            response = random.choice(responses)
            # reflect() lower-cases each captured fragment and swaps
            # first/second person before it is echoed back.
            return response.format(*[reflect(g) for g in match.groups()])
    return None
def say(s):
    """Speak the string *s* aloud using the ``say`` command-line tool.

    The text is handed to ``say`` as a single argument without going through
    a shell, so it is spoken exactly as given (apostrophes included) and
    cannot be interpreted as shell syntax.

    Args:
        s: the text to speak.

    Returns:
        None. If the ``say`` executable is not installed, a notice is printed
        instead of raising.
    """
    import subprocess  # local import keeps this cell self-contained

    try:
        subprocess.call(["say", s])
    except FileNotFoundError:
        print("Speech output unavailable: the 'say' command was not found.")
def main():
    """Run a spoken Eliza conversation until the user says "quit".

    Greets the user by voice, then repeatedly records an utterance from the
    microphone, transcribes it with Google Speech Recognition, prints the
    transcript, and prints and speaks the reply produced by analyze().
    Recognition failures are reported and the loop keeps listening.
    """
    say("Hello. How are you feeling today?")
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        while True:
            audio = recognizer.listen(source)
            # Reset on every turn; only an exact "quit" ends the session.
            quitting = False
            # recognize speech using Google Speech Recognition
            try:
                # for testing purposes, we're just using the default API key
                # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
                # instead of `r.recognize_google(audio)`
                statement = recognizer.recognize_google(audio)
                print("Human: {}".format(statement))
                response = analyze(statement)
                quitting = statement.lower() == 'quit'
                # The farewell is still printed and spoken before leaving.
                print("Eliza (computer): {}".format(response))
                say(response)
            except sr.UnknownValueError:
                print("No input, or could not understand audio.")
            except sr.RequestError as e:
                print("Error: Could not request results from Google Speech Recognition service; {0}".format(e))
            if quitting:
                break


if __name__ == "__main__":
    main()
Using the above code you can create your own primitive chat bots. A somewhat famous video on YouTube from Cornell University shows what happens when two chat bots converse. Other interesting chat-bot-type technology:
In [ ]:
In [ ]: