Simple example using an LSTM recurrent neural network to classify an emotion dataset.
References:
- Long Short-Term Memory, Sepp Hochreiter & Jürgen Schmidhuber, Neural
Computation 9(8): 1735-1780, 1997.
- Andrew L. Maas, Raymond E. Daly, Peter T. Pham, Dan Huang, Andrew Y. Ng,
and Christopher Potts. (2011). Learning Word Vectors for Sentiment
Analysis. The 49th Annual Meeting of the Association for Computational
Linguistics (ACL 2011).
Links:
- http://colah.github.io/posts/2015-08-Understanding-LSTMs/
- http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
- http://machinelearningmastery.com/sequence-classification-lstm-recurrent-neural-networks-python-keras/
- https://github.com/tflearn/tflearn/blob/master/examples/nlp/lstm.py
- http://tflearn.org/
In [1]:
from __future__ import division, print_function
import pickle
import time
import tflearn
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit
from tflearn.data_utils import to_categorical, pad_sequences
In [2]:
# Data set loading
params_file = 'data_params.pkl'
with open(params_file, 'rb') as f:
    params = pickle.load(f)
max_sequence_length = params['max_sequence_length']
labels = params['labels']
vocab = params['vocab']
num_classes = len(labels)
num_words = len(vocab)
data_file = 'processed_emotions.pkl'
data = pd.read_pickle(data_file)
dataY = data['label'].values
dataX = data['sequence'].values
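A quick look at the loaded data can catch vocabulary or label mismatches early; this check is an illustrative addition, not part of the original run.
print('classes: {}, vocab size: {}, samples: {}'.format(num_classes, num_words, len(dataX)))
print(data['label'].value_counts())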
In [3]:
# Split data set
kfolds = 1
test_ratio = 0.2
sss = StratifiedShuffleSplit(n_splits=kfolds, test_size=test_ratio, random_state=0)
for train_index, test_index in sss.split(dataX, dataY):
    trainX, testX = dataX[train_index], dataX[test_index]
    trainY, testY = dataY[train_index], dataY[test_index]
In [ ]:
# TODO: use kfold cross validation for hyper-parameter tuning on training set
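As a starting point for the TODO above, a minimal sketch of stratified k-fold cross-validation over the raw training split (before padding and one-hot encoding), assuming scikit-learn's StratifiedKFold; the fold count and loop body are illustrative.
from sklearn.model_selection import StratifiedKFold

n_folds = 5
skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=0)
for fold, (fit_index, val_index) in enumerate(skf.split(trainX, trainY)):
    foldX, valX = trainX[fit_index], trainX[val_index]
    foldY, valY = trainY[fit_index], trainY[val_index]
    # Build a fresh model per fold, fit on (foldX, foldY), and score on
    # (valX, valY) to compare hyper-parameter settings.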
In [4]:
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=max_sequence_length, value=0.)
testX = pad_sequences(testX, maxlen=max_sequence_length, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=num_classes)
testY = to_categorical(testY, nb_classes=num_classes)
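After padding and one-hot encoding, inputs should have shape (n_samples, max_sequence_length) and labels (n_samples, num_classes); a quick illustrative check:
print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)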
In [5]:
# TODO: try adding a 1D Convolution and Max Pooling layer prior to the LSTM
# (a sketch follows after this cell)
# Network building
net = tflearn.input_data([None, max_sequence_length])
net = tflearn.embedding(net, input_dim=num_words, output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.5)
net = tflearn.fully_connected(net, num_classes, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001, loss='categorical_crossentropy')
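A hedged sketch of the Conv/MaxPool variant from the TODO above, using tflearn's conv_1d and max_pool_1d layers; the filter count (64), filter size (3), and pool kernel (2) are illustrative choices, not tuned values.
conv_net = tflearn.input_data([None, max_sequence_length])
conv_net = tflearn.embedding(conv_net, input_dim=num_words, output_dim=128)
conv_net = tflearn.conv_1d(conv_net, 64, 3, padding='same', activation='relu')
conv_net = tflearn.max_pool_1d(conv_net, 2)
conv_net = tflearn.lstm(conv_net, 128, dropout=0.5)
conv_net = tflearn.fully_connected(conv_net, num_classes, activation='softmax')
conv_net = tflearn.regression(conv_net, optimizer='adam', learning_rate=0.001,
                              loss='categorical_crossentropy')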
In [6]:
# Training
start_time = time.time()
model = tflearn.DNN(net, tensorboard_verbose=3)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True, batch_size=32)
print('Training duration (s): {}'.format(time.time() - start_time))
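tflearn's DNN also exposes an evaluate helper for measuring held-out accuracy; a quick check along these lines (batch size illustrative):
test_acc = model.evaluate(testX, testY, batch_size=32)
print('Test accuracy: {}'.format(test_acc))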
In [10]:
import numpy as np
from emojibot.utils.text_utils import clean_sentence, encode_sentence
test_sentence = "i love puppies"
test_sentence = clean_sentence(sentence=test_sentence)
test_sentence = encode_sentence(sentence=test_sentence, vocab=vocab)
test_sequence = pad_sequences([test_sentence], maxlen=max_sequence_length, value=0.)
probs = model.predict(test_sequence)[0]
pred_class = int(np.argmax(probs))  # argmax works for both list and ndarray outputs
labels[pred_class]
Out[10]:
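The same pipeline extends to batches; a sketch reusing the helpers above, with illustrative example sentences:
test_sentences = ['i love puppies', 'this is so frustrating']
encoded = [encode_sentence(sentence=clean_sentence(sentence=s), vocab=vocab)
           for s in test_sentences]
sequences = pad_sequences(encoded, maxlen=max_sequence_length, value=0.)
for sentence, p in zip(test_sentences, model.predict(sequences)):
    print('{} -> {}'.format(sentence, labels[int(np.argmax(p))]))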
In [11]:
model.save('emoji.model')
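To restore the saved model later, rebuild the same graph and call load; a minimal sketch:
# Assumes `net` has been rebuilt exactly as above in a fresh session.
model = tflearn.DNN(net)
model.load('emoji.model')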
In [9]:
# TODO: improve use of tensorboard - https://github.com/tflearn/tflearn/blob/master/examples/extending_tensorflow/summaries.py
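One possible first step for the TODO above (a sketch): give tflearn an explicit log directory and inspect the run with the tensorboard CLI; the directory path is an illustrative choice.
model = tflearn.DNN(net, tensorboard_verbose=3, tensorboard_dir='/tmp/tflearn_logs')
# Then, from a shell:
#   tensorboard --logdir=/tmp/tflearn_logs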