In this notebook, we stack two bidirectional LSTM layers to classify IMDB movie reviews by their sentiment.
In [1]:
import keras
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, SpatialDropout1D, LSTM
from keras.layers.wrappers import Bidirectional
from keras.callbacks import ModelCheckpoint
import os
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# where checkpoint files are written:
output_dir = 'model_output/stackedLSTM'

# training schedule:
epochs = 4
batch_size = 128

# embedding and input shaping:
n_dim = 64                 # length of each word's embedding vector
n_unique_words = 10000     # vocabulary size requested from the dataset loader
max_review_length = 200    # reviews are padded/truncated to this many tokens
pad_type = 'pre'
trunc_type = pad_type      # pad and truncate on the same side of the sequence
drop_embed = 0.2           # dropout rate applied right after the embedding

# stacked LSTM layers:
n_lstm_1 = 64              # units in the first recurrent layer
n_lstm_2 = 64              # units in the second recurrent layer
drop_lstm = 0.2            # dropout rate used inside both LSTM layers
In [3]:
# Load the IMDB train/validation splits, limiting the vocabulary to n_unique_words.
# Only num_words is passed; the n_words_to_skip setting used previously was removed.
(x_train, y_train), (x_valid, y_valid) = imdb.load_data(
    num_words=n_unique_words
)
In [4]:
# Bring every review to exactly max_review_length tokens, padding with 0 and
# truncating on the side selected by pad_type / trunc_type.
pad_kwargs = dict(
    maxlen=max_review_length,
    padding=pad_type,
    truncating=trunc_type,
    value=0,
)
x_train = pad_sequences(x_train, **pad_kwargs)
x_valid = pad_sequences(x_valid, **pad_kwargs)
In [5]:
# Embedding -> spatial dropout -> two stacked bidirectional LSTMs -> one sigmoid unit.
model = Sequential([
    Embedding(n_unique_words, n_dim, input_length=max_review_length),
    SpatialDropout1D(drop_embed),
    # the first recurrent layer returns the full sequence so the second
    # recurrent layer still receives a temporal dimension
    Bidirectional(LSTM(n_lstm_1, dropout=drop_lstm, return_sequences=True)),
    Bidirectional(LSTM(n_lstm_2, dropout=drop_lstm)),
    Dense(1, activation='sigmoid'),
])
In [6]:
# Print the layer-by-layer summary of the model.
# Each Bidirectional wrapper runs its LSTM in both reading directions, so the
# parameter count reported for those layers is double that of a single LSTM.
model.summary()
In [7]:
# Binary sentiment target: cross-entropy loss on the sigmoid output, Adam optimizer,
# and accuracy reported alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
In [8]:
# Make sure the output directory exists before any checkpoint is written into it.
# (The makedirs call must be indented under the `if`; unindented it is a syntax error.)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Checkpoint callback; files are named by two-digit epoch number inside output_dir.
modelcheckpoint = ModelCheckpoint(filepath=output_dir+"/weights.{epoch:02d}.hdf5")
In [9]:
# Train the model, evaluating on the validation split and passing the checkpoint
# callback so weights are saved under output_dir.
# Recorded result from a run: 87.6% validation accuracy in epoch 2.
model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_valid, y_valid),
    callbacks=[modelcheckpoint],
)
Out[9]:
In [10]:
# Restore the weights saved for the epoch chosen from the training log.
# The original note marks the checkpoint numbering as zero-indexed, i.e.
# weights.01 is read here as the file written after the second epoch.
# NOTE(review): some Keras releases number {epoch} starting at 1 instead —
# confirm which file in output_dir corresponds to the intended epoch.
model.load_weights(output_dir+"/weights.01.hdf5") # zero-indexed
In [11]:
# Predicted probability of positive sentiment for every validation review.
# Model.predict is used instead of Sequential.predict_proba: with the single
# sigmoid output both return the same array, and predict_proba has been removed
# from newer Keras releases, so predict keeps this cell runnable across versions.
y_hat = model.predict(x_valid)
In [12]:
# Distribution of predicted probabilities on the validation set ...
plt.hist(y_hat)
# ... with the 0.5 decision threshold drawn as a vertical orange line.
# Assigning to _ keeps the returned line object out of the cell output.
_ = plt.axvline(0.5, color='orange')
In [13]:
# Area under the ROC curve on the validation set, shown as a percentage
# with two decimal places.
auc_fraction = roc_auc_score(y_valid, y_hat)
"{:0.2f}".format(auc_fraction * 100.0)
Out[13]:
In [ ]: