In this notebook, we stack a bidirectional LSTM on top of a convolutional layer to classify IMDB movie reviews by their sentiment.
In [1]:
import keras
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, SpatialDropout1D, LSTM
from keras.layers import Bidirectional # moved here from keras.layers.wrappers in newer Keras
from keras.layers import Conv1D, MaxPooling1D
from keras.callbacks import ModelCheckpoint
import os
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# output directory name:
output_dir = 'model_output/cnnLSTM'
# training:
epochs = 4
batch_size = 128
# vector-space embedding:
n_dim = 64
n_unique_words = 10000
max_review_length = 200
pad_type = trunc_type = 'pre'
drop_embed = 0.2
# convolutional layer architecture:
n_conv = 64
k_conv = 3
mp_size = 4
# LSTM layer architecture:
n_lstm = 64
drop_lstm = 0.2
In [3]:
(x_train, y_train), (x_valid, y_valid) = imdb.load_data(num_words=n_unique_words)
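Each review arrives as a list of integer word indices rather than text. To eyeball one in plain English, here is a minimal sketch using imdb.get_word_index(); the offset of 3 accounts for the indices Keras reserves for padding, start-of-review, and unknown tokens:

word_index = imdb.get_word_index()
index_word = {index + 3: word for word, index in word_index.items()}
index_word.update({0: 'PAD', 1: 'START', 2: 'UNK'})  # the three reserved indices
print(' '.join(index_word.get(i, 'UNK') for i in x_train[0]))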
In [4]:
x_train = pad_sequences(x_train, maxlen=max_review_length, padding=pad_type, truncating=trunc_type, value=0)
x_valid = pad_sequences(x_valid, maxlen=max_review_length, padding=pad_type, truncating=trunc_type, value=0)
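A quick sanity check that every review is now exactly max_review_length tokens long:

print(x_train.shape)  # expect (25000, 200)
print(x_valid.shape)  # expect (25000, 200)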
In [5]:
model = Sequential()
model.add(Embedding(n_unique_words, n_dim, input_length=max_review_length))
model.add(SpatialDropout1D(drop_embed))
model.add(Conv1D(n_conv, k_conv, activation='relu'))
model.add(MaxPooling1D(mp_size))
model.add(Bidirectional(LSTM(n_lstm, dropout=drop_lstm)))
model.add(Dense(1, activation='sigmoid'))
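For reference, the tensor shapes through the stack, hand-traced from the hyperparameters above (Conv1D uses its default 'valid' padding):

# (batch, 200)       integer token ids in
# (batch, 200, 64)   after Embedding (SpatialDropout1D leaves the shape unchanged)
# (batch, 198, 64)   after Conv1D: 200 - 3 + 1 timesteps
# (batch, 49, 64)    after MaxPooling1D: floor(198 / 4)
# (batch, 128)       after the biLSTM: 64 units per direction, concatenated
# (batch, 1)         sigmoid sentiment probability out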
In [6]:
# the Bidirectional wrapper doubles the LSTM's parameter count: one full set of weights per reading direction
model.summary()
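As a back-of-the-envelope check on the summary, the parameter counts work out as follows (a hand calculation from the hyperparameters above, not notebook output):

# Embedding:            10,000 words x 64 dims                          = 640,000
# Conv1D:               3 (kernel) x 64 (channels) x 64 (filters) + 64  =  12,352
# LSTM, one direction:  4 gates x ((64 + 64) x 64 weights + 64 biases)  =  33,024
# Bidirectional LSTM:   2 x 33,024                                      =  66,048
# Dense:                128 biLSTM outputs x 1 + 1 bias                 =     129
# Total:                                                                   718,529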
In [7]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
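For reference, binary cross-entropy penalizes confident wrong predictions heavily; a hand-rolled NumPy equivalent of the per-example loss (illustrative, not the Keras internals):

import numpy as np

def binary_crossentropy(y, y_hat, eps=1e-7):
    # -(y*log(y_hat) + (1-y)*log(1-y_hat)), clipped to avoid log(0)
    y_hat = np.clip(y_hat, eps, 1.0 - eps)
    return -(y * np.log(y_hat) + (1.0 - y) * np.log(1.0 - y_hat))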
In [8]:
# create the output directory before the checkpoint callback tries to write into it:
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
modelcheckpoint = ModelCheckpoint(filepath=output_dir+"/weights.{epoch:02d}.hdf5")
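This callback writes a checkpoint after every epoch, which lets us reload any epoch's weights below. If you only want to keep the single best model, ModelCheckpoint also accepts monitor and save_best_only, e.g.:

# alternative: retain only the checkpoint with the lowest validation loss
best_checkpoint = ModelCheckpoint(filepath=output_dir+"/weights.best.hdf5",
                                  monitor='val_loss', save_best_only=True)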
In [9]:
# 87.4% validation accuracy in epoch 1:
# slightly higher than the bidirectional LSTM and about the same as the stacked biLSTM,
# but each epoch takes roughly a third as long as the former's and a sixth as long as the latter's
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_valid, y_valid), callbacks=[modelcheckpoint])
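fit() returns a History object; a minimal sketch of the learning curves, assuming the call above is captured as history = model.fit(...):

# metric keys may be 'accuracy'/'val_accuracy' in newer Keras versions
plt.plot(history.history['acc'], label='train')
plt.plot(history.history['val_acc'], label='valid')
plt.xlabel('epoch')
plt.ylabel('accuracy')
_ = plt.legend()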
In [10]:
model.load_weights(output_dir+"/weights.00.hdf5") # the epoch placeholder is zero-indexed here, so these are epoch 1's weights
In [11]:
y_hat = model.predict(x_valid) # predict() returns the sigmoid probabilities; predict_proba() was removed from newer Keras
In [12]:
plt.hist(y_hat)
_ = plt.axvline(x=0.5, color='orange')
In [13]:
"{:0.2f}".format(roc_auc_score(y_valid, y_hat)*100.0)
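ROC AUC is threshold-free; as a threshold-dependent companion, a minimal sketch of plain accuracy at a 0.5 cutoff:

import numpy as np
# fraction of validation reviews whose thresholded prediction matches the label
print("{:0.2f}".format(np.mean((y_hat.flatten() > 0.5) == y_valid) * 100.0))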