Multi-ConvNet Architectures

In this notebook, we concatenate the outputs of multiple parallel convolutional layers to classify IMDB movie reviews by their sentiment.

Load dependencies


In [ ]:
import keras
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model # new!
from keras.layers import Input, concatenate # new! 
from keras.layers import Dense, Dropout, Embedding, SpatialDropout1D, Conv1D, GlobalMaxPooling1D
from keras.callbacks import ModelCheckpoint
import os
from sklearn.metrics import roc_auc_score 
import matplotlib.pyplot as plt 
%matplotlib inline

Set hyperparameters


In [ ]:
# output directory name:
output_dir = 'model_output/multiconv'

# training:
epochs = 4
batch_size = 128

# vector-space embedding: 
n_dim = 64
n_unique_words = 5000 
max_review_length = 400
pad_type = trunc_type = 'pre'
drop_embed = 0.2 

# convolutional layer architecture:
n_conv_1 = n_conv_2 = n_conv_3 = 256 
k_conv_1 = 3
k_conv_2 = 2
k_conv_3 = 4

# dense layer architecture: 
n_dense = 256
dropout = 0.2
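
With kernel sizes of 2, 3, and 4, the three towers act roughly as bigram, trigram, and 4-gram detectors. Since GlobalMaxPooling1D outputs one value per filter, concatenating the towers will hand the dense layer a 768-dimensional feature vector; a quick check of that arithmetic:


In [ ]:
n_conv_1 + n_conv_2 + n_conv_3  # 256 * 3 = 768 concatenated features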

Load data


In [ ]:
(x_train, y_train), (x_valid, y_valid) = imdb.load_data(num_words=n_unique_words)
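
As a sanity check, we can map an index sequence back to words. This is a hypothetical extra cell, not part of the original notebook; it uses imdb.get_word_index() and assumes Keras's default index_from=3 offset for the reserved padding/start/unknown tokens.


In [ ]:
word_index = imdb.get_word_index()  # maps word -> index (1-based)
index_word = {i+3: w for w, i in word_index.items()}  # undo the index_from=3 offset
index_word[0], index_word[1], index_word[2] = 'PAD', 'START', 'UNK'
' '.join(index_word.get(i, 'UNK') for i in x_train[0])  # decode the first review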

Preprocess data


In [ ]:
x_train = pad_sequences(x_train, maxlen=max_review_length, padding=pad_type, truncating=trunc_type, value=0)
x_valid = pad_sequences(x_valid, maxlen=max_review_length, padding=pad_type, truncating=trunc_type, value=0)
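
To confirm the preprocessing, we can inspect the array shapes; the IMDB dataset ships with 25,000 training and 25,000 validation reviews, each now padded or truncated to 400 tokens:


In [ ]:
x_train.shape, x_valid.shape  # expected: ((25000, 400), (25000, 400))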

Design neural network architecture


In [ ]:
input_layer = Input(shape=(max_review_length,), dtype='int16', name='input') # supports integers +/- 32.7k 
embedding_layer = Embedding(n_unique_words, n_dim, input_length=max_review_length, name='embedding')(input_layer)
drop_embed_layer = SpatialDropout1D(drop_embed, name='drop_embed')(embedding_layer)

# three parallel convolutional towers, each reading the same embedded input
# (in the experiments noted under "Train!", they were added one at a time):
conv_1 = Conv1D(n_conv_1, k_conv_1, activation='relu', name='conv_1')(drop_embed_layer)
maxp_1 = GlobalMaxPooling1D(name='maxp_1')(conv_1)

conv_2 = Conv1D(n_conv_2, k_conv_2, activation='relu', name='conv_2')(drop_embed_layer)
maxp_2 = GlobalMaxPooling1D(name='maxp_2')(conv_2)

conv_3 = Conv1D(n_conv_3, k_conv_3, activation='relu', name='conv_3')(drop_embed_layer)
maxp_3 = GlobalMaxPooling1D(name='maxp_3')(conv_3)

# concatenate the three pooled feature vectors into a single 768-d vector:
concat = concatenate([maxp_1, maxp_2, maxp_3])

dense_layer = Dense(n_dense, activation='relu', name='dense')(concat)
drop_dense_layer = Dropout(dropout, name='drop_dense')(dense_layer)
dense_2 = Dense(64, activation='relu', name='dense_2')(drop_dense_layer)
dropout_2 = Dropout(dropout, name='drop_dense_2')(dense_2)

predictions = Dense(1, activation='sigmoid', name='output')(dropout_2)

model = Model(input_layer, predictions)

In [ ]:
model.summary()
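
To see the three towers branching from the embedding and merging at the concatenation, we can render the model graph; a sketch, assuming pydot and graphviz are installed:


In [ ]:
from keras.utils import plot_model
plot_model(model, to_file='multiconv.png', show_shapes=True)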

Configure model


In [ ]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [ ]:
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

modelcheckpoint = ModelCheckpoint(filepath=output_dir+"/weights.{epoch:02d}.hdf5")
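
Optionally, an EarlyStopping callback could halt training once validation loss stops improving. This is a sketch of a possible addition; it was not used in the runs noted below:


In [ ]:
from keras.callbacks import EarlyStopping
earlystopping = EarlyStopping(monitor='val_loss', patience=2)
# to use it, pass callbacks=[modelcheckpoint, earlystopping] to model.fit() below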

Train!


In [ ]:
# start with conv_1 only and no concat: 89.1% validation accuracy in epoch 2, as in the earlier single-ConvNet notebook
# add conv_2: 89.5% in epoch 3
# add conv_3: ditto
# add dense_2: ditto in epoch 2
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_valid, y_valid), callbacks=[modelcheckpoint])

Evaluate


In [ ]:
model.load_weights(output_dir+"/weights.01.hdf5") # zero-indexed
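
To confirm the restored weights, we can re-score the validation set; model.evaluate() returns the loss followed by the metrics named at compile time:


In [ ]:
model.evaluate(x_valid, y_valid)  # [val_loss, val_accuracy]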

In [ ]:
y_hat = model.predict(x_valid)

In [ ]:
plt.hist(y_hat)
_ = plt.axvline(x=0.5, color='orange')

In [ ]:
"{:0.2f}".format(roc_auc_score(y_valid, y_hat)*100.0)
