Chapter 6.3.8 - Using bidirectional RNNs

LSTM with reversed sequences

As a baseline, the same kind of LSTM used earlier in the chapter is trained on reviews whose word order has been reversed. If it reaches roughly the same accuracy as the chronological version, word order matters less for this sentiment task than one might expect, which motivates processing the text in both directions with a bidirectional RNN.


In [1]:
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

In [2]:
# Number of words to consider as features
max_features = 10000
# Cut texts after this number of words
maxlen = 500

In [3]:
# Load data
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

In [4]:
# Reverse sequences
x_train = [x[::-1] for x in x_train]
x_test = [x[::-1] for x in x_test]

In [6]:
# Peek at the first training review after reversal (word indices, last word first)
x_train[0]


Out[6]:
[32, 178, 19, 5345, 16, 15, 32, 103, 113, 4472, 16, 5, 283, 16, 12, 88,
 1334, 38, 16, 65, 226, 4, 104, 25, 92, 224, 28, 36, 51, 18, 5535, 30,
 144, 5, 480, 26, 476, 134, 21, 22, 226, 4, 18, 7486, 194, 6, 141, 26,
 56, 2071, 32, 98, 297, 15, 381, 4, 88, 104, 13, 1029, 2, 4, 7, 46,
 317, 400, 26, 476, 530, 43, 71, 36, 723, 5, 3766, 7, 2, 4, 256, 15,
 5952, 117, 107, 4, 8, 2, 82, 16, 407, 14, 5, 52, 77, 28, 215, 12,
 22, 6, 33, 1415, 25, 48, 135, 36, 51, 124, 25, 5, 619, 38, 16, 12,
 130, 4, 33, 3785, 66, 480, 16, 5244, 2223, 4, 5, 106, 8, 316, 8, 12,
 386, 62, 5, 2, 18, 626, 16, 12, 17, 515, 17, 22, 4, 1247, 13, 15,
 76, 38, 530, 43, 16, 12, 87, 71, 22, 4, 469, 4613, 1920, 4, 22, 14,
 19, 2025, 147, 6, 16, 50, 192, 4, 447, 13, 38, 546, 17, 1111, 4536, 172,
 4, 39, 385, 336, 2, 167, 112, 172, 4, 150, 5, 284, 480, 35, 9, 2,
 670, 50, 112, 838, 43, 100, 25, 5, 256, 36, 173, 4, 3941, 66, 4468, 458,
 65, 1385, 1622, 973, 530, 43, 16, 22, 14, 1]
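
To sanity-check the reversal, the integer indices can be decoded back to words using the IMDB word index. The following is a minimal sketch, not part of the original notebook, assuming the usual Keras IMDB convention that indices 0-2 are reserved, so decoded indices are offset by 3:

# Hypothetical check (not in the original notebook): decode the first
# reversed review back to words; it should read back to front.
word_index = imdb.get_word_index()
reverse_word_index = {value: key for (key, value) in word_index.items()}
decoded_review = ' '.join(reverse_word_index.get(i - 3, '?') for i in x_train[0])
print(decoded_review[:100])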

In [7]:
# Sequence padding
x_train = pad_sequences(x_train, maxlen = maxlen)
x_test = pad_sequences(x_test, maxlen = maxlen)
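
pad_sequences turns the lists of variable-length reversed reviews into 2D integer tensors of shape (samples, maxlen), truncating longer reviews and left-padding shorter ones with zeros. A quick shape check, assuming the standard 25,000-review IMDB train and test splits, confirms this:

# Sketch of a sanity check on the padded tensors (the sample counts are an
# assumption based on the standard IMDB splits)
print(x_train.shape)  # expected: (25000, 500)
print(x_test.shape)   # expected: (25000, 500)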

In [8]:
model = Sequential()
model.add(Embedding(input_dim = max_features, output_dim = 128))
model.add(LSTM(units = 32))
model.add(Dense(units = 1, activation = 'sigmoid'))

model.compile(optimizer = 'rmsprop',
              loss = 'binary_crossentropy',
              metrics = ['acc'])
history = model.fit(x = x_train, 
                    y = y_train,
                    epochs = 10,
                    batch_size = 128,
                    validation_split = 0.2)


Train on 20000 samples, validate on 5000 samples
Epoch 1/10
20000/20000 [==============================] - 218s 11ms/step - loss: 0.4859 - acc: 0.7709 - val_loss: 0.3793 - val_acc: 0.8592
Epoch 2/10
20000/20000 [==============================] - 207s 10ms/step - loss: 0.3115 - acc: 0.8814 - val_loss: 0.3125 - val_acc: 0.8764
Epoch 3/10
20000/20000 [==============================] - 209s 10ms/step - loss: 0.2604 - acc: 0.9035 - val_loss: 0.3929 - val_acc: 0.8280
Epoch 4/10
20000/20000 [==============================] - 188s 9ms/step - loss: 0.2224 - acc: 0.9196 - val_loss: 0.4234 - val_acc: 0.8704
Epoch 5/10
20000/20000 [==============================] - 198s 10ms/step - loss: 0.1944 - acc: 0.9322 - val_loss: 0.4535 - val_acc: 0.8580
Epoch 6/10
20000/20000 [==============================] - 189s 9ms/step - loss: 0.1753 - acc: 0.9366 - val_loss: 0.3544 - val_acc: 0.8716
Epoch 7/10
20000/20000 [==============================] - 189s 9ms/step - loss: 0.1555 - acc: 0.9468 - val_loss: 0.4683 - val_acc: 0.8656
Epoch 8/10
20000/20000 [==============================] - 179s 9ms/step - loss: 0.1463 - acc: 0.9507 - val_loss: 0.4100 - val_acc: 0.8620
Epoch 9/10
20000/20000 [==============================] - 196s 10ms/step - loss: 0.1305 - acc: 0.9568 - val_loss: 0.3772 - val_acc: 0.8726
Epoch 10/10
20000/20000 [==============================] - 194s 10ms/step - loss: 0.1174 - acc: 0.9593 - val_loss: 0.4277 - val_acc: 0.8716
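
Validation accuracy reaches the high 80s, roughly matching what a chronological-order LSTM achieves on this task, while training accuracy keeps climbing, a sign of overfitting after the first few epochs. A small matplotlib sketch, not part of the original cells, can visualize the curves stored in history.history:

# Plot the training and validation accuracy/loss collected by model.fit
import matplotlib.pyplot as plt

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()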

Bidirectional RNN

A bidirectional RNN processes the input sequence in both directions, one recurrent layer reading it chronologically and another reading it antichronologically, and merges the two resulting representations. In Keras this is done by wrapping a recurrent layer in the Bidirectional wrapper, as shown below.


In [9]:
# Clear the session before building the next model
from keras import backend as K
K.clear_session()

In [10]:
from keras.layers import Bidirectional

In [11]:
model = Sequential()
model.add(Embedding(input_dim = max_features, output_dim = 32))
model.add(Bidirectional(LSTM(units = 32)))
model.add(Dense(units = 1, activation='sigmoid'))

model.compile(optimizer = 'rmsprop', 
              loss = 'binary_crossentropy', 
              metrics = ['acc'])
history = model.fit(x = x_train, 
                    y = y_train, 
                    epochs = 10, 
                    batch_size = 128, 
                    validation_split = 0.2)


Train on 20000 samples, validate on 5000 samples
Epoch 1/10
20000/20000 [==============================] - 432s 22ms/step - loss: 0.5571 - acc: 0.7197 - val_loss: 0.3622 - val_acc: 0.8662
Epoch 2/10
20000/20000 [==============================] - 471s 24ms/step - loss: 0.3402 - acc: 0.8692 - val_loss: 0.3410 - val_acc: 0.8806
Epoch 3/10
20000/20000 [==============================] - 423s 21ms/step - loss: 0.2719 - acc: 0.8981 - val_loss: 0.4115 - val_acc: 0.8172
Epoch 4/10
20000/20000 [==============================] - 384s 19ms/step - loss: 0.2352 - acc: 0.9152 - val_loss: 0.3288 - val_acc: 0.8768
Epoch 5/10
20000/20000 [==============================] - 404s 20ms/step - loss: 0.2108 - acc: 0.9253 - val_loss: 0.3727 - val_acc: 0.8814
Epoch 6/10
20000/20000 [==============================] - 348s 17ms/step - loss: 0.1851 - acc: 0.9353 - val_loss: 0.4753 - val_acc: 0.8028
Epoch 7/10
20000/20000 [==============================] - 347s 17ms/step - loss: 0.1714 - acc: 0.9397 - val_loss: 0.3587 - val_acc: 0.8806
Epoch 8/10
20000/20000 [==============================] - 344s 17ms/step - loss: 0.1516 - acc: 0.9487 - val_loss: 0.3914 - val_acc: 0.8492
Epoch 9/10
20000/20000 [==============================] - 344s 17ms/step - loss: 0.1412 - acc: 0.9529 - val_loss: 0.3413 - val_acc: 0.8656
Epoch 10/10
20000/20000 [==============================] - 391s 20ms/step - loss: 0.1361 - acc: 0.9562 - val_loss: 0.3552 - val_acc: 0.8732
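
Validation accuracy again peaks around 88%, slightly above the reversed-sequence baseline, with similar overfitting. As a final step, not in the original notebook, the bidirectional model can be scored on the held-out test set, which was reversed and padded with the same preprocessing as the training data; the exact numbers will vary from run to run:

# Evaluate on the test set (reversed and padded above)
test_loss, test_acc = model.evaluate(x_test, y_test)
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)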