The SimpleRNN layer takes inputs of shape (batch_size, timesteps, input_features).
In [2]:
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN
In [3]:
model = Sequential()
model.add(Embedding(10000, 32))
model.add(SimpleRNN(32))
model.summary()
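The summary makes the shapes concrete: the Embedding layer holds 10,000 × 32 = 320,000 weights, and SimpleRNN(32) has 32 × 32 + 32 × 32 + 32 = 2,080 (input weights, recurrent weights, and biases). As a quick sanity check of the shape convention, a dummy batch of word indices can be pushed through the model (a minimal sketch; the random indices below are purely illustrative):
import numpy as np
# The Embedding turns a (batch_size, timesteps) batch of integer word indices into the
# (batch_size, timesteps, input_features) tensor the SimpleRNN expects.
dummy_batch = np.random.randint(0, 10000, size=(4, 20))  # 4 sequences of 20 word indices
print(model.predict(dummy_batch).shape)                  # (4, 32): one 32-d output per sequence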
Like all recurrent layers in Keras, SimpleRNN can be run in two different modes: it can return either the full sequence of successive outputs for each timestep (a 3D tensor of shape (batch_size, timesteps, output_features)) or only the last output for each input sequence (a 2D tensor of shape (batch_size, output_features)). The mode is controlled by the return_sequences constructor argument.
In [5]:
model = Sequential()
model.add(Embedding(10000, 32))
model.add(SimpleRNN(32, return_sequences=True))
model.summary()
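To see the two modes side by side, it can help to compare the output shapes directly (a minimal sketch; the layer sizes are arbitrary):
seq_model = Sequential()
seq_model.add(Embedding(10000, 32))
seq_model.add(SimpleRNN(32, return_sequences=True))
print(seq_model.output_shape)   # (None, None, 32): the full 3D sequence of outputs

last_model = Sequential()
last_model.add(Embedding(10000, 32))
last_model.add(SimpleRNN(32))
print(last_model.output_shape)  # (None, 32): only the last output per sequence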
As with convolutional neural networks, stacking several recurrent layers on top of each other can increase the representational power of the network. When stacking, every intermediate layer must return its full sequence of outputs (return_sequences=True) so that the next recurrent layer receives a 3D input; only the final recurrent layer may return just the last output.
In [8]:
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=32))
model.add(SimpleRNN(units=32, return_sequences=True))
model.add(SimpleRNN(units=32, return_sequences=True))
model.add(SimpleRNN(units=32, return_sequences=True))
# The last layer returns only the last output for each sequence
model.add(SimpleRNN(32))
model.summary()
In [9]:
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
In [10]:
# Number of words to be used as features
max_features = 10000
# Cut off each review after this number of words
maxlen = 500
batch_size = 32
In [12]:
print('Loading data...')
(input_train, y_train), (input_test, y_test) = imdb.load_data(num_words=max_features)
print(len(input_train), 'train sequences')
print(len(input_test), 'test sequences')
print('Pad sequences (samples x time)')
input_train = pad_sequences(input_train, maxlen=maxlen)
input_test = pad_sequences(input_test, maxlen=maxlen)
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)
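Note that pad_sequences pads and truncates at the start of each sequence by default, so reviews longer than maxlen keep their last maxlen words. A toy illustration (the numbers below are made up and unrelated to the IMDB data):
print(pad_sequences([[1, 2, 3]], maxlen=5))           # [[0 0 1 2 3]]: zero-padded at the front
print(pad_sequences([[1, 2, 3, 4, 5, 6]], maxlen=5))  # [[2 3 4 5 6]]: truncated at the front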
In [13]:
from keras.layers import Dense
model = Sequential()
model.add(Embedding(input_dim=max_features, output_dim=32))
model.add(SimpleRNN(units=32))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])
history = model.fit(x=input_train,
                    y=y_train,
                    epochs=10,
                    batch_size=128,
                    validation_split=0.2)
In [14]:
import matplotlib.pyplot as plt
In [15]:
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
In [16]:
from keras.layers import LSTM
model = Sequential()
model.add(Embedding(input_dim=max_features, output_dim=32))
model.add(LSTM(units=32))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])
history = model.fit(x=input_train,
                    y=y_train,
                    epochs=10,
                    batch_size=128,
                    validation_split=0.2)
In [18]:
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
Predictably, the LSTM handles long sequences better than SimpleRNN, but it is also much slower to train. The no-free-lunch theorem in practice.
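Part of the cost is simply parameter count: an LSTM layer has roughly four times as many weights as a SimpleRNN of the same size (8,320 vs. 2,080 here), since it keeps separate weights for its input, forget, and output gates and its candidate cell state. One rough way to quantify the speed difference is to time a single training epoch of each architecture (a sketch that assumes input_train and y_train are still in memory; exact timings depend on hardware and backend):
import time

def time_one_epoch(m):
    # Compile and train for one epoch, returning the elapsed wall-clock time
    m.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
    start = time.time()
    m.fit(input_train, y_train, epochs=1, batch_size=128, verbose=0)
    return time.time() - start

rnn_model = Sequential()
rnn_model.add(Embedding(max_features, 32))
rnn_model.add(SimpleRNN(32))
rnn_model.add(Dense(1, activation='sigmoid'))

lstm_model = Sequential()
lstm_model.add(Embedding(max_features, 32))
lstm_model.add(LSTM(32))
lstm_model.add(Dense(1, activation='sigmoid'))

print('SimpleRNN, one epoch: %.1f s' % time_one_epoch(rnn_model))
print('LSTM, one epoch:      %.1f s' % time_one_epoch(lstm_model))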