In [8]:
'''
# Trains an LSTM model on the IMDB sentiment classification task.


The dataset is actually too small for LSTM to be of any advantage
compared to simpler, much faster methods such as TF-IDF + LogReg.


**Notes**


- RNNs are tricky. Choice of batch size is important,
choice of loss and optimizer is critical, etc.
Some configurations won't converge.


- LSTM loss decrease patterns during training can be quite different
from what you see with CNNs/MLPs/etc.

'''
#from __future__ import print_function

#import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
from keras.datasets import imdb


# --- Hyperparameters -------------------------------------------------------
# Samples per batch, used for both training and evaluation.
batch_size = 32
# Every review is padded or truncated to exactly this many tokens.
maxlen = 80
# Vocabulary size: handed to the dataset loader as `num_words` and to the
# Embedding layer as its input dimension.
max_features = 20000


print('Loading data...')
# Each split unpacks into (sequences, labels); num_words caps the vocabulary
# at max_features.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')


print('padding sequences')
# Bring every review to exactly maxlen tokens. 'pre' is the pad_sequences
# default for both options and is only spelled out here for the reader:
# short reviews are padded at the front, long reviews keep their last
# maxlen tokens.
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=maxlen, padding='pre', truncating='pre')
    for reviews in (x_train, x_test)
)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('build model...')
# Three-layer stack: token embedding -> LSTM -> one sigmoid output unit.
# Declared as a list instead of successive .add() calls; the layers are
# created in the same order with the same settings.
model = Sequential([
    Embedding(input_dim=max_features, output_dim=128),
    LSTM(units=128, dropout=0.2, recurrent_dropout=0.2),
    Dense(units=1, activation='sigmoid'),
])

# Binary cross-entropy pairs with the single sigmoid output; accuracy is
# tracked as an additional metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

print('train...')
# NOTE: the test split is also passed as validation data, so the validation
# metrics logged for the final epoch come from the same samples that
# evaluate() scores below.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=1,
    validation_data=(x_test, y_test),
)

# evaluate() returns the loss followed by each compiled metric (accuracy).
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=batch_size)

print('Test score:', score)
print('Test accuracy:', acc)


Loading data...
25000 train sequences
25000 test sequences
padding sequences
x_train shape: (25000, 80)
x_test shape: (25000, 80)
build model...
train...
Train on 25000 samples, validate on 25000 samples
Epoch 1/1
25000/25000 [==============================] - 107s 4ms/step - loss: 0.4592 - acc: 0.7844 - val_loss: 0.3911 - val_acc: 0.8269
25000/25000 [==============================] - 14s 576us/step
Test score: 0.3910539735507965
Test accuracy: 0.82688

In [ ]: