In [1]:
import numpy as np
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Lambda, Embedding, GlobalAveragePooling1D
from keras.datasets import imdb
from keras import backend as K


Using TensorFlow backend.

In [2]:
vocabulary_size = 5000   # keep only the 5,000 most frequent words; rarer words map to the OOV index
embedding_size = 50      # dimensionality of the learned word vectors
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocabulary_size)

In [3]:
print(x_train[0])


[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 2, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 2, 19, 178, 32]

In [4]:
word2num = imdb.get_word_index()  # word -> frequency rank (1 = most frequent word)

In [5]:
num2word = {v: k for k, v in word2num.items()}  # invert the mapping: index -> word

In [6]:
print(" - ".join(num2word[x] for x in x_train[0]))


the - as - you - with - out - themselves - powerful - lets - loves - their - becomes - reaching - had - journalist - of - lot - from - anyone - to - have - after - out - atmosphere - never - more - room - and - it - so - heart - shows - to - years - of - every - never - going - and - help - moments - or - of - every - chest - visual - movie - except - her - was - several - of - enough - more - with - is - now - current - film - as - you - of - mine - potentially - unfortunately - of - you - than - him - that - with - out - themselves - her - get - for - was - camp - of - you - movie - sometimes - movie - that - with - scary - but - and - to - story - wonderful - that - in - seeing - in - character - to - of - 70s - and - with - heart - had - shadows - they - of - here - that - with - her - serious - to - have - does - when - from - why - what - have - critics - they - is - you - that - isn't - one - will - very - to - as - itself - with - other - and - in - of - seen - over - and - for - anyone - of - and - br - show's - to - whether - from - than - out - themselves - history - he - name - half - some - br - of - and - odd - was - two - most - of - mean - for - 1 - any - an - boat - she - he - should - is - thought - and - but - of - script - you - not - while - history - he - heart - to - real - at - and - but - when - from - one - bit - then - have - two - of - script - their - with - her - nobody - most - that - with - wasn't - to - with - armed - acting - watch - an - for - with - and - film - want - an
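
The decode above is close but shifted: by default imdb.load_data reserves index 0 for padding, 1 for the start marker, and 2 for out-of-vocabulary words, and offsets every real word by index_from=3, so the word behind index i is actually num2word[i - 3]. A minimal offset-aware decode (the INDEX_FROM constant and placeholder tokens are my own naming), as a sketch:

INDEX_FROM = 3  # imdb.load_data default offset; indices 0/1/2 are reserved
specials = {0: "<pad>", 1: "<start>", 2: "<oov>"}
print(" - ".join(specials.get(x, num2word.get(x - INDEX_FROM, "<?>")) for x in x_train[0]))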

In [7]:
y_train[0]


Out[7]:
1

In [8]:
x_train.shape, x_train.dtype


Out[8]:
((25000,), dtype('O'))
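
The dtype('O') shows that x_train is still a ragged object array: 25,000 Python lists of word indices, each with its own length. A quick, purely illustrative check of the length spread before padding:

lengths = [len(review) for review in x_train]
print(min(lengths), int(np.mean(lengths)), max(lengths))  # shortest / mean / longest review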

In [9]:
maxlen = 400  # fix every review to 400 tokens
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)  # pads/truncates at the front by default
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

In [10]:
x_train[0]


Out[10]:
array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    1,   14,   22,   16,   43,
        530,  973, 1622, 1385,   65,  458, 4468,   66, 3941,    4,  173,
         36,  256,    5,   25,  100,   43,  838,  112,   50,  670,    2,
          9,   35,  480,  284,    5,  150,    4,  172,  112,  167,    2,
        336,  385,   39,    4,  172, 4536, 1111,   17,  546,   38,   13,
        447,    4,  192,   50,   16,    6,  147, 2025,   19,   14,   22,
          4, 1920, 4613,  469,    4,   22,   71,   87,   12,   16,   43,
        530,   38,   76,   15,   13, 1247,    4,   22,   17,  515,   17,
         12,   16,  626,   18,    2,    5,   62,  386,   12,    8,  316,
          8,  106,    5,    4, 2223,    2,   16,  480,   66, 3785,   33,
          4,  130,   12,   16,   38,  619,    5,   25,  124,   51,   36,
        135,   48,   25, 1415,   33,    6,   22,   12,  215,   28,   77,
         52,    5,   14,  407,   16,   82,    2,    8,    4,  107,  117,
          2,   15,  256,    4,    2,    7, 3766,    5,  723,   36,   71,
         43,  530,  476,   26,  400,  317,   46,    7,    4,    2, 1029,
         13,  104,   88,    4,  381,   15,  297,   98,   32, 2071,   56,
         26,  141,    6,  194,    2,   18,    4,  226,   22,   21,  134,
        476,   26,  480,    5,  144,   30,    2,   18,   51,   36,   28,
        224,   92,   25,  104,    4,  226,   65,   16,   38, 1334,   88,
         12,   16,  283,    5,   16, 4472,  113,  103,   32,   15,   16,
          2,   19,  178,   32], dtype=int32)
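
The zeros at the front show that pad_sequences pads (and truncates) at the start of each sequence by default, so the review itself sits at the end of the row. The data is now a dense matrix the Embedding layer can consume directly:

print(x_train.shape, x_train.dtype)  # (25000, 400) int32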

In [11]:
model = Sequential()
# learn a 50-dimensional vector for each of the 5,000 vocabulary indices
model.add(Embedding(input_dim=vocabulary_size, output_dim=embedding_size, embeddings_initializer='glorot_uniform'))
# average the word vectors across the sequence into a single fixed-size review vector
model.add(GlobalAveragePooling1D())
# one sigmoid unit on top: logistic regression over the averaged embedding
model.add(Dense(1, activation="sigmoid"))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=32,
          epochs=10,
          validation_data=(x_test, y_test))


Train on 25000 samples, validate on 25000 samples
Epoch 1/10
25000/25000 [==============================] - 2s - loss: 0.6091 - acc: 0.7318 - val_loss: 0.5019 - val_acc: 0.8090
Epoch 2/10
25000/25000 [==============================] - 1s - loss: 0.4120 - acc: 0.8578 - val_loss: 0.3708 - val_acc: 0.8640
Epoch 3/10
25000/25000 [==============================] - 1s - loss: 0.3239 - acc: 0.8823 - val_loss: 0.3230 - val_acc: 0.8760
Epoch 4/10
25000/25000 [==============================] - 1s - loss: 0.2830 - acc: 0.8951 - val_loss: 0.3019 - val_acc: 0.8818
Epoch 5/10
25000/25000 [==============================] - 1s - loss: 0.2573 - acc: 0.9039 - val_loss: 0.2932 - val_acc: 0.8828
Epoch 6/10
25000/25000 [==============================] - 1s - loss: 0.2396 - acc: 0.9108 - val_loss: 0.2855 - val_acc: 0.8839
Epoch 7/10
25000/25000 [==============================] - 2s - loss: 0.2262 - acc: 0.9161 - val_loss: 0.2808 - val_acc: 0.8874
Epoch 8/10
25000/25000 [==============================] - 1s - loss: 0.2150 - acc: 0.9203 - val_loss: 0.2796 - val_acc: 0.8870
Epoch 9/10
25000/25000 [==============================] - 1s - loss: 0.2055 - acc: 0.9243 - val_loss: 0.2821 - val_acc: 0.8864
Epoch 10/10
25000/25000 [==============================] - 1s - loss: 0.1975 - acc: 0.9276 - val_loss: 0.2851 - val_acc: 0.8865
Out[11]:
<keras.callbacks.History at 0x7f8650f69f28>

In [12]:
score, acc = model.evaluate(x_test, y_test, batch_size=32)
print()
print('Test score:', score)
print('Test accuracy:', acc)


24672/25000 [============================>.] - ETA: 0s
Test score: 0.285111816511
Test accuracy: 0.88648
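
To score a new review, the preprocessing has to be reproduced by hand: look up each word in word2num, apply the same index_from=3 offset and vocabulary cap that imdb.load_data used, pad to maxlen, and call model.predict. A rough sketch (encode_review and the sample sentences are made up for illustration, and the tokenization is much cruder than the one used to build the original word index):

def encode_review(text, index_from=3, oov_char=2, start_char=1):
    # hypothetical helper approximating imdb.load_data's preprocessing
    ids = [start_char]
    for word in text.lower().split():
        rank = word2num.get(word)
        if rank is not None and rank + index_from < vocabulary_size:
            ids.append(rank + index_from)
        else:
            ids.append(oov_char)
    return sequence.pad_sequences([ids], maxlen=maxlen)

print(model.predict(encode_review("a wonderful heartfelt film with great acting"))[0][0])
print(model.predict(encode_review("boring plot and terrible acting"))[0][0])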