In [1]:
from keras.datasets import reuters

In [2]:
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)


Downloading data from https://s3.amazonaws.com/text-datasets/reuters.npz
2113536/2110848 [==============================] - 4s 2us/step

In [3]:
len(train_data)


Out[3]:
8982

In [4]:
len(test_data)


Out[4]:
2246

In [5]:
word_index = reuters.get_word_index()


Downloading data from https://s3.amazonaws.com/text-datasets/reuters_word_index.json
557056/550378 [==============================] - 3s 5us/step

In [6]:
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
decoded_newswire = ' ' .join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])

In [7]:
decoded_newswire


Out[7]:
'? ? ? said as a result of its december acquisition of space co it expects earnings per share in 1987 of 1 15 to 1 30 dlrs per share up from 70 cts in 1986 the company said pretax net should rise to nine to 10 mln dlrs from six mln dlrs in 1986 and rental operation revenues to 19 to 22 mln dlrs from 12 5 mln dlrs it said cash flow per share this year should be 2 50 to three dlrs reuter 3'

In [8]:
import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1
    return results
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

In [11]:
# Manual
def to_one_hot(labels, dimension=46):
    results = np.zeros((len(labels), dimension))
    for i, label in enumerate(labels):
        results[i, label] = 1
    return results
one_hot_train_labels = to_one_hot(train_labels)
one_hot_test_labels = to_one_hot(test_labels)

In [12]:
# Built-in way
from keras.utils.np_utils import to_categorical

one_hot_train_labels = to_categorical(train_labels)
one_hot_test_labels = to_categorical(test_labels)

In [13]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(46, activation='softmax'))

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [14]:
x_val = x_train[:1000]
partial_x_train = x_train[1000:]

y_val = one_hot_train_labels[:1000]
partial_y_train = one_hot_train_labels[1000:]

In [15]:
history = model.fit(partial_x_train, partial_y_train,
                   epochs=20, batch_size=512, validation_data=(x_val, y_val))


Train on 7982 samples, validate on 1000 samples
Epoch 1/20
7982/7982 [==============================] - 2s 255us/step - loss: 2.5306 - acc: 0.4962 - val_loss: 1.7180 - val_acc: 0.6120
Epoch 2/20
7982/7982 [==============================] - 1s 82us/step - loss: 1.4430 - acc: 0.6878 - val_loss: 1.3435 - val_acc: 0.7060
Epoch 3/20
7982/7982 [==============================] - 1s 83us/step - loss: 1.0929 - acc: 0.7661 - val_loss: 1.1704 - val_acc: 0.7430
Epoch 4/20
7982/7982 [==============================] - 1s 83us/step - loss: 0.8682 - acc: 0.8166 - val_loss: 1.0788 - val_acc: 0.7600
Epoch 5/20
7982/7982 [==============================] - 1s 84us/step - loss: 0.7020 - acc: 0.8483 - val_loss: 0.9844 - val_acc: 0.7830
Epoch 6/20
7982/7982 [==============================] - 1s 85us/step - loss: 0.5666 - acc: 0.8796 - val_loss: 0.9401 - val_acc: 0.8030
Epoch 7/20
7982/7982 [==============================] - 1s 82us/step - loss: 0.4592 - acc: 0.9039 - val_loss: 0.9090 - val_acc: 0.8010
Epoch 8/20
7982/7982 [==============================] - 1s 82us/step - loss: 0.3704 - acc: 0.9226 - val_loss: 0.9359 - val_acc: 0.7890
Epoch 9/20
7982/7982 [==============================] - 1s 85us/step - loss: 0.3036 - acc: 0.9308 - val_loss: 0.8912 - val_acc: 0.8070
Epoch 10/20
7982/7982 [==============================] - 1s 82us/step - loss: 0.2539 - acc: 0.9412 - val_loss: 0.9059 - val_acc: 0.8110
Epoch 11/20
7982/7982 [==============================] - 1s 83us/step - loss: 0.2185 - acc: 0.9471 - val_loss: 0.9152 - val_acc: 0.8120
Epoch 12/20
7982/7982 [==============================] - 1s 84us/step - loss: 0.1872 - acc: 0.9511 - val_loss: 0.9045 - val_acc: 0.8150
Epoch 13/20
7982/7982 [==============================] - 1s 84us/step - loss: 0.1696 - acc: 0.9523 - val_loss: 0.9338 - val_acc: 0.8090
Epoch 14/20
7982/7982 [==============================] - 1s 85us/step - loss: 0.1531 - acc: 0.9554 - val_loss: 0.9644 - val_acc: 0.8090
Epoch 15/20
7982/7982 [==============================] - 1s 84us/step - loss: 0.1387 - acc: 0.9555 - val_loss: 0.9697 - val_acc: 0.8120
Epoch 16/20
7982/7982 [==============================] - 1s 84us/step - loss: 0.1310 - acc: 0.9562 - val_loss: 1.0280 - val_acc: 0.8040
Epoch 17/20
7982/7982 [==============================] - 1s 84us/step - loss: 0.1214 - acc: 0.9577 - val_loss: 1.0307 - val_acc: 0.7950
Epoch 18/20
7982/7982 [==============================] - 1s 83us/step - loss: 0.1193 - acc: 0.9582 - val_loss: 1.0454 - val_acc: 0.8080
Epoch 19/20
7982/7982 [==============================] - 1s 83us/step - loss: 0.1136 - acc: 0.9595 - val_loss: 1.1013 - val_acc: 0.7950
Epoch 20/20
7982/7982 [==============================] - 1s 85us/step - loss: 0.1104 - acc: 0.9595 - val_loss: 1.0710 - val_acc: 0.8020

In [20]:
import matplotlib.pyplot as plt
loss = history.history['loss'] 
val_loss = history.history['val_loss'] 
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'bo', label='Training loss') 
plt.plot(epochs, val_loss, 'b', label='Validation loss') 
plt.title('Training and validation loss') 
plt.xlabel('Epochs') 
plt.ylabel('Loss') 
plt.legend()
plt.show()



In [22]:
acc = history.history['acc']
val_acc = history.history['val_acc']
plt.plot(epochs, acc, 'bo', label='Training acc') 
plt.plot(epochs, val_acc, 'b', label='Validation acc') 
plt.title('Training and validation accuracy') 
plt.xlabel('Epochs') 
plt.ylabel('Loss') 
plt.legend()
plt.show()



In [23]:
predictions = model.predict(x_test)

In [24]:
predictions[0].shape


Out[24]:
(46,)

In [ ]:
np.sum