Reuters MLP


In [1]:
# from: https://github.com/fchollet/keras/blob/master/examples/reuters_mlp.py

In [ ]:
# Trains and evaluates a simple MLP on the Reuters newswire topic classification task.

In [2]:
# Python 2/3 compatible printing.
from __future__ import print_function
import numpy as np
# Seed numpy's global RNG so numpy-driven randomness (e.g. weight init,
# shuffling) is repeatable.  NOTE(review): the TensorFlow backend keeps its
# own RNG, so this alone may not make runs fully reproducible — confirm.
np.random.seed(42)  # for reproducibility

In [3]:
from keras.datasets import reuters
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.utils import np_utils
from keras.preprocessing.text import Tokenizer


Using TensorFlow backend.

In [4]:
max_words = 1000  # vocabulary size: keep only the most frequent words
batch_size = 32   # samples per gradient update
nb_epoch = 5      # passes over the training set

In [5]:
print('Loading data...')
# Reuters newswire dataset: each sample is a sequence of word indices,
# restricted to the `max_words` most frequent words; 20% of the data is
# held out as the test split.
(X_train, y_train), (X_test, y_test) = reuters.load_data(nb_words=max_words, test_split=0.2)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')


Loading data...
Downloading data from https://s3.amazonaws.com/text-datasets/reuters.pkl
8880128/9211982 [===========================>..] - ETA: 0s8982 train sequences
2246 test sequences

In [6]:
# Number of topic classes.  Labels are consecutive integers starting at 0,
# so the class count is one past the largest label.  Scan BOTH splits: a
# label that only occurred in the test set would otherwise overflow the
# one-hot width passed to to_categorical(y_test, nb_classes) below.
# (For this dataset the result is unchanged: 46 classes.)
nb_classes = max(np.max(y_train), np.max(y_test)) + 1
print(nb_classes, 'classes')


46 classes

In [7]:
print('Vectorizing sequence data...')
# Turn each variable-length sequence of word indices into a fixed-size
# 0/1 vector of length `max_words` (binary bag-of-words: 1 if the word
# occurs anywhere in the document).
tokenizer = Tokenizer(nb_words=max_words)
# NOTE(review): this cell overwrites X_train/X_test in place, so it is not
# idempotent — re-running it on a stale kernel would re-vectorize matrices.
X_train = tokenizer.sequences_to_matrix(X_train, mode='binary')
X_test = tokenizer.sequences_to_matrix(X_test, mode='binary')
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)


Vectorizing sequence data...
X_train shape: (8982, 1000)
X_test shape: (2246, 1000)

In [8]:
print('Convert class vector to binary class matrix (for use with categorical_crossentropy)')
# One-hot encode the integer labels into (n_samples, nb_classes) matrices,
# the target format required by the categorical_crossentropy loss.
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
print('Y_train shape:', Y_train.shape)
print('Y_test shape:', Y_test.shape)


Convert class vector to binary class matrix (for use with categorical_crossentropy)
Y_train shape: (8982, 46)
Y_test shape: (2246, 46)

In [9]:
print('Building model...')
# Single-hidden-layer MLP: bag-of-words input (max_words dims) -> 512 ReLU
# units with 50% dropout -> softmax over the nb_classes topics.  Same layer
# stack as before, passed to the Sequential constructor in one go.
model = Sequential([
    Dense(512, input_shape=(max_words,)),
    Activation('relu'),
    Dropout(0.5),
    Dense(nb_classes),
    Activation('softmax'),
])


Building model...

In [10]:
# Multi-class loss matching the one-hot targets above; Adam optimizer with
# its default settings; track accuracy alongside the loss so evaluate()
# reports both.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [11]:
# Train, holding out the last 10% of the training data for per-epoch
# validation, then score the model on the untouched test split.
history = model.fit(X_train, Y_train,
                    nb_epoch=nb_epoch, batch_size=batch_size,
                    verbose=1, validation_split=0.1)
# evaluate() returns [loss, accuracy] because metrics=['accuracy'] was
# requested at compile time.
score = model.evaluate(X_test, Y_test,
                       batch_size=batch_size, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])


Train on 8083 samples, validate on 899 samples
Epoch 1/5
8083/8083 [==============================] - 2s - loss: 1.4216 - acc: 0.6804 - val_loss: 1.0657 - val_acc: 0.7653
Epoch 2/5
8083/8083 [==============================] - 2s - loss: 0.7787 - acc: 0.8179 - val_loss: 0.9224 - val_acc: 0.7998
Epoch 3/5
8083/8083 [==============================] - 2s - loss: 0.5565 - acc: 0.8654 - val_loss: 0.8771 - val_acc: 0.8076
Epoch 4/5
8083/8083 [==============================] - 2s - loss: 0.4081 - acc: 0.9002 - val_loss: 0.8719 - val_acc: 0.8053
Epoch 5/5
8083/8083 [==============================] - 2s - loss: 0.3314 - acc: 0.9157 - val_loss: 0.8879 - val_acc: 0.8154
2246/2246 [==============================] - 0s     
Test score: 0.87844823326
Test accuracy: 0.793410507569

In [ ]: