ConvNet in Keras for MNIST Digit Classification


In [1]:
from __future__ import print_function

In [2]:
import numpy as np

In [4]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K


Using TensorFlow backend.
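
A note on versions: the imports above use the Keras 1.x API. If you are running Keras 2 (or tf.keras), several of these names have changed; a rough mapping would be:

from keras.layers import Conv2D         # Convolution2D was renamed Conv2D
from keras.utils import to_categorical  # replaces np_utils.to_categorical
# Convolution2D(32, 3, 3, border_mode='valid') -> Conv2D(32, (3, 3), padding='valid')
# model.fit(..., nb_epoch=10)                  -> model.fit(..., epochs=10)
# K.image_dim_ordering() == 'th'               -> K.image_data_format() == 'channels_first'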

Configure Model


In [5]:
batch_size = 128 # number of samples to include in each mini-batch
nb_classes = 10 # there are ten digit classes in the MNIST data set
nb_epoch = 10 # number of epochs to train for

In [6]:
img_rows, img_cols = 28, 28 # input image dimensions
nb_filters = 32 # number of convolutional filters to use
pool_size = (2, 2) # size of pooling area for max pooling
kernel_size = (3, 3) # convolution kernel size

Load the MNIST data, already shuffled and split into training and test sets


In [8]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
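
Before any preprocessing, the arrays returned by mnist.load_data() are raw uint8 images and integer labels; a quick sanity check:

print(X_train.shape, X_train.dtype)  # (60000, 28, 28) uint8, grayscale images
print(y_train.shape, y_train.dtype)  # (60000,) uint8, digit labels 0-9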

In [9]:
if K.image_dim_ordering() == 'th':
    # Theano-style ordering: channels first, i.e. (samples, channels, rows, cols)
    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    # TensorFlow-style ordering: channels last, i.e. (samples, rows, cols, channels)
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

In [10]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255  # scale pixel intensities from [0, 255] down to [0.0, 1.0]
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')


X_train shape: (60000, 1, 28, 28)
60000 train samples
10000 test samples

Convert the integer class vectors to binary class matrices (one-hot encoding): each label becomes a length-10 vector with a 1 in the position of its class


In [11]:
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

In [12]:
y_train


Out[12]:
array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [13]:
Y_train


Out[13]:
array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.]])

Build Model


In [14]:
model = Sequential()

In [15]:
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
                        border_mode='valid',  # 'valid' means no zero-padding
                        input_shape=input_shape))
model.add(Activation('relu'))

In [16]:
model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1]))  # second conv layer; input shape inferred
model.add(Activation('relu'))

In [17]:
model.add(MaxPooling2D(pool_size=pool_size))  # downsample each 2x2 region to its maximum
model.add(Dropout(0.25))  # randomly zero 25% of activations during training

In [18]:
model.add(Flatten())              # flatten the feature maps into a single vector
model.add(Dense(128))             # fully connected hidden layer
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))      # one output unit per digit class
model.add(Activation('softmax'))  # normalize outputs into class probabilities
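
Layer arithmetic: each 3x3 'valid' convolution trims one pixel from each border (28x28 -> 26x26 -> 24x24) and the 2x2 max pool halves both dimensions (-> 12x12), so Flatten produces 12 * 12 * 32 = 4608 features feeding the 128-unit dense layer. This can be confirmed with:

model.summary()  # prints each layer's output shape and parameter count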

In [19]:
model.compile(loss='categorical_crossentropy',  # matches the one-hot targets
              optimizer='adadelta',             # adaptive per-parameter learning rates
              metrics=['accuracy'])
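
For intuition about the loss: categorical cross-entropy for a one-hot target y and predicted distribution p is -sum_c y_c * log(p_c). A hand-rolled NumPy check (illustrative only; the function name here is ad hoc, not a Keras API) might look like:

def categorical_crossentropy_np(y_true, y_pred, eps=1e-7):
    # -sum_c y_c * log(p_c), clipped so log(0) never occurs
    return -np.sum(y_true * np.log(np.clip(y_pred, eps, 1.0)))

y = np.zeros(10); y[3] = 1.0            # true class is 3
p = np.full(10, 0.3 / 9); p[3] = 0.7    # model puts 0.7 of its mass on class 3
print(categorical_crossentropy_np(y, p))  # -log(0.7), roughly 0.357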

In [20]:
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
          verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)  # returns [loss, accuracy]


Train on 60000 samples, validate on 10000 samples
Epoch 1/10
60000/60000 [==============================] - 63s - loss: 0.3716 - acc: 0.8848 - val_loss: 0.0888 - val_acc: 0.9726
Epoch 2/10
60000/60000 [==============================] - 62s - loss: 0.1332 - acc: 0.9604 - val_loss: 0.0602 - val_acc: 0.9806
Epoch 3/10
60000/60000 [==============================] - 62s - loss: 0.1037 - acc: 0.9696 - val_loss: 0.0497 - val_acc: 0.9830
Epoch 4/10
60000/60000 [==============================] - 63s - loss: 0.0864 - acc: 0.9740 - val_loss: 0.0428 - val_acc: 0.9853
Epoch 5/10
60000/60000 [==============================] - 62s - loss: 0.0758 - acc: 0.9776 - val_loss: 0.0375 - val_acc: 0.9875
Epoch 6/10
60000/60000 [==============================] - 62s - loss: 0.0678 - acc: 0.9804 - val_loss: 0.0354 - val_acc: 0.9881
Epoch 7/10
60000/60000 [==============================] - 62s - loss: 0.0627 - acc: 0.9806 - val_loss: 0.0338 - val_acc: 0.9895
Epoch 8/10
60000/60000 [==============================] - 62s - loss: 0.0583 - acc: 0.9824 - val_loss: 0.0321 - val_acc: 0.9895
Epoch 9/10
60000/60000 [==============================] - 62s - loss: 0.0557 - acc: 0.9832 - val_loss: 0.0318 - val_acc: 0.9890
Epoch 10/10
60000/60000 [==============================] - 62s - loss: 0.0513 - acc: 0.9847 - val_loss: 0.0298 - val_acc: 0.9895
Out[20]:
<keras.callbacks.History at 0x7fd454776d50>
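
The fit call returns this History object, which records the per-epoch metrics shown in the log. Assigning the call, e.g.

history = model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
                    verbose=1, validation_data=(X_test, Y_test))

would make them available afterwards as lists under history.history['acc'], history.history['val_loss'], and so on, convenient for plotting learning curves.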

In [21]:
print('Test score:', score[0])
print('Test accuracy:', score[1])


Test score: 0.0298332915191
Test accuracy: 0.9895
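
As a final check, the trained model can classify individual images. A minimal sketch (test image 0 chosen arbitrarily):

probs = model.predict(X_test[:1])            # softmax output, shape (1, 10)
print('predicted digit:', np.argmax(probs))  # index of the most probable class
print('true digit:', y_test[0])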