MNIST

This notebook introduces some aspects of the Keras API, demonstrated on the MNIST handwritten digit data set (a classic benchmark in computer vision).


In [ ]:
%matplotlib notebook
import numpy as np
import matplotlib.pyplot as plt

In [ ]:
# Import the MNIST data set (integrated in Keras)
from keras.datasets import mnist
(x_train_mnist, y_train_mnist), (x_test_mnist, y_test_mnist) = mnist.load_data()
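
In [ ]:
# Quick sanity check of what the Keras loader returned: 60,000 training and 10,000 test images,
# each a 28x28 array of 0-255 pixel values
print('Train images: {}, labels: {}'.format(x_train_mnist.shape, y_train_mnist.shape))
print('Test images: {}, labels: {}'.format(x_test_mnist.shape, y_test_mnist.shape))
print('Pixel range: {} to {}'.format(x_train_mnist.min(), x_train_mnist.max()))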

In [ ]:
# Plot a random instance
i = np.random.choice(np.arange(x_train_mnist.shape[0]), 1)[0]
print('({}) Label: {}'.format(i, y_train_mnist[i]))
plt.imshow(x_train_mnist[i], cmap='gray')

In [ ]:
# The input data consists of 28x28 images. However, models like logistic regression or fully connected
# neural networks expect each example to be a flat feature vector, so we flatten each image into a 784-dimensional vector
x_train_flat = x_train_mnist.reshape([-1, 28*28])
x_test_flat = x_test_mnist.reshape([-1, 28*28])
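
In [ ]:
# Check the flattened shapes: each row is now a single 784-dimensional example
print('Flattened train: {}. Flattened test: {}'.format(x_train_flat.shape, x_test_flat.shape))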

In [ ]:
# Train a logistic (softmax) regression model on MNIST as a simple baseline; we expect a neural network to beat it
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(multi_class='multinomial', solver='lbfgs')
clf.fit(x_train_flat,y_train_mnist)

In [ ]:
# Print accuracy to evaluate the performance of logistic regression
from sklearn import metrics
print('Train accuracy: {}'.format(metrics.accuracy_score(y_train_mnist, clf.predict(x_train_flat))))
print('Test accuracy: {}'.format(metrics.accuracy_score(y_test_mnist, clf.predict(x_test_flat))))

In [ ]:
# Build a neural network. Option 1: 1 layer (equivalent to logistic regression)
from keras.models import Sequential
from keras.layers import Dense, Activation
model = Sequential()
model.add(Dense(units=10, input_dim=28*28))
model.add(Activation('softmax'))

In [ ]:
# Print a summary of the network architecture
# It is always a good idea to print this summary, to make sure we have built what we had in mind.
# It also tells us how many parameters we are going to have to estimate.
model.summary()
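
In [ ]:
# Worked check of the parameter count reported above: a Dense layer with 784 inputs and 10 outputs
# has a 784x10 weight matrix plus 10 biases, i.e. 7,850 parameters in total
print('Expected parameters: {}'.format(28*28*10 + 10))
print('Reported parameters: {}'.format(model.count_params()))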

In [ ]:
# Build a neural network. Option 2: one hidden layer (64 ReLU units) followed by a softmax output layer
from keras.models import Sequential
from keras.layers import Dense, Activation

model = Sequential()

model.add(Dense(units=64, input_dim=28*28))
model.add(Activation('relu'))

model.add(Dense(units=10))
model.add(Activation('softmax'))

In [ ]:
model.summary()

In [ ]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
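
In [ ]:
# For reference, a minimal numpy sketch (with made-up numbers) of the categorical cross-entropy loss chosen above:
# for a one-hot label y and softmax output p, the loss is -sum(y * log(p))
y_example = np.array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])   # true class: 2
p_example = np.array([0.05, 0.05, 0.6, 0.05, 0.05, 0.05, 0.05, 0.04, 0.03, 0.03])
print('Cross entropy: {}'.format(-np.sum(y_example * np.log(p_example))))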

In [ ]:
def get_splits(X, y, ratio=0.1, cat=False):
    """
    Finds a random split of size ratio*size(data).
    Returns the corresponding splits of X and y.
    """
    val_ids = np.random.choice(np.arange(X.shape[0]), int(X.shape[0]*ratio), replace=False)
    train_ids = np.delete(np.arange(X.shape[0]), val_ids)
    x_train = X[train_ids,:]
    x_val = X[val_ids,:]
    if cat:
        y_train = y[train_ids,:]
        y_val = y[val_ids,:]
    else:
        y_train = y[train_ids]
        y_val = y[val_ids]                
    return x_train, y_train, x_val, y_val

In [ ]:
# This is a very important step when training neural networks.
# Since the objective function is usually very complex, the optimization algorithm can take many iterations.
# In addition, if the network is sufficiently complex it can overfit the training set.
#
# Overfitting can be detected by keeping aside a small split of the training data and periodically
# evaluating the loss of the model on it.
x_train, y_train, x_val, y_val = get_splits(x_train_flat, y_train_mnist, ratio=0.05, cat=False)
print('Train: {}. Validation: {}'.format(x_train.shape, x_val.shape))
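
In [ ]:
# Sanity check: the two splits together cover the full training set
print('{} (train) + {} (validation) = {} rows'.format(
    x_train.shape[0], x_val.shape[0], x_train.shape[0] + x_val.shape[0]))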

In [ ]:
# To optimize objectives like categorical cross-entropy (the appropriate loss for a softmax output), we need to convert
# the labels to one-hot encoding (e.g. label 2 turns into [0,0,1,0,0,0,0,0,0,0])
from keras import utils
y_train = utils.to_categorical(y_train, num_classes=10)
y_val = utils.to_categorical(y_val, num_classes=10)
y_test = utils.to_categorical(y_test_mnist, num_classes=10)
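
In [ ]:
# What to_categorical does, spelled out with plain numpy (a small sketch): build a zero matrix
# and set a single 1 per row at the column given by the integer label
labels_example = np.array([2, 0, 9])
one_hot_example = np.zeros((labels_example.shape[0], 10))
one_hot_example[np.arange(labels_example.shape[0]), labels_example] = 1
print(one_hot_example)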

In [ ]:
# Example of one-hot encoded label
y_train[0]

In [ ]:
# Train the model for 10 epochs with large batches (batch_size=1024); the next cells contrast this with smaller mini-batches
model.fit(x_train, y_train, epochs=10, batch_size=1024, validation_data=(x_val, y_val))
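
In [ ]:
# For reference (not run here): true full-batch gradient descent would compute the gradient on the
# entire training set for every update, i.e. batch_size=x_train.shape[0]. Each update then becomes
# much more expensive, which is why large mini-batches are used above instead.
# model.fit(x_train, y_train, epochs=10, batch_size=x_train.shape[0], validation_data=(x_val, y_val))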

In [ ]:
# Lists for monitoring progress
train_loss = []
val_loss = []

In [ ]:
# Minibatch gradient descent
# Here we try another approach. Neural networks are usually not trained by computing the full gradient,
# but only the gradient on a small batch of data (typically a small power of 2, e.g. 32-256).
# The noisier updates can help escape poor local minima, reduce overfitting, and give more frequent progress reports
fig = plt.figure()
ax = fig.gca()

for i in range(50):
    history = model.fit(x_train, y_train, epochs=1, batch_size=128, verbose=1, validation_data=(x_val, y_val))
    
    train_loss.append(history.history['loss'][0])
    val_loss.append(history.history['val_loss'][0])
    
    ax.clear()
    ax.plot(train_loss, color='red', label='Train')
    ax.plot(val_loss, color='blue', label='Validation')
    ax.legend()

    fig.canvas.draw()
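
In [ ]:
# Worked check: with batch_size=128 and 57,000 training rows, each epoch performs
# ceil(57000 / 128) = 446 gradient updates, versus far fewer with the large batches used earlier
print('Updates per epoch: {}'.format(int(np.ceil(x_train.shape[0] / 128.0))))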

In [ ]:
# Remember you can get the weights as a list of arrays, if you are curious and want to take a look
model.get_weights()

In [ ]:
# You can also easily save the model and its weights for later use
model.save('ann_mnist_64_relu.h5')
model.save_weights('my_model_weights.h5')
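
In [ ]:
# Sketch: reloading the saved model later. load_model restores both the architecture and the weights
# from the .h5 file written above (assumes the file is in the current working directory)
from keras.models import load_model
restored_model = load_model('ann_mnist_64_relu.h5')
restored_model.summary()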

In [ ]:
from sklearn import metrics
def evaluate(y, preds, classes=None):
    """
    Given a set of labels y and predictions preds, computes and prints
    precision, recall and F1 for each class (one-vs-rest).
    """
    if classes is None:
        classes = np.unique(y)
    for i in classes:
        preds_i = [1 if j == i else 0 for j in preds]
        y_i = [1 if j == i else 0 for j in y]
        print('Class {}:'.format(i))
        print('Precision: {}'.format(metrics.precision_score(y_i, preds_i)))
        print('Recall: {}'.format(metrics.recall_score(y_i, preds_i)))
        print('F1: {}'.format(metrics.f1_score(y_i, preds_i)))
        print('')

In [ ]:
# Evaluate the performance of the model
# First, get the softmax predictions for the test set
preds = model.predict(x_test_flat)
# Then, convert the softmax outputs to label form by taking the most probable class for each example
preds = np.argmax(preds, axis=1)
# Now, print some classification metrics (the function defined above)
evaluate(y_test_mnist, preds)
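
In [ ]:
# A complementary view of the errors: a 10x10 confusion matrix (rows = true digits, columns = predicted digits),
# computed with sklearn on the same test predictions
print(metrics.confusion_matrix(y_test_mnist, preds))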

In [ ]:
# Classify a random image from the test set
i = np.random.choice(np.arange(x_test_flat.shape[0]), 1)
x = x_test_flat[i]
img_x = x.reshape([28, 28])
print(np.argmax(model.predict(x)))
plt.imshow(img_x, cmap='gray')

In [ ]:
# Classify a digit we drew by hand!
# Try it at home: draw a digit on a 28x28 black background, using any Paint-like app, and see if the model can guess it
import matplotlib.image as mpimg
img = mpimg.imread('number.png')
# imread returns PNG pixel values as floats in [0, 1], while the model was trained on 0-255 values,
# so we rescale before predicting
x = (img[:, :, 0] * 255).reshape([1, 28*28])
print(np.argmax(model.predict(x)))
plt.imshow(img, cmap='gray')