In [1]:
%pylab inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, RMSprop
from keras.utils import np_utils
from keras.regularizers import l2
Load the MNIST dataset, flatten the images, convert the class labels, and scale the data.
In [2]:
# Download (or read from the local cache) the MNIST digits as train/test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a 784-long float32 row and divide by 255
# to scale the pixel values.
X_train, X_test = (
    images.reshape(n_images, 28**2).astype('float32') / 255
    for images, n_images in ((X_train, 60000), (X_test, 10000))
)

# Convert the integer class labels into 10-column indicator matrices.
Y_train, Y_test = (
    np_utils.to_categorical(labels, 10) for labels in (y_train, y_test)
)
In [3]:
# A single-hidden-layer network: 784 inputs -> 512 sigmoid units -> 10 linear outputs.
model = Sequential()
for layer in (
    Dense(512, input_shape=(28 * 28,)),
    Activation("sigmoid"),
    Dense(10),
):
    model.add(layer)

# Train with Nesterov-momentum SGD against a mean-squared-error loss on the
# one-hot targets (the output layer has no activation of its own).
sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='mse')
Fit the model over 10 epochs.
In [4]:
# Train in mini-batches of 32 for 10 passes over the data, holding out 10% of
# the training set for validation and reporting accuracy alongside the loss.
model.fit(
    X_train,
    Y_train,
    batch_size=32,
    nb_epoch=10,
    verbose=1,
    show_accuracy=True,
    validation_split=0.1,
)
Out[4]:
Evaluate model on the test set
In [5]:
# With show_accuracy=True, evaluate() returns a pair; the second entry is
# printed here as the classification rate.
print(
    "Test classification rate %0.05f"
    % model.evaluate(X_test, Y_test, show_accuracy=True)[1]
)
Predict classes on the test set.
In [6]:
# Predicted class for every test image.
y_hat = model.predict_classes(X_test)

# Confusion table: rows are predicted classes, columns are the true labels.
pd.crosstab(index=y_hat, columns=y_test)
Out[6]:
Let's now build a deeper model, with four hidden dense layers and dropout layers. I'll use rectified linear units as they tend to perform better on deep models. I also initialize the nodes using "glorot_normal", which draws the initial weights from Gaussian noise scaled according to the number of inputs plus outputs of the node. Notice that we do not need to give an input shape to any layers other than the first.
In [7]:
# Deep network: four hidden blocks of Dense(512) -> ReLU -> Dropout(0.5),
# followed by a 10-way softmax output.
model = Sequential()

# Only the first block has to declare the input shape.
model.add(Dense(512, input_shape=(28 * 28,), init="glorot_normal"))
model.add(Activation("relu"))
model.add(Dropout(0.5))

# The remaining three hidden blocks are identical to one another.
for hidden_block in range(3):
    model.add(Dense(512, init="glorot_normal"))
    model.add(Activation("relu"))
    model.add(Dropout(0.5))

# Output layer: one unit per digit class.
model.add(Dense(10))
model.add(Activation('softmax'))
In [8]:
# Nesterov-momentum SGD, this time paired with categorical cross-entropy to
# match the softmax output layer.
sgd = SGD(
    lr=0.01,
    momentum=0.9,
    nesterov=True,
)
model.compile(optimizer=sgd, loss='categorical_crossentropy')
In [9]:
# Same training schedule as before: batches of 32, 10 epochs, 10% of the
# training data held out for validation, accuracy reported each epoch.
model.fit(
    X_train,
    Y_train,
    batch_size=32,
    nb_epoch=10,
    verbose=1,
    show_accuracy=True,
    validation_split=0.1,
)
Out[9]:
In [ ]:
# Report the deep model's performance on the held-out test set; with
# show_accuracy=True the second entry of evaluate()'s result is printed as the
# classification rate.
print("Test classification rate %0.05f" % model.evaluate(X_test, Y_test, show_accuracy=True)[1])

# Store this model's predictions in `y_hat`. The original assigned to a
# misspelled `fy_hat`, so the table below (and the misclassified-digit plot
# that reads `y_hat`) silently reused the first model's stale predictions.
y_hat = model.predict_classes(X_test)

# Confusion table: rows are predicted classes, columns are the true labels.
pd.crosstab(y_hat, y_test)
In [ ]:
# Gather (image, predicted label, true label) triples for every test example
# whose prediction disagrees with the true label.
test_wrong = [im for im in zip(X_test, y_hat, y_test) if im[1] != im[2]]

# Draw up to the first 100 misclassified digits on a 10x10 grid.
plt.figure(figsize=(15, 15))
for ind, val in enumerate(test_wrong[:100]):
    plt.subplot(10, 10, ind + 1)
    # Un-flatten the 784-vector to 28x28 and flip the scaled intensities.
    im = 1 - val[0].reshape((28, 28))
    # Call plt.axis explicitly instead of the bare `axis` that only exists
    # because of `%pylab inline`; this matches the other plotting cells.
    plt.axis("off")
    plt.imshow(im, cmap='gray')
In [ ]:
# A deliberately tiny network: two hidden blocks of
# Dense(16) -> ReLU -> Dropout(0.5), then a 10-way softmax.
model = Sequential()

# Both hidden blocks are identical except that the first one must also
# declare the input shape.
for extra_dense_args in ({"input_shape": (28 * 28,)}, {}):
    model.add(Dense(16, init="glorot_normal", **extra_dense_args))
    model.add(Activation("relu"))
    model.add(Dropout(0.5))

model.add(Dense(10))
model.add(Activation('softmax'))

# RMSprop with its default settings, categorical cross-entropy loss.
rms = RMSprop()
model.compile(optimizer=rms, loss='categorical_crossentropy')

# Batches of 32, 10 epochs, 10% of the training data held out for validation.
model.fit(
    X_train,
    Y_train,
    batch_size=32,
    nb_epoch=10,
    verbose=1,
    show_accuracy=True,
    validation_split=0.1,
)
The classification rate on the validation set is not nearly as good as for the larger models, but it is still not too bad overall. A model object contains a list of its layers, and the weights are easy to pull out.
In [ ]:
# The model exposes its layers as a list.
print(model.layers)

# get_weights() on a layer returns a list of arrays; print the shape of the
# first one (the weights) for the first layer.
print(model.layers[0].get_weights()[0].shape)
The first set of weights has one column per hidden node, each the same size as the input space, so every column can be reshaped and displayed as a 28x28 image.
In [ ]:
# Weights of the first dense layer.
W1 = model.layers[0].get_weights()[0]

# Each row of the transpose holds 784 weights, which are reshaped to 28x28
# and drawn as their own small figure.
for node_weights in W1.T:
    plt.figure(figsize=(3, 3), frameon=False)
    plt.axis("off")
    plt.imshow(node_weights.reshape((28, 28)), cmap='seismic')
The second layer of weights will be given as a single 16x16 matrix of weights.
In [ ]:
# Index 3 is the second Dense layer (after Dense, Activation and Dropout).
W2 = model.layers[3].get_weights()[0]

# Show its weights as one 16x16 image.
plt.figure(figsize=(3, 3))
plt.axis("off")
plt.imshow(W2.reshape((16, 16)), cmap='seismic')
In [ ]:
# Network with a narrow 128-unit first layer followed by two wider,
# L2-regularised 512-unit layers and a 10-way softmax output.
model = Sequential()

# Input block: no weight penalty, heavier dropout.
model.add(Dense(128, input_shape=(28 * 28,), init="glorot_normal"))
model.add(Activation("relu"))
model.add(Dropout(0.5))

# Two identical blocks: Dense(512) with an L2 weight penalty of 0.1,
# ReLU, and lighter dropout.
for regularized_block in range(2):
    model.add(Dense(512, init="glorot_normal", W_regularizer=l2(0.1)))
    model.add(Activation("relu"))
    model.add(Dropout(0.2))

# Output layer: one unit per digit class.
model.add(Dense(10))
model.add(Activation('softmax'))
In [ ]:
# RMSprop with its default settings, categorical cross-entropy loss.
rms = RMSprop()
model.compile(optimizer=rms, loss='categorical_crossentropy')

# A shorter run this time: 5 epochs, batches of 32, 10% of the training data
# held out for validation.
model.fit(
    X_train,
    Y_train,
    batch_size=32,
    nb_epoch=5,
    verbose=1,
    show_accuracy=True,
    validation_split=0.1,
)
In [ ]: