In [1]:
%matplotlib inline
import math
import numpy as np
import utils; reload(utils)
from utils import *
from sympy import Symbol
import keras
from keras import backend as K
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Lambda, Dense, Flatten, Dropout
from keras.layers import Convolution2D, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam
from keras.preprocessing import image
from matplotlib import pyplot as plt
In [2]:
# We set the random seed to make the results more reproducible.
np.random.seed(1)
In [3]:
# Let's load the data. MNIST can be loaded directly through Keras.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
In [4]:
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
Out[4]:
In [5]:
# Keras expects an explicit channel dimension (a single greyscale channel here), so we add it:
# the arrays become (n, 1, 28, 28).
X_test = np.expand_dims(X_test,1)
X_train = np.expand_dims(X_train,1)
In [6]:
# We want the labels as one-hot vectors of the form [0, 0, 1, 0, ...], so we encode them with
# the `onehot` helper from utils.
y_train = onehot(y_train)
y_test = onehot(y_test)
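In [ ]:
# Quick look at the encoding (optional): each label is now a length-10 vector containing a
# single 1, e.g. the digit 5 becomes [0, 0, 0, 0, 0, 1, 0, 0, 0, 0].
(y_train.shape, y_train[0])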
In [7]:
mean_px = X_train.mean().astype(np.float32)
std_px = X_train.std().astype(np.float32)
In [8]:
# We normalize the inputs so the training is more stable.
def norm_input(x): return (x-mean_px)/std_px
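In [ ]:
# Optional sanity check: after normalization the pixels should have roughly zero mean and
# unit standard deviation.
x_check = norm_input(X_train)
(x_check.mean(), x_check.std())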
In [26]:
# Let's start by implementing a really basic linear model: flatten the pixels and feed them
# into a single softmax layer.
model = Sequential([
    Lambda(norm_input, input_shape=(1,28,28)),
    Flatten(),
    Dense(10, activation='softmax')
])
model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
In [27]:
# ImageDataGenerator creates batches from images held in memory as arrays. It is also quite
# powerful, as it supports data augmentation (which we will use later).
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=64)
test_batches = gen.flow(X_test, y_test, batch_size=64)
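In [ ]:
# Optional peek at what the generator yields: a batch of images plus the matching one-hot labels.
imgs, labels = next(batches)
(imgs.shape, labels.shape)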
In [28]:
# We train the model for one epoch with the generator.
model.fit_generator(batches, batches.N, nb_epoch=1,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[28]:
In [29]:
# We now increase the learning rate to speed up training (using K.set_value so the change
# actually reaches the compiled training function).
K.set_value(model.optimizer.lr, 0.1)
In [30]:
model.fit_generator(batches, batches.N, nb_epoch=1,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[30]:
In [31]:
# We decrease the learning rate again, since accuracy barely improved in the last step and we
# want smaller, more careful updates.
K.set_value(model.optimizer.lr, 0.01)
In [32]:
# We train for 4 more epochs to see whether the model starts to overfit.
model.fit_generator(batches, batches.N, nb_epoch=4,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[32]:
In [33]:
# We are still underfitting! Our model is clearly not complex enough.
In [34]:
# We add a hidden dense layer (with a ReLU activation) and follow the same process as before.
model = Sequential([
    Lambda(norm_input, input_shape=(1,28,28)),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax')
])
model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
In [36]:
model.fit_generator(batches, batches.N, nb_epoch=1,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[36]:
In [37]:
K.set_value(model.optimizer.lr, 0.1)
In [38]:
model.fit_generator(batches, batches.N, nb_epoch=4,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[38]:
In [39]:
K.set_value(model.optimizer.lr, 0.01)
In [40]:
model.fit_generator(batches, batches.N, nb_epoch=4,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[40]:
In [41]:
# We are clearly overfitting this time!
# That is, the accuracy on the training data is much higher than the accuracy on the
# validation set.
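In [ ]:
# A minimal sketch of how to quantify the gap: fit_generator returns a History object whose
# `history` dict holds the per-epoch metrics (Keras 1 names them 'acc' and 'val_acc').
hist = model.fit_generator(batches, batches.N, nb_epoch=1, verbose=0,
                           validation_data=test_batches, nb_val_samples=test_batches.N)
(hist.history['acc'][-1], hist.history['val_acc'][-1])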
In [42]:
# Now we try out a VGG-style model, with several Convolution2D layers and MaxPooling2D.
model = Sequential([
    Lambda(norm_input, input_shape=(1,28,28)),
    Convolution2D(32,3,3, activation='relu'),
    Convolution2D(32,3,3, activation='relu'),
    MaxPooling2D(),
    Convolution2D(64,3,3, activation='relu'),
    Convolution2D(64,3,3, activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax')
])
model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
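In [ ]:
# Optional: print the layer output shapes and parameter counts to sanity-check the architecture.
model.summary()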
In [43]:
model.fit_generator(batches, batches.N, nb_epoch=1,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[43]:
In [44]:
K.set_value(model.optimizer.lr, 0.1)
In [45]:
model.fit_generator(batches, batches.N, nb_epoch=1,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[45]:
In [46]:
K.set_value(model.optimizer.lr, 0.01)
In [47]:
model.fit_generator(batches, batches.N, nb_epoch=8,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[47]:
In [ ]:
# This result is impressive! But we are overfitting, so let's introduce data augmentation to
# deal with that.
In [48]:
model = Sequential([
    Lambda(norm_input, input_shape=(1,28,28)),
    Convolution2D(32,3,3, activation='relu'),
    Convolution2D(32,3,3, activation='relu'),
    MaxPooling2D(),
    Convolution2D(64,3,3, activation='relu'),
    Convolution2D(64,3,3, activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(10, activation='softmax')
])
model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
In [49]:
# The generator randomly transforms each training image (rotation, shifts, shear, zoom), which
# effectively gives us more training data.
gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,
                               height_shift_range=0.08, zoom_range=0.08)
batches = gen.flow(X_train, y_train, batch_size=64)
# The validation data is left un-augmented so the metric stays comparable across experiments.
test_batches = image.ImageDataGenerator().flow(X_test, y_test, batch_size=64)
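In [ ]:
# Optional sketch: pull one augmented batch and plot a few digits to sanity-check the
# augmentation parameters (nothing here is needed for training).
aug_imgs, aug_labels = next(batches)
fig, axes = plt.subplots(1, 8, figsize=(12, 2))
for ax, img in zip(axes, aug_imgs[:8]):
    ax.imshow(img[0], cmap='gray')
    ax.axis('off')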
In [50]:
model.fit_generator(batches, batches.N, nb_epoch=1,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[50]:
In [51]:
K.set_value(model.optimizer.lr, 0.1)
In [52]:
model.fit_generator(batches, batches.N, nb_epoch=4,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[52]:
In [53]:
K.set_value(model.optimizer.lr, 0.01)
In [54]:
model.fit_generator(batches, batches.N, nb_epoch=8,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[54]:
In [55]:
K.set_value(model.optimizer.lr, 0.001)
In [56]:
model.fit_generator(batches, batches.N, nb_epoch=4,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[56]:
In [ ]:
# Not bad! We are still overfitting, but much less. Let's look at other techniques that might
# be useful in your own analyses.
In [57]:
# Now let's add Batch Normalization, which normalizes the activations flowing between layers
# and usually makes training faster and more stable.
model = Sequential([
    Lambda(norm_input, input_shape=(1,28,28)),
    Convolution2D(32,3,3, activation='relu'),
    BatchNormalization(axis=1),
    Convolution2D(32,3,3, activation='relu'),
    MaxPooling2D(),
    BatchNormalization(axis=1),
    Convolution2D(64,3,3, activation='relu'),
    BatchNormalization(axis=1),
    Convolution2D(64,3,3, activation='relu'),
    MaxPooling2D(),
    Flatten(),
    BatchNormalization(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='softmax')
])
model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
In [58]:
model.fit_generator(batches, batches.N, nb_epoch=1,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[58]:
In [59]:
K.set_value(model.optimizer.lr, 0.1)
In [60]:
model.fit_generator(batches, batches.N, nb_epoch=4,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[60]:
In [61]:
K.set_value(model.optimizer.lr, 0.01)
In [62]:
model.fit_generator(batches, batches.N, nb_epoch=4,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[62]:
In [63]:
K.set_value(model.optimizer.lr, 0.001)
In [64]:
model.fit_generator(batches, batches.N, nb_epoch=1,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[64]:
In [65]:
# We are overfitting again, so let's add a Dropout layer before the output.
def get_model_bn_do():
    model = Sequential([
        Lambda(norm_input, input_shape=(1,28,28)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(32,3,3, activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        Convolution2D(64,3,3, activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model
model = get_model_bn_do()
In [66]:
model.fit_generator(batches, batches.N, nb_epoch=1,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[66]:
In [67]:
K.set_value(model.optimizer.lr, 0.1)
In [68]:
model.fit_generator(batches, batches.N, nb_epoch=4,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[68]:
In [69]:
K.set_value(model.optimizer.lr, 0.01)
In [70]:
model.fit_generator(batches, batches.N, nb_epoch=12,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[70]:
In [71]:
K.set_value(model.optimizer.lr, 0.001)
In [72]:
model.fit_generator(batches, batches.N, nb_epoch=1,
                    validation_data=test_batches, nb_val_samples=test_batches.N)
Out[72]:
In [73]:
# Finally, let's try ensembling: we train several copies of the model and average their
# predictions.
def fit_model():
    model = get_model_bn_do()
    model.fit_generator(batches, batches.N, nb_epoch=1, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    K.set_value(model.optimizer.lr, 0.1)
    model.fit_generator(batches, batches.N, nb_epoch=4, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    K.set_value(model.optimizer.lr, 0.01)
    model.fit_generator(batches, batches.N, nb_epoch=12, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    K.set_value(model.optimizer.lr, 0.001)
    model.fit_generator(batches, batches.N, nb_epoch=18, verbose=0,
                        validation_data=test_batches, nb_val_samples=test_batches.N)
    return model
In [ ]:
models = [fit_model() for i in range(6)]
In [ ]:
path = "data/mnist/"
model_path = path + 'models/'
In [ ]:
# Save each trained model's weights so the ensemble can be reloaded later.
for i,m in enumerate(models):
    m.save_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')
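In [ ]:
# Sketch of how the saved ensemble could be restored later (assumes the same `model_path` and
# file names used above).
models = [get_model_bn_do() for i in range(6)]
for i, m in enumerate(models):
    m.load_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')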
In [ ]:
# Each model.evaluate call returns [loss, accuracy]; averaging across models summarizes the ensemble.
evals = np.array([m.evaluate(X_test, y_test, batch_size=256) for m in models])
In [ ]:
evals.mean(axis=0)
In [ ]:
all_preds = np.stack([m.predict(X_test, batch_size=256) for m in models])
In [ ]:
all_preds.shape
In [ ]:
avg_preds = all_preds.mean(axis=0)
In [ ]:
keras.metrics.categorical_accuracy(y_test, avg_preds).eval()
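In [ ]:
# The same accuracy computed with plain NumPy (a sketch, handy if the backend .eval() call is
# inconvenient): compare the argmax of the averaged predictions with the true labels.
(np.argmax(avg_preds, axis=1) == np.argmax(y_test, axis=1)).mean()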