Transfer learning example:
1- Train a simple convnet on the MNIST dataset for the first five digits [0..4].
2- Freeze the convolutional layers and fine-tune the dense layers to classify the digits [5..9].
The model gets to 99.8% test accuracy after 5 epochs for the first-five-digits classifier, and 99.2% for the last five digits after transfer + fine-tuning.
In [1]:
from __future__ import print_function
import datetime
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import numpy as np
In [2]:
now = datetime.datetime.now
batch_size = 128
num_classes = 5
epochs = 5
# input image dimensions
img_rows, img_cols = 28, 28
# number of convolutional filters to use
filters = 32
# size of pooling area for max pooling
pool_size = 2
# convolution kernel size
kernel_size = 3
In [3]:
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
In [4]:
def train_model(model, train, test, num_classes):
    x_train = train[0].reshape((train[0].shape[0],) + input_shape)
    x_test = test[0].reshape((test[0].shape[0],) + input_shape)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255
    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(train[1], num_classes)
    y_test = keras.utils.to_categorical(test[1], num_classes)

    # compiling here matters: any layer.trainable flags set before this
    # call take effect when the model is compiled
    model.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['accuracy'])

    t = now()
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))
    print('Training time: %s' % (now() - t))

    # evaluate returns [loss, accuracy]
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
Load the MNIST data, shuffled and split between train and test sets.
In [5]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# create two datasets one with digits below 5 and one with 5 and above
x_train_lt5 = x_train[y_train < 5]
y_train_lt5 = y_train[y_train < 5]
x_test_lt5 = x_test[y_test < 5]
y_test_lt5 = y_test[y_test < 5]
x_train_gte5 = x_train[y_train >= 5]
# shift the labels [5..9] down to [0..4] so they match the 5-way softmax output
y_train_gte5 = y_train[y_train >= 5] - 5
x_test_gte5 = x_test[y_test >= 5]
y_test_gte5 = y_test[y_test >= 5] - 5
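As a quick sanity check (a minimal sketch, not part of the training flow): after the shift, both label arrays should contain only the values 0 through 4.

In [ ]:
print(np.unique(y_train_lt5))   # expect [0 1 2 3 4]
print(np.unique(y_train_gte5))  # expect [0 1 2 3 4] after the -5 shift
print(x_train_lt5.shape[0], 'samples with digits < 5')
print(x_train_gte5.shape[0], 'samples with digits >= 5')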
Define two groups of layers: feature extraction (convolutions) and classification (dense).
In [6]:
feature_layers = [
    Conv2D(filters, kernel_size,
           padding='valid',
           input_shape=input_shape),
    Activation('relu'),
    Conv2D(filters, kernel_size),
    Activation('relu'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(0.25),
    Flatten(),
]

classification_layers = [
    Dense(128),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax')
]
In [7]:
# create the complete model
model = Sequential(feature_layers + classification_layers)

# train the model for 5-digit classification [0..4]
train_model(model,
            (x_train_lt5, y_train_lt5),
            (x_test_lt5, y_test_lt5), num_classes)

# freeze feature layers and rebuild the model
for l in feature_layers:
    l.trainable = False

# transfer: train the dense layers for the new classification task [5..9]
train_model(model,
            (x_train_gte5, y_train_gte5),
            (x_test_gte5, y_test_gte5), num_classes)
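Note that setting `l.trainable = False` only takes effect once the model is recompiled, which `train_model` does before the second fit. A minimal sketch for confirming which layers are frozen (using the `model` built above):

In [ ]:
# feature layers should report trainable = False, dense layers True
for layer in model.layers:
    print(layer.name, 'trainable =', layer.trainable)

# the summary also splits parameter counts into trainable vs. non-trainable
model.summary()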