In [1]:
import warnings
warnings.filterwarnings('ignore')
In [2]:
%matplotlib inline
In [3]:
import matplotlib.pyplot as plt
import numpy as np
In [4]:
from distutils.version import StrictVersion
In [5]:
import sklearn
print(sklearn.__version__)
assert StrictVersion(sklearn.__version__) >= StrictVersion('0.18.1')
In [6]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
print(tf.__version__)
assert StrictVersion(tf.__version__) >= StrictVersion('1.1.0')
In [7]:
import keras
print(keras.__version__)
assert StrictVersion(keras.__version__) >= StrictVersion('2.0.0')
This notebook follows the blog post "Building powerful image classification models using very little data" from blog.keras.io. It uses data that can be downloaded at https://www.kaggle.com/c/dogs-vs-cats/data. In our setup, the images are arranged in the following directory structure (a sketch for copying the files into this layout follows the tree):
data/
    train/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
    validation/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
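A minimal sketch for producing this layout from the unpacked Kaggle archive (the archive names its files `cat.0.jpg` … `dog.12499.jpg`; the `raw/` source directory is an assumption, and the 1000/400 split per class is taken from the sample counts used below):

import os
import shutil

def copy_range(species, start, stop, subset):
    # hypothetical helper: copy images [start, stop) of one class into data/<subset>/<species>s/
    dst_dir = os.path.join('data', subset, species + 's')
    if not os.path.isdir(dst_dir):
        os.makedirs(dst_dir)
    for i in range(start, stop):
        name = '%s.%d.jpg' % (species, i)
        shutil.copyfile(os.path.join('raw', name), os.path.join(dst_dir, name))

for species in ('cat', 'dog'):
    copy_range(species, 0, 1000, 'train')          # 1000 training images per class
    copy_range(species, 1000, 1400, 'validation')  # 400 validation images per class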
In [8]:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Model, Sequential
from keras.layers import Dropout, Flatten, Dense, Input
In [9]:
# dimensions of our images
img_width, img_height = 150, 150

train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 2000       # 1000 images per class
nb_validation_samples = 800   # 400 images per class
In [10]:
# load VGG16 with ImageNet weights, but without its fully-connected classifier head
input_tensor = Input(shape=(img_width, img_height, 3))
base_model = applications.VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)
In [11]:
base_model.summary()
In [12]:
# this reports (None, None, 512) here, which is not specific enough for the Flatten layer further down...
bottleneck_output_shape = base_model.output_shape[1:]
In [13]:
# ... so we set it manually to the shape we know it really has (see the summary above)
bottleneck_output_shape = (4, 4, 512)
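As a sanity check, the (4, 4, 512) can be derived rather than just trusted: VGG16 halves the spatial resolution at each of its five max-pooling stages (150 → 75 → 37 → 18 → 9 → 4, flooring each time), and its last conv block has 512 feature maps. A minimal sketch:

# derive the bottleneck spatial size instead of hardcoding it
size = img_width
for _ in range(5):   # VGG16 has five 2x2 max-pooling stages
    size //= 2       # each one halves the resolution (flooring)
assert (size, size, 512) == bottleneck_output_shape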
In [14]:
# build a classifier model to put on top of the convolutional model
top_model = Sequential()
top_model.add(Flatten(input_shape=bottleneck_output_shape))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))
In [15]:
top_model.summary()
In [16]:
# note that it is necessary to start with a fully-trained
# classifier, including the top classifier,
# in order to successfully do fine-tuning
top_model_weights_path = 'bottleneck_fc_model.h5'
top_model.load_weights(top_model_weights_path)
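The weights file is assumed to come from the bottleneck-features step of the same blog post. A condensed, illustrative sketch of how it could be produced (not executed in this run; epoch count and optimizer are assumptions):

# sketch: train the top classifier on pre-computed VGG16 bottleneck features
batch_size = 16  # matches the fine-tuning batch size used below
datagen = ImageDataGenerator(rescale=1. / 255)
generator = datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode=None,   # no labels: we only want the features
    shuffle=False)     # keep directory order so labels can be reconstructed
bottleneck_features = base_model.predict_generator(
    generator, nb_train_samples // batch_size)
# directories are read alphabetically: first all cats (label 0), then all dogs (label 1)
labels = np.array([0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2))
top_model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
top_model.fit(bottleneck_features, labels, epochs=50, batch_size=batch_size)
top_model.save_weights(top_model_weights_path)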
In [17]:
# stitch the two parts together: VGG16 convolutional base + our classifier on top
model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
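A quick shape check (just a sketch) confirms the stitched model maps raw 150×150 RGB images to a single sigmoid probability:

print(model.input_shape)   # expected: (None, 150, 150, 3)
print(model.output_shape)  # expected: (None, 1)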
In [18]:
model.layers
Out[18]:
In [19]:
len(model.layers)
Out[19]:
In [20]:
first_conv_layer = model.layers[1]
In [21]:
first_conv_layer.trainable
Out[21]:
In [22]:
first_max_pool_layer = model.layers[3]
first_max_pool_layer.trainable
Out[22]:
In [23]:
# freeze the first 15 layers (everything up to the last conv block);
# their weights will not be updated, so the generic low-level features
# are preserved and we (hopefully) avoid overfitting
non_trainable_layers = model.layers[:15]
In [24]:
non_trainable_layers
Out[24]:
In [25]:
for layer in non_trainable_layers:
    layer.trainable = False
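To verify the freeze took effect, a minimal check lists every layer with its `trainable` flag; only the block5 layers and the top classifier should still be trainable:

for i, layer in enumerate(model.layers):
    print(i, layer.name, 'trainable' if layer.trainable else 'frozen')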
In [26]:
first_max_pool_layer.trainable
Out[26]:
In [27]:
first_conv_layer.trainable
Out[27]:
In [28]:
# compile the model with an SGD/momentum optimizer and a very low learning rate:
# updates stay small and non-adaptive, so we do not ruin the previously learned weights
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])
In [29]:
model.summary()
In [30]:
# this might take a while even on a GPU
# ~92% validation accuracy is a realistic outcome
epochs = 50
batch_size = 16
In [31]:
# ... and visualize progress in TensorBoard to see what is going on
!rm -rf tf_log/
tb_callback = keras.callbacks.TensorBoard(log_dir='./tf_log')
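With the callback in place, training can be followed live from a separate shell via `tensorboard --logdir tf_log` (served at http://localhost:6006 by default).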
In [32]:
# prepare the data augmentation configuration
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# validation data is only rescaled, never augmented
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary')
In [33]:
# fine-tune the model
# due to the very low learning rate, one epoch takes ~30 s on an AWS K80,
# so 50 epochs take ~30 minutes; on a CPU it might take up to 20 times longer
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
    callbacks=[tb_callback])
Out[33]:
In [34]:
import os
if not os.path.isdir('models'):
    os.makedirs('models')  # model.save does not create missing directories
model.save('models/cat-dog-vgg-retrain.hdf5')
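Once saved, the model can be reloaded and used elsewhere; a minimal prediction sketch (the sample path is just taken from the directory listing above):

from keras.models import load_model
from keras.preprocessing import image

loaded_model = load_model('models/cat-dog-vgg-retrain.hdf5')
img = image.load_img('data/validation/dogs/dog001.jpg', target_size=(img_width, img_height))
x = np.expand_dims(image.img_to_array(img) / 255., axis=0)  # same rescaling as during training
print(loaded_model.predict(x))  # sigmoid output: ~0 means cat, ~1 means dog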