Lesson 1 & 2: Using Convolutional Neural Networks

Exercises from fast.ai

Author: Chris Shih


In [1]:
import tensorflow as tf

In [2]:
# Root folder of the dataset; later cells read its 'train' and 'valid' subfolders.
# Alternative location (presumably a smaller sample set for quick runs -- verify):
#path = 'data/dogscats/sample'
path = 'data/dogscats/'

In [3]:
import os
import json
from glob import glob
import numpy as np

from matplotlib import pyplot as plt
from matplotlib import image as mpimg
%matplotlib inline

In [4]:
from tensorflow.contrib.keras.python.keras.models import Model, Sequential
from tensorflow.contrib.keras.python.keras.layers import Conv2D, Dense, Flatten, Input, MaxPooling2D, Dropout
from tensorflow.contrib.keras.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.contrib.keras.python.keras.optimizers import Adam

Set up Structure for VGG 16


In [5]:
def vgg_preprocess(x):
    """Prepare an RGB image for the pretrained VGG16 weights.

    The constants 103.939 / 116.779 / 123.68 are the ImageNet channel means
    in BGR order, which is the layout the Caffe-derived VGG16 weights were
    trained on. Keras image generators load images as RGB, so the channel
    axis has to be reversed before the means are subtracted; subtracting
    them from un-flipped channels pairs each mean with the wrong colour.

    Args:
        x: array-like whose LAST axis holds the three colour channels in RGB
            order (a single H x W x 3 image when used as an
            ``ImageDataGenerator`` preprocessing function).

    Returns:
        A new floating-point array of the same shape with channels in BGR
        order and the per-channel means removed. The input is not modified.
    """
    x = np.asarray(x)
    # Integer images (e.g. uint8) are promoted to float32; float inputs keep
    # their precision. Without this, subtracting float means from an integer
    # array in place would raise a casting error.
    x = x.astype(np.result_type(x.dtype, np.float32), copy=False)
    bgr_mean = np.array([103.939, 116.779, 123.68], dtype=x.dtype)
    # Reverse the channel axis (RGB -> BGR), then zero-centre. The subtraction
    # allocates a fresh array, so the caller's data is left untouched.
    return x[..., ::-1] - bgr_mean

In [20]:
# (number of conv layers, filters per layer) for convolutional blocks 1-5
conv_blocks = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]

vgg = Sequential()

# Convolutional base: each block is a run of 3x3, 'same'-padded ReLU
# convolutions followed by a 2x2 max-pool with stride 2.
for block_no, (n_convs, n_filters) in enumerate(conv_blocks, start=1):
    for conv_no in range(1, n_convs + 1):
        conv_args = dict(activation='relu', padding='same',
                         name='block%d_conv%d' % (block_no, conv_no))
        if (block_no, conv_no) == (1, 1):
            # Only the very first layer declares the input shape.
            conv_args['input_shape'] = (224, 224, 3)
        vgg.add(Conv2D(n_filters, (3, 3), **conv_args))
    vgg.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                         name='block%d_pool' % block_no))

# Fully connected layers: two 4096-unit ReLU layers, each followed by
# 50% dropout, then the 1000-way softmax output.
vgg.add(Flatten(name='flatten'))
for fc_name in ('fc1', 'fc2'):
    vgg.add(Dense(4096, activation='relu', name=fc_name))
    vgg.add(Dropout(0.5, name=fc_name + '_drop'))
vgg.add(Dense(1000, activation='softmax', name='predictions'))

# Load VGG weights
vgg.load_weights('data/vgg16_tf.h5')

In [21]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
vgg.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              102764544 
_________________________________________________________________
fc1_drop (Dropout)           (None, 4096)              0         
_________________________________________________________________
fc2 (Dense)                  (None, 4096)              16781312  
_________________________________________________________________
fc2_drop (Dropout)           (None, 4096)              0         
_________________________________________________________________
predictions (Dense)          (None, 1000)              4097000   
=================================================================
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
_________________________________________________________________

Remove Last Dense Layer (1000 ImageNet Classes) and add a Dense Layer for 2 Classes


In [22]:
# Drop the final layer (the 1000-way 'predictions' softmax).
vgg.pop()

# Freeze everything that is left so the pretrained weights are not updated.
for pretrained_layer in vgg.layers:
    pretrained_layer.trainable = False

# Attach a new, trainable 2-way softmax output under the same layer name.
vgg.add(Dense(2, activation='softmax', name='predictions'))

In [23]:
# Re-print the architecture after replacing the output layer; the parameter
# totals show how many weights are trainable versus frozen.
vgg.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              102764544 
_________________________________________________________________
fc1_drop (Dropout)           (None, 4096)              0         
_________________________________________________________________
fc2 (Dense)                  (None, 4096)              16781312  
_________________________________________________________________
fc2_drop (Dropout)           (None, 4096)              0         
_________________________________________________________________
predictions (Dense)          (None, 2)                 8194      
=================================================================
Total params: 134,268,738
Trainable params: 8,194
Non-trainable params: 134,260,544
_________________________________________________________________

Train Cats vs. Dogs model on dataset in batches


In [24]:
# Images per batch; used by the data generators and by the prediction calls.
batch_size = 32

In [25]:
# Training generator: VGG preprocessing plus random augmentation (small
# rotations, shifts, shear, zoom, channel shifts and horizontal flips).
datagen = ImageDataGenerator(
    preprocessing_function=vgg_preprocess,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    zoom_range=0.1,
    channel_shift_range=10,
    horizontal_flip=True)

# Validation generator: same preprocessing but NO augmentation. Validation
# metrics should be measured on the unmodified images; routing them through
# the augmenting generator makes val_loss / val_acc noisy and not comparable
# between runs.
val_datagen = ImageDataGenerator(preprocessing_function=vgg_preprocess)

batches = datagen.flow_from_directory(os.path.join(path,'train'), target_size=(224,224),
                                      class_mode='categorical', shuffle=True, batch_size=batch_size)
# shuffle stays True here: the fit calls request more validation steps than
# one pass over the folder provides, so an unshuffled iterator would count
# the first class a second time.
val_batches = val_datagen.flow_from_directory(os.path.join(path,'valid'), target_size=(224,224),
                                              class_mode='categorical', shuffle=True, batch_size=batch_size)


Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.

Train last layer in updated VGG model (with a higher learning rate)

  • This is done in order to get the last layer's weights to be "in the ballpark" before retraining all dense layers

In [26]:
# Configure training: Adam with lr=0.001, categorical cross-entropy (the
# generators use class_mode='categorical'), and accuracy as the reported metric.
vgg.compile(optimizer=Adam(lr=0.001),loss='categorical_crossentropy',metrics=['accuracy'])

In [27]:
# Train for 2 epochs of 1000 batches each, evaluating on 100 validation
# batches after every epoch. At this point only the new 'predictions' layer
# is trainable; all other layers were frozen when the head was replaced.
vgg.fit_generator(batches, steps_per_epoch=1000, epochs=2,
                  validation_data=val_batches, validation_steps=100, verbose=2)


Epoch 1/2
317s - loss: 0.2124 - acc: 0.9481 - val_loss: 0.1274 - val_acc: 0.9689
Epoch 2/2
314s - loss: 0.2032 - acc: 0.9583 - val_loss: 0.1609 - val_acc: 0.9694
Out[27]:
<tensorflow.contrib.keras.python.keras.callbacks.History at 0x7f09c5145898>

In [19]:
# Continue training with the same settings for 5 more epochs.
# NOTE(review): this cell's execution count (In [19]) is lower than that of
# the model-building cell (In [20]), so its saved output appears to come from
# an earlier run -- re-execute in order to confirm the numbers.
vgg.fit_generator(batches, steps_per_epoch=1000, epochs=5,
                  validation_data=val_batches, validation_steps=100, verbose=2)


Epoch 1/5
162s - loss: 0.9479 - acc: 0.9409 - val_loss: 0.8160 - val_acc: 0.9494
Epoch 2/5
158s - loss: 1.1939 - acc: 0.9257 - val_loss: 1.0779 - val_acc: 0.9331
Epoch 3/5
160s - loss: 1.0565 - acc: 0.9343 - val_loss: 0.9167 - val_acc: 0.9431
Epoch 4/5
161s - loss: 1.2665 - acc: 0.9212 - val_loss: 0.8361 - val_acc: 0.9481
Epoch 5/5
168s - loss: 0.9296 - acc: 0.9422 - val_loss: 0.7757 - val_acc: 0.9519
Out[19]:
<tensorflow.contrib.keras.python.keras.callbacks.History at 0x7f09c8544f28>

Re-train all dense layers, excluding dropout (training accuracy is below validation accuracy, which suggests the model is underfitting the dataset)


In [ ]:
# Unfreeze the fully connected layers; the convolutional base stays frozen.
# Dropout layers own no weights, so only Dense layers need switching on.
# Without this step every layer except the output is still frozen and the
# fit call below would merely keep training the output layer.
for layer in vgg.layers:
    if isinstance(layer, Dense):
        layer.trainable = True

# A change to `trainable` only takes effect once the model is compiled again.
# The small learning rate is a starting point chosen so that the pretrained
# dense weights are adjusted gently -- tune as needed.
vgg.compile(optimizer=Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])

vgg.fit_generator(batches, steps_per_epoch=1000, epochs=3,
                  validation_data=val_batches, validation_steps=100, verbose=2)

In [ ]:
# Write the current model weights to an HDF5 file in the working directory.
vgg.save_weights('vgg_model_v2.h5')

In [ ]:
# Restore weights from the file written by the save_weights call; the model
# must already have the 2-class architecture for the shapes to match.
vgg.load_weights('vgg_model_v2.h5')

View predictions of Images in the Validation Set


In [ ]:
# Dedicated generator for inference: VGG preprocessing only, no random
# augmentation, so the model scores the same images that are displayed later.
pred_datagen = ImageDataGenerator(preprocessing_function=vgg_preprocess)

# shuffle=False is essential. The iterator's `filenames` and `classes` are in
# directory order, while flow_from_directory shuffles by default; without
# this flag the predictions would not line up with the labels or file names.
val_total = pred_datagen.flow_from_directory(os.path.join(path,'valid'), target_size=(224, 224),
                                             class_mode=None, shuffle=False, batch_size=1)

# Materialise the whole validation set as one array (one image per draw).
val_data = np.concatenate([val_total.next() for i in range(val_total.n)])

# Run the network once and derive both results from the same output:
# `preds` is the arg-max class, `probs` is the probability of class 0.
class_probs = vgg.predict_proba(val_data, batch_size=batch_size, verbose=2)
preds = class_probs.argmax(axis=-1)
probs = class_probs[:,0]

filenames = val_total.filenames
val_classes = val_total.classes

In [ ]:
# Number of images shown per figure in the inspection cells.
n_view = 4

In [ ]:
def plot_cats_dogs(n_view, idx):
    """Show up to ``n_view`` validation images in a single row.

    Each image is read from the 'valid' folder under the notebook-level
    ``path`` and titled with its entry in the notebook-level ``probs`` array
    (the predicted probability of class 0).

    Args:
        n_view: number of subplot columns, and the maximum number of images
            drawn.
        idx: sequence of integer positions into ``filenames`` / ``probs``.
            Only the first ``n_view`` entries are used; if it is shorter,
            the remaining columns are simply left empty.
    """
    fig = plt.figure(figsize=(12,6))
    # A selection can contain fewer items than requested (for instance when
    # very few images are misclassified); draw what exists instead of raising
    # IndexError.
    n_shown = min(n_view, len(idx))
    for i in range(n_shown):
        ax = fig.add_subplot(1, n_view, i+1)
        ax.imshow(mpimg.imread(os.path.join(path,'valid',filenames[idx[i]])))
        ax.set_title('prob: ' + str(probs[idx[i]]))

Correct Images (Both Cats and Dogs)


In [ ]:
# Mask of validation images whose predicted class equals the true class.
is_right = preds == val_classes
correct = np.where(is_right)[0]

# Shuffle in place so that the leading slice is a random sample.
np.random.shuffle(correct)

corr_idx = correct[:n_view]

In [ ]:
# Display a random handful of correctly classified validation images.
plot_cats_dogs(n_view, corr_idx)

Incorrect Images (Both Cats and Dogs)


In [ ]:
# Mask of validation images whose predicted class differs from the true class.
is_wrong = preds != val_classes
incorrect = np.where(is_wrong)[0]

# Shuffle in place so that the leading slice is a random sample.
np.random.shuffle(incorrect)

incorr_idx = incorrect[:n_view]

In [ ]:
# Display a random handful of misclassified validation images.
plot_cats_dogs(n_view, incorr_idx)

Confident Cat Correct Classifications


In [ ]:
# Validation indices predicted as class 0 where the prediction is right.
is_correct_cat = (preds == 0) & (preds == val_classes)
correct_cats = np.where(is_correct_cat)[0]

# `probs` is the class-0 probability, so the largest values are the most
# confident class-0 predictions: sort ascending, reverse, keep the first few.
cat_rank = np.argsort(probs[correct_cats])
most_correct_cats = cat_rank[::-1][:n_view]

In [ ]:
# `most_correct_cats` indexes into `correct_cats`; map back to validation-set
# positions before plotting.
plot_cats_dogs(n_view, correct_cats[most_correct_cats])

Confident Dog Correct Classifications


In [ ]:
# Validation indices predicted as class 1 where the prediction is right.
correct_dogs = np.where((preds==1) & (preds==val_classes))[0]
# `probs` holds the class-0 probability, so the most confident class-1
# predictions are the ones with the SMALLEST values. Sort ascending; the
# previous descending order selected the least confident ones instead.
most_correct_dogs = np.argsort(probs[correct_dogs])[:n_view]

In [ ]:
# `most_correct_dogs` indexes into `correct_dogs`; map back to validation-set
# positions before plotting.
plot_cats_dogs(n_view, correct_dogs[most_correct_dogs])

Most Confident Dog Predictions That Were Actually Cats


In [ ]:
# Validation indices predicted as class 1 where the prediction is wrong.
incorrect_dogs = np.where((preds==1) & (preds!=val_classes))[0]
# `probs` holds the class-0 probability, so the most confident (and wrong)
# class-1 predictions have the SMALLEST values. Sort ascending; the previous
# descending order surfaced the borderline cases rather than the confident
# mistakes.
most_incorrect_dogs = np.argsort(probs[incorrect_dogs])[:n_view]

In [ ]:
# `most_incorrect_dogs` indexes into `incorrect_dogs`; map back to
# validation-set positions before plotting.
plot_cats_dogs(n_view, incorrect_dogs[most_incorrect_dogs])

Most Confident Cat Predictions That Were Actually Dogs


In [ ]:
# Validation indices predicted as class 0 where the prediction is wrong.
is_wrong_cat = (preds == 0) & (preds != val_classes)
incorrect_cats = np.where(is_wrong_cat)[0]

# Largest class-0 probability first: these are the most confident mistakes.
wrong_cat_rank = np.argsort(probs[incorrect_cats])
most_incorrect_cats = wrong_cat_rank[::-1][:n_view]

In [ ]:
# `most_incorrect_cats` indexes into `incorrect_cats`; map back to
# validation-set positions before plotting.
plot_cats_dogs(n_view, incorrect_cats[most_incorrect_cats])

Most Uncertain (i.e. probability close to 0.5)


In [ ]:
# Distance of each class-0 probability from 0.5; the smallest distances are
# the predictions the model is least sure about.
distance_from_half = np.abs(probs - 0.5)
most_uncertain = np.argsort(distance_from_half)

In [ ]:
# The full ranking is passed in; plot_cats_dogs reads only its first n_view
# entries, i.e. the images whose probability is closest to 0.5.
plot_cats_dogs(n_view, most_uncertain)