Image Classification of Dogs vs. Cats Using CNN Ensemble

Imports & environment


In [1]:
import os
import numpy as np

from glob import glob
from shutil import copyfile
from vgg_bn import Vgg16BN
from keras.callbacks import ModelCheckpoint

# Anchor every path on the directory the notebook kernel was started from.
ROOT_DIR = os.getcwd()
# Data directory directly below the project root (no trailing slash).
DATA_HOME_DIR = '/'.join((ROOT_DIR, 'data'))
%matplotlib inline


Using Theano backend.
Using gpu device 0: GeForce GTX 980M (CNMeM is enabled with initial size: 90.0% of memory, cuDNN 5105)
/home/robert/anaconda3/lib/python3.5/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.
  warnings.warn(warn)
/home/robert/anaconda3/lib/python3.5/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
/home/robert/anaconda3/lib/python3.5/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

Config & Hyperparameters


In [10]:
# paths
# Every directory string ends with '/' because later cells build file names by
# plain string concatenation (e.g. model_path + <file name>).
data_path = DATA_HOME_DIR + '/'
# data_path already carries the trailing '/', so the sub-directories are
# appended without a leading one (the previous form produced 'data//train/').
train_path = data_path + 'train/'
valid_path = data_path + 'valid/'
test_path = data_path + 'test/'
model_path = ROOT_DIR + '/models/'
submission_path = ROOT_DIR + '/submissions/'

# data
# Input size handed to Vgg16BN as size=(img_width, img_height).
img_width, img_height = 224, 224
batch_size = 64
# Sample counts passed to vgg.fit / vgg.test.
# NOTE(review): presumably these must match the number of images on disk — verify.
nb_train_samples = 23000
nb_valid_samples = 2000
nb_test_samples = 12500
classes = ["cats", "dogs"]
n_classes = len(classes)

# model
nb_epoch = 10
# Used both as the `aug` argument of vgg.fit / vgg.test and as the number of
# prediction passes that are averaged on the test set.
nb_aug = 5
# Learning rate passed to Vgg16BN.
lr = 0.001

Build the VGG Model with Batch Normalization


In [3]:
# Instantiate the Vgg16BN wrapper with the input size, class count, batch size
# and learning rate chosen in the config cell.
vgg = Vgg16BN(
    size=(img_width, img_height),
    n_classes=n_classes,
    batch_size=batch_size,
    lr=lr,
)
# Keep a direct handle on the model object exposed by the wrapper.
model = vgg.model

# Print the layer-by-layer overview of the network.
model.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
lambda_1 (Lambda)                (None, 3, 224, 224)   0           lambda_input_1[0][0]             
____________________________________________________________________________________________________
zeropadding2d_1 (ZeroPadding2D)  (None, 3, 226, 226)   0           lambda_1[0][0]                   
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 64, 224, 224)  0           zeropadding2d_1[0][0]            
____________________________________________________________________________________________________
zeropadding2d_2 (ZeroPadding2D)  (None, 64, 226, 226)  0           convolution2d_1[0][0]            
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 64, 224, 224)  0           zeropadding2d_2[0][0]            
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 64, 112, 112)  0           convolution2d_2[0][0]            
____________________________________________________________________________________________________
zeropadding2d_3 (ZeroPadding2D)  (None, 64, 114, 114)  0           maxpooling2d_1[0][0]             
____________________________________________________________________________________________________
convolution2d_3 (Convolution2D)  (None, 128, 112, 112) 0           zeropadding2d_3[0][0]            
____________________________________________________________________________________________________
zeropadding2d_4 (ZeroPadding2D)  (None, 128, 114, 114) 0           convolution2d_3[0][0]            
____________________________________________________________________________________________________
convolution2d_4 (Convolution2D)  (None, 128, 112, 112) 0           zeropadding2d_4[0][0]            
____________________________________________________________________________________________________
maxpooling2d_2 (MaxPooling2D)    (None, 128, 56, 56)   0           convolution2d_4[0][0]            
____________________________________________________________________________________________________
zeropadding2d_5 (ZeroPadding2D)  (None, 128, 58, 58)   0           maxpooling2d_2[0][0]             
____________________________________________________________________________________________________
convolution2d_5 (Convolution2D)  (None, 256, 56, 56)   0           zeropadding2d_5[0][0]            
____________________________________________________________________________________________________
zeropadding2d_6 (ZeroPadding2D)  (None, 256, 58, 58)   0           convolution2d_5[0][0]            
____________________________________________________________________________________________________
convolution2d_6 (Convolution2D)  (None, 256, 56, 56)   0           zeropadding2d_6[0][0]            
____________________________________________________________________________________________________
zeropadding2d_7 (ZeroPadding2D)  (None, 256, 58, 58)   0           convolution2d_6[0][0]            
____________________________________________________________________________________________________
convolution2d_7 (Convolution2D)  (None, 256, 56, 56)   0           zeropadding2d_7[0][0]            
____________________________________________________________________________________________________
maxpooling2d_3 (MaxPooling2D)    (None, 256, 28, 28)   0           convolution2d_7[0][0]            
____________________________________________________________________________________________________
zeropadding2d_8 (ZeroPadding2D)  (None, 256, 30, 30)   0           maxpooling2d_3[0][0]             
____________________________________________________________________________________________________
convolution2d_8 (Convolution2D)  (None, 512, 28, 28)   0           zeropadding2d_8[0][0]            
____________________________________________________________________________________________________
zeropadding2d_9 (ZeroPadding2D)  (None, 512, 30, 30)   0           convolution2d_8[0][0]            
____________________________________________________________________________________________________
convolution2d_9 (Convolution2D)  (None, 512, 28, 28)   0           zeropadding2d_9[0][0]            
____________________________________________________________________________________________________
zeropadding2d_10 (ZeroPadding2D) (None, 512, 30, 30)   0           convolution2d_9[0][0]            
____________________________________________________________________________________________________
convolution2d_10 (Convolution2D) (None, 512, 28, 28)   0           zeropadding2d_10[0][0]           
____________________________________________________________________________________________________
maxpooling2d_4 (MaxPooling2D)    (None, 512, 14, 14)   0           convolution2d_10[0][0]           
____________________________________________________________________________________________________
zeropadding2d_11 (ZeroPadding2D) (None, 512, 16, 16)   0           maxpooling2d_4[0][0]             
____________________________________________________________________________________________________
convolution2d_11 (Convolution2D) (None, 512, 14, 14)   0           zeropadding2d_11[0][0]           
____________________________________________________________________________________________________
zeropadding2d_12 (ZeroPadding2D) (None, 512, 16, 16)   0           convolution2d_11[0][0]           
____________________________________________________________________________________________________
convolution2d_12 (Convolution2D) (None, 512, 14, 14)   0           zeropadding2d_12[0][0]           
____________________________________________________________________________________________________
zeropadding2d_13 (ZeroPadding2D) (None, 512, 16, 16)   0           convolution2d_12[0][0]           
____________________________________________________________________________________________________
convolution2d_13 (Convolution2D) (None, 512, 14, 14)   0           zeropadding2d_13[0][0]           
____________________________________________________________________________________________________
maxpooling2d_5 (MaxPooling2D)    (None, 512, 7, 7)     0           convolution2d_13[0][0]           
____________________________________________________________________________________________________
flatten_1 (Flatten)              (None, 25088)         0           maxpooling2d_5[0][0]             
____________________________________________________________________________________________________
dense_1 (Dense)                  (None, 4096)          0           flatten_1[0][0]                  
____________________________________________________________________________________________________
batchnormalization_1 (BatchNormal(None, 4096)          0           dense_1[0][0]                    
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 4096)          0           batchnormalization_1[0][0]       
____________________________________________________________________________________________________
dense_2 (Dense)                  (None, 4096)          0           dropout_1[0][0]                  
____________________________________________________________________________________________________
batchnormalization_2 (BatchNormal(None, 4096)          0           dense_2[0][0]                    
____________________________________________________________________________________________________
dropout_2 (Dropout)              (None, 4096)          0           batchnormalization_2[0][0]       
____________________________________________________________________________________________________
dense_4 (Dense)                  (None, 2)             8194        dropout_2[0][0]                  
====================================================================================================
Total params: 8194
____________________________________________________________________________________________________

In [12]:
# Run tag (input size, epochs, augmentation count, learning rate) shared by the
# checkpoint file name here and the submission file name later on.
info_string = "{0}x{1}_{2}epoch_{3}aug_{4}lr_vgg16-bn".format(img_width, img_height, nb_epoch, nb_aug, lr)
# '{val_loss:.2f}' is deliberately left unformatted: ModelCheckpoint fills it in
# from the epoch's logs each time it writes a file.
ckpt_fn = model_path + '{val_loss:.2f}-loss_' + info_string + '.h5'

# Make sure the target directory exists before training starts, so the first
# checkpoint write cannot fail on a missing folder after an epoch of work.
os.makedirs(model_path, exist_ok=True)

# Save weights only, and only when the monitored validation loss improves.
ckpt = ModelCheckpoint(filepath=ckpt_fn,
                       monitor='val_loss',
                       save_best_only=True,
                       save_weights_only=True)

Train the Model


In [13]:
# Training options gathered in one place; the checkpoint callback stores the
# best weights seen so far while training runs.
# NOTE(review): the exact meaning of `aug` is defined inside Vgg16BN.fit — confirm there.
fit_kwargs = dict(
    nb_trn_samples=nb_train_samples,
    nb_val_samples=nb_valid_samples,
    nb_epoch=nb_epoch,
    callbacks=[ckpt],
    aug=nb_aug,
)
vgg.fit(train_path, valid_path, **fit_kwargs)

Predict on Test Data


In [11]:
# generate predictions
# Run the test set through the model nb_aug times and average the results.
# NOTE(review): `aug=nb_aug` is forwarded unchanged to vgg.test on every pass;
# how Vgg16BN.test interprets it is not visible here — confirm.
if nb_aug < 1:
    # Without this guard an empty loop would leave `predictions` undefined (or,
    # worse, silently reuse a stale value from an earlier run of this cell).
    raise ValueError("nb_aug must be at least 1, got {0}".format(nb_aug))

predictions = None
for aug in range(nb_aug):
    # `.format` (not a comma) so the pass number is substituted into the message.
    print("Generating predictions for Augmentation {0}...".format(aug + 1))
    aug_pred, filenames = vgg.test(test_path, nb_test_samples, aug=nb_aug)
    if predictions is None:
        # The first pass seeds the running sum.
        predictions = aug_pred
    else:
        predictions += aug_pred

print("Averaging Predictions Across Augmentations...")
predictions /= nb_aug


Generating predictions for Augmentation... 0
Found 12500 images belonging to 1 classes.
Generating predictions for Augmentation... 1
Found 12500 images belonging to 1 classes.
Generating predictions for Augmentation... 2
Found 12500 images belonging to 1 classes.
Generating predictions for Augmentation... 3
Found 12500 images belonging to 1 classes.
Generating predictions for Augmentation... 4
Found 12500 images belonging to 1 classes.
Averaging Predictions Across Augmentations...

In [14]:
# clip predictions
# Bound every averaged probability to the closed interval [c, 1 - c].
# NOTE(review): presumably done to limit the penalty for confident mistakes
# under the competition metric — confirm.
c = 0.01
preds = np.clip(predictions, a_min=c, a_max=1 - c)

In [15]:
# Submission file is named after the same run tag as the checkpoints.
sub_file = submission_path + info_string + '.csv'

# Create the output directory up front so open() cannot fail on a missing folder.
os.makedirs(submission_path, exist_ok=True)

print("Writing Predictions to CSV...")
with open(sub_file, 'w') as f:
    f.write('id,label\n')
    for i, image_name in enumerate(filenames):
        if i % 2500 == 0:
            print(i, '/', nb_test_samples)
        # The id is the file name without directory or extension; splitext only
        # strips the real suffix, unlike replacing '.jpg' anywhere in the name.
        image_id = os.path.splitext(os.path.basename(image_name))[0]
        # Only column 1 is submitted.
        # NOTE(review): presumably the "dogs" probability given `classes` order — confirm.
        f.write('%s,%.6f\n' % (image_id, preds[i, 1]))
# Reported after the `with` block, i.e. once the file has been flushed and closed.
print("Done.")


Writing Predictions to CSV...
0 / 12500
2500 / 12500
5000 / 12500
7500 / 12500
10000 / 12500
Done.