In [1]:
%cd /home/ubuntu/kaggle/state-farm-distracted-driver-detection
# Make sure you are in the main directory (state-farm-distracted-driver-detection)
%pwd
Out[1]:
In [2]:
# Create references to key directories
import os, sys
from glob import glob
from matplotlib import pyplot as plt
import numpy as np
import keras
np.set_printoptions(precision=4, linewidth=100)
current_dir = os.getcwd()
CHALLENGE_HOME_DIR = current_dir
DATA_HOME_DIR = current_dir+'/data'
In [3]:
# Allow relative imports from the parent directory
sys.path.insert(1, os.path.join(sys.path[0], '..'))
# Import the fast.ai helper modules
import utils; reload(utils)
from utils import *
from utils.utils import *
from utils.vgg16 import Vgg16
# Instantiate plotting tool
%matplotlib inline
In [4]:
# Additional imports used by the feature-caching helpers in utils.py
import bcolz
from numpy.random import random, permutation
In [5]:
%cd $DATA_HOME_DIR
path = DATA_HOME_DIR + '/'
test_path = path + 'test/'
results_path= path + 'results/'
train_path=path + 'train/'
valid_path=path + 'valid/'
In [6]:
#Set constants. You can experiment with no_of_epochs to improve the model
batch_size=64
no_of_epochs=3
In [7]:
batches = get_batches(train_path, batch_size=batch_size)
val_batches = get_batches(valid_path, batch_size=batch_size*2, shuffle=False)
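`get_batches` comes from the fast.ai `utils.py`, which is not shown in this notebook. As a rough sketch, it is assumed to be a thin wrapper around Keras 1's `flow_from_directory`:
# Assumed implementation, following the fast.ai utils.py helper of the same name
def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True,
                batch_size=4, class_mode='categorical', target_size=(224, 224)):
    # Yields (images, one-hot labels) batches read from dirname/<class>/<image>.jpg
    return gen.flow_from_directory(dirname, target_size=target_size,
                                   class_mode=class_mode, shuffle=shuffle,
                                   batch_size=batch_size)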
In [8]:
(val_classes, trn_classes, val_labels, trn_labels, val_filenames, filenames,
    test_filenames) = get_classes(path)
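`get_classes` is another fast.ai helper this notebook relies on. A minimal sketch of its assumed behaviour (walk the train/valid/test folders once, unshuffled, and return class indices, one-hot labels, and filenames; `onehot` is also assumed to come from `utils`):
# Assumed implementation, following the fast.ai utils.py helper of the same name
def get_classes(path):
    batches = get_batches(path + 'train', shuffle=False, batch_size=1)
    val_batches = get_batches(path + 'valid', shuffle=False, batch_size=1)
    test_batches = get_batches(path + 'test', shuffle=False, batch_size=1)
    # Returns val/train class indices, val/train one-hot labels, and val/train/test filenames
    return (val_batches.classes, batches.classes,
            onehot(val_batches.classes), onehot(batches.classes),
            val_batches.filenames, batches.filenames, test_batches.filenames)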
The simple convolutional model we used on the sample data should work better with the full dataset. Let's try it out.
In [11]:
def simple_conv(batches):
    model = Sequential([
        BatchNormalization(axis=1, input_shape=(3,224,224)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D((3,3)),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D((3,3)),
        Flatten(),
        Dense(200, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches,
                        nb_val_samples=val_batches.nb_sample)
    model.optimizer.lr = 0.001
    model.fit_generator(batches, batches.nb_sample, nb_epoch=4, validation_data=val_batches,
                        nb_val_samples=val_batches.nb_sample)
    return model
In [13]:
model = simple_conv(batches)
In [14]:
model.save_weights(path+'models/simple_conv.h5')
In [9]:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.1,
shear_range=0.1, channel_shift_range=25, width_shift_range=0.1)
da_batches = get_batches(train_path, gen_t, batch_size=batch_size)
In [12]:
model = simple_conv(da_batches)
In [13]:
model.save_weights(path+'models/simple_conv_da_1.h5')
In [14]:
model.optimizer.lr = 0.0001
model.fit_generator(da_batches, da_batches.nb_sample, nb_epoch=4, validation_data=val_batches,
nb_val_samples=val_batches.nb_sample)
Out[14]:
In [15]:
model.save_weights(path+'models/simple_conv_da_2.h5')
If the results are still unstable (the validation accuracy jumps around from epoch to epoch), a deeper model with dropout should help.
Create a Deeper model with dropout
In [9]:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.1,
shear_range=0.1, channel_shift_range=25, width_shift_range=0.1)
batches = get_batches(train_path, gen_t, batch_size=batch_size)
In [10]:
model = Sequential([
    BatchNormalization(axis=1, input_shape=(3,224,224)),
    Convolution2D(32,3,3, activation='relu'),
    BatchNormalization(axis=1),
    MaxPooling2D(),
    Convolution2D(64,3,3, activation='relu'),
    BatchNormalization(axis=1),
    MaxPooling2D(),
    Convolution2D(128,3,3, activation='relu'),
    BatchNormalization(axis=1),
    MaxPooling2D(),
    Flatten(),
    Dense(200, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(200, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(10, activation='softmax')
])
In [11]:
model.compile(Adam(lr=10e-5), loss='categorical_crossentropy', metrics=['accuracy'])
In [19]:
model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches,
nb_val_samples=val_batches.nb_sample)
Out[19]:
In [20]:
model.save_weights(path+'models/deep_conv_da_1.h5')
In [12]:
model.load_weights(path+'models/deep_conv_da_1.h5')
The model is underfitting; let's increase the learning rate.
In [13]:
model.optimizer.lr=0.001
model.fit_generator(batches, batches.nb_sample, nb_epoch=10, validation_data=val_batches,
nb_val_samples=val_batches.nb_sample)
Out[13]:
In [14]:
model.save_weights(path+'models/deep_conv_da_2.h5')
If the model were overfitting, we would need to decrease the learning rate.
Let's decrease the learning rate and see if we get better results.
In [15]:
model.optimizer.lr=0.00001
model.fit_generator(batches, batches.nb_sample, nb_epoch=5, validation_data=val_batches,
nb_val_samples=val_batches.nb_sample)
Out[15]:
In [16]:
model.save_weights(path+'models/deep_conv_da_3.h5')
The accuracy is similar and training is more stable. Next, let's try the pre-trained VGG16 model.
Since we have so little data, and it is similar to ImageNet images (full-color photos), using pre-trained VGG weights is likely to be helpful. In fact, it seems likely that we won't need to fine-tune the convolutional layer weights much, if at all, so we can pre-compute the output of the last convolutional layer, as we did in lesson 3 when we experimented with dropout. (However, this means that we can't use full data augmentation, since we can't pre-compute something that changes with every image.)
In [ ]:
vgg = Vgg16()
model=vgg.model
# Keep everything up to and including the last convolutional layer of VGG16
last_conv_idx = [i for i,l in enumerate(model.layers) if type(l) is Convolution2D][-1]
conv_layers = model.layers[:last_conv_idx+1]
In [ ]:
conv_model = Sequential(conv_layers)
In [ ]:
# Let's pre-compute the features, so shuffle must be set to False
batches = get_batches(train_path, batch_size=batch_size, shuffle=False)
# test_batches is needed below when pre-computing the test features
test_batches = get_batches(test_path, batch_size=batch_size, shuffle=False)
In [ ]:
(val_classes, trn_classes, val_labels, trn_labels,
val_filenames, filenames, test_filenames) = get_classes(path)
In [ ]:
# Compute features for the conv layers for the training, validation, and test data
conv_feat = conv_model.predict_generator(batches, batches.nb_sample)
conv_val_feat = conv_model.predict_generator(val_batches, val_batches.nb_sample)
conv_test_feat = conv_model.predict_generator(test_batches, test_batches.nb_sample)
In [ ]:
# save the features for future use
save_array(path+'results/conv_val_feat.dat', conv_val_feat)
save_array(path+'results/conv_test_feat.dat', conv_test_feat)
save_array(path+'results/conv_feat.dat', conv_feat)
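`save_array` and its counterpart `load_array` are the fast.ai helpers that use the bcolz import from earlier. A minimal sketch of the assumed implementation:
# Assumed implementations, following the fast.ai utils.py helpers of the same names
def save_array(fname, arr):
    # Persist the array as a compressed on-disk bcolz carray
    c = bcolz.carray(arr, rootdir=fname, mode='w')
    c.flush()

def load_array(fname):
    # Read the whole carray back into memory as a NumPy array
    return bcolz.open(fname)[:]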
In [ ]:
conv_val_feat.shape
Create a network that sits on top of the pre-computed convolutional features and predicts the 10 classes. This is a simplified version of VGG's dense layers.
In [ ]:
def get_bn_layers(p):
    return [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dropout(p/2),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(p/2),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(10, activation='softmax')
    ]
In [ ]:
p=0.8
In [ ]:
bn_model = Sequential(get_bn_layers(p))
bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
In [ ]:
bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, nb_epoch=1,
             validation_data=(conv_val_feat, val_labels))
In [ ]:
bn_model.optimizer.lr = 0.01
In [ ]:
bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, nb_epoch=2,
validation_data=(conv_val_feat, val_labels))
In [ ]:
bn_model.save_weights(path+'models/bn_dense.h5')
Let's add augmented data and larger dense layers (and therefore more dropout) on top of the pre-computed convolutional features.
In [ ]:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.1,
                                 shear_range=0.1, channel_shift_range=25, width_shift_range=0.1)
da_batches = get_batches(train_path, gen_t, batch_size=batch_size, shuffle=False)
Create a dataset of convolutional features that is 5x the size of the original training set (5 augmented variations of each image from the ImageDataGenerator).
In [ ]:
da_conv_feat = conv_model.predict_generator(da_batches, da_batches.nb_sample*5)
In [ ]:
save_array(path+'results/da_conv_feat.dat', da_conv_feat)
Add the real training data in its non-augmented form
In [ ]:
da_conv_feat = np.concatenate([da_conv_feat, conv_feat])
In [ ]:
# Since we've now gotten a dataset 6x bigger than before, we'll need to copy our labels 6x too
da_trn_labels = np.concatenate([trn_labels]*6)
In [ ]:
def get_bn_da_layers(p):
    return [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dropout(p),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(10, activation='softmax')
    ]
In [ ]:
p=0.8
In [ ]:
bn_model = Sequential(get_bn_da_layers(p))
bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
In [ ]:
# Let's train the model with the larger set of pre-computed augmented features
bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=1,
validation_data=(conv_val_feat, val_labels))
In [ ]:
bn_model.optimizer.lr=0.01
bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=4,
validation_data=(conv_val_feat, val_labels))
In [ ]:
bn_model.optimizer.lr=0.0001
bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=4,
validation_data=(conv_val_feat, val_labels))
In [ ]:
bn_model.save_weights(path+'models/bn_da_dense.h5')
Try using a combination of pseudo-labeling and knowledge distillation so we can make use of unlabeled data (i.e. do semi-supervised learning). For our initial experiment we'll use the validation set as the unlabeled data, so that we can see that it is working without touching the test set.
In [ ]:
val_pseudo = bn_model.predict(conv_val_feat, batch_size=batch_size)
In [ ]:
# Concatenate the pseudo-labels with the (augmented) training labels
comb_pseudo = np.concatenate([da_trn_labels, val_pseudo])
In [ ]:
# ...and the validation conv features with the augmented training features
comb_feat = np.concatenate([da_conv_feat, conv_val_feat])
In [ ]:
# fine-tune the model using this combined training set
bn_model.load_weights(path+'models/bn_da_dense.h5')
In [ ]:
bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=1,
validation_data=(conv_val_feat, val_labels))
In [ ]:
bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=4,
validation_data=(conv_val_feat, val_labels))
In [ ]:
bn_model.optimizer.lr=0.00001
bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=4,
validation_data=(conv_val_feat, val_labels))
In [ ]:
# There is a distinct improvement, although the validation set isn't large.
# A significant improvement can be found when using the test data.
bn_model.save_weights(path+'models/bn-ps8.h5')
In [15]:
test_batches = get_batches(test_path, shuffle=False, batch_size=batch_size)
In [16]:
# Predict test-set probabilities with the model currently in memory
preds = model.predict_generator(test_batches, test_batches.nb_sample)
In [17]:
preds[:2]
Out[17]:
In [18]:
# Clip predictions away from 0 and 1 to bound the log-loss penalty for confident mistakes
def do_clip(arr, mx): return np.clip(arr, (1-mx)/9, mx)
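Kaggle scores this competition with multi-class log loss, which punishes confident wrong predictions very heavily, so clipping bounds the worst-case per-image penalty. A quick illustrative calculation with the 0.93 ceiling used below (the floor (1-0.93)/9 spreads the leftover probability across the other 9 classes):
import numpy as np

mx = 0.93
floor = (1 - mx) / 9        # ~0.0078, minimum probability after clipping
print(-np.log(floor))       # worst case after clipping: ~4.86 per image
print(-np.log(mx))          # correct, capped prediction: ~0.073 per image
print(-np.log(1e-15))       # without clipping, a confident mistake could cost ~34.5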
In [19]:
val_preds = model.predict_generator(val_batches, val_batches.nb_sample)  # validation predictions for checking the clip value
keras.metrics.categorical_crossentropy(val_labels, do_clip(val_preds, 0.93)).eval()
In [20]:
subm = do_clip(preds,0.93)
In [31]:
subm_name = path+'results/subm.csv'
In [22]:
# Column order for the submission file: class names sorted by their index (c0..c9)
classes = sorted(batches.class_indices, key=batches.class_indices.get)
In [34]:
submission = pd.DataFrame(subm, columns=classes)
# Drop the leading directory (e.g. 'unknown/') from each test filename
submission.insert(0, 'img', [a[8:] for a in test_filenames])
submission.head()
Out[34]:
In [35]:
submission.tail()
Out[35]:
In [36]:
submission.to_csv(subm_name, index=False, encoding='utf-8')
In [37]:
FileLink(subm_name)
Out[37]:
In [ ]: