New 'clean-pass' of L3HW-SF ~ usin' lessons learned
Wayne Nixalo - 2017-May-23 02:37 -- May-25 00:00
A lot was learned here. Convolutional features, saving them, supplying them in batches from a generator. The fact that model.fit(..) requires one-hot encoded classes (labels), whereas model.fit_generator(..) just needs to know the number of samples or batches it's getting.
How fast a purely Dense model is compared to a Convolutional model -- very big motivation for precomputing convolutional features if that can be done. In this case ~990-1090 seconds CNN vs. 81 seconds FCN.
A point of care: data-augmentation & shuffling. Note to keep in mind memory limits, especially when initializing new Convolutional models.
I may come back to this notebook to redo it proper.
useful links: DataAugmentation: https://github.com/fastai/courses/blob/master/deeplearning1/nbs/lesson3.ipynb
I forgot but reference anyway: https://github.com/fastai/courses/blob/master/deeplearning1/nbs/lesson2.ipynb
Good followthru of lecture & how to save to submission w/ Pandas: https://github.com/philippbayer/cats_dogs_redux/blob/master/Statefarm.ipynb
Me: https://github.com/WNoxchi/Kaukasos/blob/master/FAI/lesson3/L3HW_SF.ipynb
In [1]:
import keras
import bcolz
import os, sys
import numpy as np
import pandas as pd
from glob import glob
from keras.optimizers import Adam
from keras.preprocessing import image
from keras.layers.convolutional import Convolution2D
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import MaxPooling2D
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.models import Sequential
sys.path.insert(1, os.path.join(os.getcwd(), '../utils'))
import utils
from vgg16bn import Vgg16BN
In [2]:
# Directory layout: <cwd>/data/{test,train,valid,results}
HOME_DIR = os.getcwd()
DATA_DIR = HOME_DIR + '/data'
TEST_DIR = DATA_DIR + '/test'
TRAIN_DIR = DATA_DIR + '/train'
VALID_DIR = DATA_DIR + '/valid'
# trailing-slash variants used by the generators & file helpers below
data_path, test_path, train_path, valid_path = (
    d + '/' for d in (DATA_DIR, TEST_DIR, TRAIN_DIR, VALID_DIR))
results_path = data_path + 'results/'
In [3]:
def save_array(fname, arr):
    """Write `arr` to disk as a bcolz carray rooted at `fname`."""
    carr = bcolz.carray(arr, rootdir=fname, mode='w')
    carr.flush()
def load_array(fname):
    """Read back (fully into memory) an array written with save_array()."""
    carr = bcolz.open(fname)
    return carr[:]
def reset_valid(verbose=1):
    """Moves all images in validation set back to
    their respective classes in the training set."""
    counter = 0
    # IPython magic: cd into the validation directory (this cell only
    # runs inside a notebook/IPython session)
    %cd $valid_path
    for i in xrange(10):
        # enter class subdirectory c0..c9
        %cd c"$i"
        g = glob('*.jpg')
        for n in xrange(len(g)):
            # move each image back to the matching class dir under train/
            os.rename(g[n], TRAIN_DIR + '/c' + str(i) + '/' + g[n])
            counter += 1
        %cd ..
    if verbose: print("Moved {} files".format(counter))
# modified from: http://forums.fast.ai/t/statefarm-kaggle-comp/183/20
def set_valid(number=1, verbose=1):
    """Moves <number> subjects from training to validation
    directories. Verbosity 0: Silent; 1: print no. files moved;
    2: print each move operation. Default=1

    Fixes vs. the first draft:
    - the CSV was re-read on every iteration; it is now read once,
    - np.random.randint(0, high=len-1) could never pick the last
      subject (randint's `high` is exclusive),
    - subjects were sampled with replacement, so the same subject could
      be chosen twice and the second pass crashed os.rename with
      OSError on already-moved files; sampling is now without
      replacement.
    """
    if number < 0: number = 0
    # read driver->image list once
    dil = pd.read_csv(data_path + 'driver_imgs_list.csv')
    # group frame by subject in image
    grouped_subjects = dil.groupby('subject')
    subjects = list(grouped_subjects.groups.keys())
    # never ask for more subjects than exist
    number = min(number, len(subjects))
    # pick <number> distinct subjects at random
    chosen = np.random.choice(subjects, size=number, replace=False)
    for subject in chosen:
        # get group assoc w/ subject
        group = grouped_subjects.get_group(subject)
        # loop over group & move imgs to validation dir
        counter = 0
        for (subj, clssnm, img) in group.values:
            source = '{}train/{}/{}'.format(data_path, clssnm, img)
            target = source.replace('train', 'valid')
            if verbose > 1: print('mv {} {}'.format(source, target))
            os.rename(source, target)
            counter += 1
        if verbose: print("Files moved: {}".format(counter))
# function to build FCNet w/ BatchNormalization & Dropout
def create_FCbn_layers(p=0):
    """Build the fully-connected head (with BatchNorm & Dropout) that
    sits on top of the convolutional feature extractor. `p` is the
    dropout probability."""
    # input shape is taken from the conv model's final layer output
    conv_out_shape = Conv_model.layers[-1].output_shape[1:]
    layers = [MaxPooling2D(input_shape=conv_out_shape)]
    layers.append(Flatten())
    layers.append(BatchNormalization())
    layers.append(Dense(4096, activation='relu'))
    layers.append(Dropout(p))
    layers.append(Dense(10, activation='softmax'))
    return layers
In [4]:
# os.mkdir(VAL_DIR)
# for i in xrange(10):
# os.mkdir(VAL_DIR + '/c' + str(i))
# # # another way to do this:
# # %mkdir $VAL_DIR
# # for i in xrange(10):
# # %mkdir $VAL_DIR/c"$i"
In [6]:
# pull everything back into train/, then move 3 random subjects to valid/
reset_valid()
set_valid(number=3)
In [4]:
batch_size = 32            # images per generator batch
target_size = (224, 224)   # VGG16 input resolution
In [11]:
# Data-augmentation generator: small rotations/shifts/zoom/shear plus
# channel shifts. NOTE(review): the same augmented `gen` also feeds the
# validation iterator below -- usually validation is un-augmented; confirm
# that's intended.
gen = image.ImageDataGenerator(rotation_range=10, width_shift_range=0.05,
                               height_shift_range=0.05, zoom_range=0.1,
                               shear_range=0.1, channel_shift_range=10)
# does it matter that I don't set dim_ordering='tf'?
# shuffle=False keeps batch order aligned with .classes / .filenames, so
# precomputed conv features stay matched to their labels.
trn_batches = gen.flow_from_directory(train_path, target_size=target_size,
                                      batch_size=batch_size, shuffle=False, class_mode='categorical')
val_batches = gen.flow_from_directory(valid_path, target_size=target_size,
                                      batch_size=batch_size, shuffle=False, class_mode='categorical')
In [34]:
??utils.get_classes
In [5]:
# VGG16 with BatchNorm (fast.ai wrapper)
VGGbn = Vgg16BN()
In [6]:
# compile with default Adam; 10-class softmax -> categorical crossentropy
VGGbn.model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
I just learned that the utils.vgg_ft_bn(num) function called in the JNB I was referencing just initializes a model and finetunes it in the standard way: pop off the last layer, set all remaining layers to un-trainable, add a single softmax output FC layer, and compile. So as an experiment... how bad an idea would it be, say, to do that & train the output layer, then train all layers including convolutionals?
That kind of messes with the cleaned-up flow of this notebook, but that's fine.
Answer: it's pretty bad. Even with a batch size of 1 the GPU still runs out of memory, if barely. Perhaps using non-augmented data would spare a bit of memory.
In [8]:
# Find the index of the last Convolution2D layer; everything up to and
# including it becomes the fixed convolutional feature extractor.
last_conv_idx = [index for index, layer in enumerate(VGGbn.model.layers) \
                 if type(layer) is Convolution2D][-1]
Conv_layers = VGGbn.model.layers[:last_conv_idx + 1]
Conv_model = Sequential(Conv_layers)
In [17]:
# Precompute convolutional features for train & valid sets (slow CNN pass
# done once, so the FC head can be trained quickly afterwards).
conv_features = Conv_model.predict_generator(trn_batches, trn_batches.nb_sample)
conv_val_feat = Conv_model.predict_generator(val_batches, val_batches.nb_sample)
In [9]:
# Alternative to the cell above: load previously saved conv features.
conv_features = load_array(results_path + 'conv_features.bc')
conv_val_feat = load_array(results_path + 'conv_val_feat.bc')
So, you can save the features at this point. For use later, and also to pull them in batches (use image.ImageDataGenerator() and gen.flow_from_directory(shuffle=False). But I'll already have them all in memory when the convolutional model is done, and I'm going to run a differently-randomly-augmented batch of (unshuffled) data through each time, so there isn't a reason to precompute & save them.
In [ ]:
# # optional: save the convolutional model's output features
# save_array(results_path + 'conv_features.dat', conv_features)
# save_array(results_path + 'conv_val_feat.dat', conv_val_feat)
In [10]:
# Dense-only head trained on the precomputed conv features (dropout 0.3)
FC_model = Sequential(create_FCbn_layers(p=0.3))
FC_model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
In [13]:
# Class ids, one-hot labels, and filenames for valid/train/test, derived
# from the directory structure (see utils.get_classes).
(val_classes, trn_classes, val_labels, trn_labels,
 validation_filenames, training_filenames, testing_filenames) = utils.get_classes(data_path)
In [39]:
# persist the conv features so later sessions can skip the CNN pass
save_array(results_path + 'conv_features.bc', conv_features)
save_array(results_path + 'conv_val_feat.bc', conv_val_feat)
In [41]:
??utils.get_batches
In [16]:
# each epoch on GTX870M ~18 minutes
# wow, no, ~18 min for CNN. FCNet only is ~ 1 minute!
# FC_model.fit(conv_features, trn_labels, batch_size=batch_size,
#              nb_epoch=1, validation_data=(conv_val_feat, val_labels))
# NOTE(review): `FC_model.optimizer.lr = ...` assigns a plain Python
# attribute; in Keras 1 the live learning rate is a backend variable
# (K.set_value) -- confirm this actually changes the LR.
FC_model.optimizer.lr=1e-1
FC_model.fit(conv_features, trn_labels, batch_size=batch_size,
             nb_epoch=1, validation_data=(conv_val_feat, val_labels))
FC_model.optimizer.lr=1e-2
FC_model.fit(conv_features, trn_labels, batch_size=batch_size,
             nb_epoch=1, validation_data=(conv_val_feat, val_labels))
Out[16]:
In [18]:
??utils.to_categorical
In [19]:
# One-hot encode the generator's integer class ids: model.fit() needs
# one-hot targets with categorical_crossentropy.
from utils import to_categorical as onehot
# NOTE(review): `optimizer.lr = ...` assigns a plain attribute; in
# Keras 1 the live LR is a backend variable -- confirm it takes effect.
FC_model.optimizer.lr=1e-4
FC_model.fit(conv_features, onehot(trn_batches.classes), batch_size=batch_size,
             nb_epoch=4, validation_data=(conv_val_feat, onehot(val_batches.classes)))
FC_model.optimizer.lr=1e-4
FC_model.fit(conv_features, onehot(trn_batches.classes), batch_size=batch_size,
             nb_epoch=8, validation_data=(conv_val_feat, onehot(val_batches.classes)))
FC_model.optimizer.lr=1e-6
FC_model.fit(conv_features, onehot(trn_batches.classes), batch_size=batch_size,
             nb_epoch=12, validation_data=(conv_val_feat, onehot(val_batches.classes)))
Out[19]:
In [ ]:
# Plain (un-augmented) generator for test-time prediction -- the earlier
# augmented `gen` must not be applied at test time.
# Fixes: `ImageDataGeneratro` typo (NameError), and missing target_size
# (flow_from_directory defaults to 256x256, but the conv model was built
# on 224x224 inputs).
gen = image.ImageDataGenerator()
tst_batches = gen.flow_from_directory(test_path, target_size=target_size,
                                      batch_size=batch_size, shuffle=False,
                                      class_mode=None)
In [ ]:
# conv features for the test set, then fast FC-only prediction
conv_tst_feat = Conv_model.predict_generator(tst_batches, tst_batches.nb_sample)
preds = FC_model.predict(conv_tst_feat, batch_size=batch_size*2)
In [ ]:
filenames = tst_batches.filenames
# class names ordered by their column index in `preds`
classes = sorted(trn_batches.class_indices, key=trn_batches.class_indices.get)
submission = pd.DataFrame(preds, columns=classes)
# f[8:] presumably strips the 'unknown/' subdirectory prefix from each
# test filename -- TODO confirm against tst_batches.filenames format
submission.insert(0, 'img', [f[8:] for f in filenames])
submission.head()
submission.to_csv(results_path + 'submission.csv', index=False, compression=None)
from IPython.display import FileLink
FileLink(results_path + 'submission.csv')
Once the above is working:
Work-flow:
To save GPU memory (no easy way to free it yet in JNB) init a single Convolutional and Fully-Connected Net model. The convolutional layer weights will not be trained, only used to produce features, so only the FC-model will have its weights saved/reinitialized (if I can re-init without increasing memory load, I'll do that).
For each iteration of the ensemble, a randomly-augmented set of data will be passed through the ConvNet, and those features will be used to train a fresh FC Net. After the training phase, the Conv & FC nets will be used to create a set of predictions which will be stored in an array and returned. That array is then averaged into a single list of predictions, clipped & renormalized, then saved to be uploaded as the final submission.
In [ ]:
def train_FCNet():
    """Train the global FC_model on the precomputed conv features with a
    decreasing learning-rate schedule (1 + 1 + 2 + 4 + 8 epochs).

    Fix: uses one-hot targets via onehot(trn_batches.classes), matching
    the earlier training cells -- model.fit() with
    categorical_crossentropy needs one-hot labels, and the Keras 1
    DirectoryIterator exposes integer class ids as `.classes` (there is
    no `.labels` attribute, so the original raised AttributeError).

    NOTE(review): `optimizer.lr = ...` assigns a plain attribute; in
    Keras 1 the live LR is a backend variable (K.set_value). Kept as-is
    for consistency with the rest of the notebook, but verify it takes
    effect.
    """
    # compute the one-hot targets once per call
    trn_targets = onehot(trn_batches.classes)
    val_targets = onehot(val_batches.classes)
    FC_model.fit(conv_features, trn_targets, batch_size=batch_size,
                 nb_epoch=1, validation_data=(conv_val_feat, val_targets))
    FC_model.optimizer.lr = 1e-1
    FC_model.fit(conv_features, trn_targets, batch_size=batch_size,
                 nb_epoch=1, validation_data=(conv_val_feat, val_targets))
    FC_model.optimizer.lr = 1e-2
    FC_model.fit(conv_features, trn_targets, batch_size=batch_size,
                 nb_epoch=2, validation_data=(conv_val_feat, val_targets))
    FC_model.optimizer.lr = 1e-4
    FC_model.fit(conv_features, trn_targets, batch_size=batch_size,
                 nb_epoch=4, validation_data=(conv_val_feat, val_targets))
    FC_model.optimizer.lr = 1e-6
    FC_model.fit(conv_features, trn_targets, batch_size=batch_size,
                 nb_epoch=8, validation_data=(conv_val_feat, val_targets))
In [ ]:
def Ensemble(num_models=1):
    """Train <num_models> FC nets, each on conv features from a fresh
    random train/valid split with fresh augmentation, and return the
    list of per-model test-set prediction arrays.

    Fixes vs. the first draft:
    - list-comprehension syntax error (`[index, for ...]`),
    - `ImageDataGeneratro` typo,
    - the test iterator now uses the un-augmented `gen_t` (it was built
      from the augmented `gen`) with the model's 224x224 target size,
    - the train/valid iterators are rebuilt inside the loop, after
      reset_valid()/set_valid() change the directory contents,
    - train_FCNet() reads module-level names, so this function binds
      them globally instead of shadowing them with locals.
    """
    global trn_batches, val_batches, conv_features, conv_val_feat, \
           FC_model, Conv_model
    VGGbn = Vgg16BN()
    # last conv layer index -> everything up to it is the fixed extractor
    last_conv_idx = [index for index, layer in enumerate(VGGbn.model.layers)
                     if type(layer) is Convolution2D][-1]
    Conv_layers = VGGbn.model.layers[:last_conv_idx + 1]
    Conv_model = Sequential(Conv_layers)
    # augmented generator for training/validation data
    gen = image.ImageDataGenerator(rotation_range=10, width_shift_range=0.05,
                                   height_shift_range=0.05, zoom_range=0.1,
                                   shear_range=0.1, channel_shift_range=10)
    # un-augmented generator for the (fixed) test set
    gen_t = image.ImageDataGenerator()
    tst_batches = gen_t.flow_from_directory(test_path, target_size=target_size,
                                            batch_size=batch_size, shuffle=False,
                                            class_mode=None)
    pred_array = []
    for i in xrange(num_models):
        # re-split: everything back to train/, then 3 random subjects to
        # valid/. set_valid can raise OSError (already-moved files), so
        # retry until it succeeds.
        oserr = 1
        while oserr:
            oserr = 0
            reset_valid()
            try:
                set_valid(number=3)
            except OSError:
                oserr = 1
        # iterators must be rebuilt after the split changes on disk
        trn_batches = gen.flow_from_directory(train_path, target_size=target_size,
                                              batch_size=batch_size, shuffle=False,
                                              class_mode='categorical')
        val_batches = gen.flow_from_directory(valid_path, target_size=target_size,
                                              batch_size=batch_size, shuffle=False,
                                              class_mode='categorical')
        # precompute conv features -> fast FC-only training
        conv_features = Conv_model.predict_generator(trn_batches, trn_batches.nb_sample)
        conv_val_feat = Conv_model.predict_generator(val_batches, val_batches.nb_sample)
        # fresh FC head for every ensemble member
        FC_model = Sequential(create_FCbn_layers(p=0.3))
        FC_model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
        train_FCNet()
        conv_tst_feat = Conv_model.predict_generator(tst_batches, tst_batches.nb_sample)
        preds = FC_model.predict(conv_tst_feat, batch_size=batch_size * 2)
        pred_array.append(preds)
    return pred_array
In [ ]:
# train 3 ensemble members and persist their test predictions
prediction_array = Ensemble(num_models=3)
save_array(results_path + 'ensemble_predictions.dat', prediction_array)
In [ ]:
# Average the per-model predictions into a single ensemble prediction.
# Fixes: `emsemble_preds` typo (the next line read the undefined name
# `ensemble_preds` -> NameError); np.stack takes the list directly, no
# need for the pass-through comprehension.
ensemble_preds = np.stack(prediction_array)
preds = ensemble_preds.mean(axis=0)
In [ ]:
filenames = tst_batches.filenames
# class names ordered by their column index in `preds`
classes = sorted(trn_batches.class_indices, key=trn_batches.class_indices.get)
submission = pd.DataFrame(preds, columns=classes)
# f[8:] presumably strips the 'unknown/' subdirectory prefix from each
# test filename -- TODO confirm against tst_batches.filenames format
submission.insert(0, 'img', [f[8:] for f in filenames])
submission.head()
submission.to_csv(results_path + 'submission.csv', index=False, compression=None)
from IPython.display import FileLink
FileLink(results_path + 'submission.csv')