New 'clean-pass' of L3HW-SF ~ usin' lessons learned

Wayne Nixalo - 2017-May-23 02:37 -- May-25 00:00

NOTE: Notebook incomplete.

  • A lot was learned here. Convolutional features, saving them, supplying them in batches from a generator. That fact that model.fit(..) requires one-hot encoded classes (labels) whereas model.fit_generator(..) just needs to know the amount of samples or batches it's getting.

  • How fast a purely Dense model is compared to a Convolutional model -- very big motivation for precomputing convolutional features if that can be done. In this case ~990-1090 seconds CNN vs. 81 seconds FCN.

  • Point of care about data-augmentation, & shuffling. Note to keep in mind memory limits, especially when initializing new Convolutional models.

I may come back to this notebook to redo it proper.

useful links: DataAugmentation: https://github.com/fastai/courses/blob/master/deeplearning1/nbs/lesson3.ipynb

I forgot but reference anyway: https://github.com/fastai/courses/blob/master/deeplearning1/nbs/lesson2.ipynb

Good followthru of lecture & how to save to submission w/ Pandas: https://github.com/philippbayer/cats_dogs_redux/blob/master/Statefarm.ipynb

Me: https://github.com/WNoxchi/Kaukasos/blob/master/FAI/lesson3/L3HW_SF.ipynb

Imports


In [1]:
import keras
import bcolz
import os, sys
import numpy as np
import pandas as pd
from glob import glob
from keras.optimizers import Adam
from keras.preprocessing import image
from keras.layers.convolutional import Convolution2D
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import MaxPooling2D
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.models import Sequential

sys.path.insert(1, os.path.join(os.getcwd(), '../utils'))
import utils
from vgg16bn import Vgg16BN


Using Theano backend.
/home/wnixalo/miniconda3/envs/FAI/lib/python2.7/site-packages/theano/gpuarray/dnn.py:135: UserWarning: Your cuDNN version is more recent than Theano. If you encounter problems, try updating Theano or downgrading cuDNN to version 5.1.
  warnings.warn("Your cuDNN version is more recent than "
Using cuDNN version 6021 on context None
Mapped name None to device cuda: GeForce GTX 870M (0000:01:00.0)

Directory setup


In [2]:
HOME_DIR  = os.getcwd()
DATA_DIR  = HOME_DIR + '/data'
TEST_DIR  = DATA_DIR + '/test'
TRAIN_DIR = DATA_DIR + '/train'
VALID_DIR = DATA_DIR + '/valid'

data_path    = DATA_DIR  + '/'
test_path    = TEST_DIR  + '/'
train_path   = TRAIN_DIR + '/'
valid_path   = VALID_DIR + '/'
results_path = DATA_DIR  + '/results/'

Utility functions


In [3]:
def save_array(fname, arr): c=bcolz.carray(arr, rootdir=fname, mode='w'); c.flush()
def load_array(fname): return bcolz.open(fname)[:]

def reset_valid(verbose=1):
    """Moves all images in validation set back to 
    their respective classes in the training set."""
    counter = 0
    %cd $valid_path
    for i in xrange(10):
        %cd c"$i"
        g = glob('*.jpg')
        for n in xrange(len(g)):
            os.rename(g[n], TRAIN_DIR + '/c' + str(i) + '/' + g[n])
            counter += 1
        %cd ..
    if verbose: print("Moved {} files".format(counter))

# modified from: http://forums.fast.ai/t/statefarm-kaggle-comp/183/20
def set_valid(number=1, verbose=1):
    """Moves <number> subjects from training to validation 
    directories. Verbosity 0: Silent; 1: print no. files moved;
    2: print each move operation. Default=1"""
    if number < 0: number = 0
    # repeat for <number> subjects
    for n in xrange(number):
        # read CSV file into Pandas DataFrame
        dil = pd.read_csv(data_path + 'driver_imgs_list.csv')
        # grouped frame by subject in image
        grouped_subjects = dil.groupby('subject')
        # pick subject at random
        subject = grouped_subjects.groups.keys()[np.random.randint(0, \
                                         high=len(grouped_subjects.groups)-1)]
        # get group assoc w/ subject
        group = grouped_subjects.get_group(subject)
        # loop over group & move imgs to validation dir
        counter = 0
        for (subject, clssnm, img) in group.values:
            source = '{}train/{}/{}'.format(data_path, clssnm, img)
            target = source.replace('train', 'valid')
            if verbose > 1: print('mv {} {}'.format(source, target))
            os.rename(source, target)
            counter += 1
        if verbose: print("Files moved: {}".format(counter))
# function to build FCNet w/ BatchNormalization & Dropout
def create_FCbn_layers(p=0):
    return [
            MaxPooling2D(input_shape=Conv_model.layers[-1].output_shape[1:]),
#             MaxPooling2D(),
            Flatten(),
            BatchNormalization(),
            Dense(4096, activation='relu'),
            Dropout(p),
            Dense(10, activation='softmax')
           ]

Creating validation directories


In [4]:
# os.mkdir(VAL_DIR)
# for i in xrange(10):
#     os.mkdir(VAL_DIR + '/c' + str(i))

# # # another way to do this:
# # %mkdir $VAL_DIR
# # for i in xrange(10):
# #     %mkdir $VAL_DIR/c"$i"

Setting/resetting validation set


In [6]:
reset_valid()
set_valid(number=3)


/home/wnixalo/Kaukasos/FAI/lesson3/data/valid
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid/c0
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid/c1
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid/c2
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid/c3
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid/c4
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid/c5
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid/c6
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid/c7
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid/c8
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid/c9
/home/wnixalo/Kaukasos/FAI/lesson3/data/valid
Moved 1869 files
Files moved: 790
Files moved: 591
Files moved: 1196

Parameters


In [4]:
batch_size = 32
target_size = (224, 224)

Train/valid batch generators

Training batches have to be set to not be shuffled. Since the full-model is in two stages, classes & labels will be supplied to the FCNet via the batches-generator; if these are shuffled they won't match up with the output features from the ConvNet. I think.


In [11]:
gen = image.ImageDataGenerator(rotation_range=10, width_shift_range=0.05, 
                                height_shift_range=0.05, zoom_range=0.1, 
                                   shear_range=0.1, channel_shift_range=10)
# does it matter that I don't set dim_ordering='tf'?
trn_batches = gen.flow_from_directory(train_path, target_size=target_size, 
                batch_size=batch_size, shuffle=False, class_mode='categorical')
val_batches = gen.flow_from_directory(valid_path, target_size=target_size, 
                batch_size=batch_size, shuffle=False, class_mode='categorical')


Found 19847 images belonging to 10 classes.
Found 2577 images belonging to 10 classes.

In [34]:
??utils.get_classes

Load VGG16BN model & its weights


In [5]:
VGGbn = Vgg16BN()

In [6]:
VGGbn.model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

I just learned that the utils.vgg_ft_bn(num) function called in the JNB I was referencing is just initializes a model, and finetunes it in the standard way: pop off last layer, set all remaining layers to un-trainable, add a single softmax output FC layer, and compile. So as an experiment... how bad of an idea would, say, doing that & training the output layer then training all layers including convolutionals, be?

That kind of messes with the cleaned-up flow of this notebook, but that's fine.

Answer: it's pretty bad. Even with a batch size of 1 the GPU still runs out of memory, if barely. Perhaps using non-augmented data would spare a bit of memory.

Separate Conv layers & create new ConvNet (w/ vgg weights)

Since I'm generating randomly-augmented data each batch, I can't precompute the Conv features. I'd have to experiment and see the speed vs. accuracy tradeoff.


In [8]:
last_conv_idx = [index for index, layer in enumerate(VGGbn.model.layers) \
                                            if type(layer) is Convolution2D][-1]
Conv_layers = VGGbn.model.layers[:last_conv_idx + 1]
Conv_model = Sequential(Conv_layers)

Run Conv Model on trn/val batches to create features as inputs to FCNet


In [17]:
conv_features = Conv_model.predict_generator(trn_batches, trn_batches.nb_sample)
conv_val_feat = Conv_model.predict_generator(val_batches, val_batches.nb_sample)

In [9]:
conv_features = load_array(results_path + 'conv_features.bc')
conv_val_feat = load_array(results_path + 'conv_val_feat.bc')

So, you can save the features at this point. For use later, and also to pull them in batches (use image.ImageDataGenerator() and gen.flow_from_directory(shuffle=False). But I'll already have them all in memory when the convolutional model is done, and I'm going to run a differently-randomly-augmented batch of (unshuffled) data through each time, so there isn't a reason to precompute & save them.


In [ ]:
# # optional: save the convolutional model's output features
# save_array(results_path + 'conv_features.dat', conv_features)
# save_array(results_path + 'conv_val_feat.dat', conv_val_feat)

Create Fully-Connected Net


In [10]:
FC_model = Sequential(create_FCbn_layers(p=0.3))
FC_model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [13]:
(val_classes, trn_classes, val_labels, trn_labels, 
    validation_filenames, training_filenames, testing_filenames) = utils.get_classes(data_path)


Found 19847 images belonging to 10 classes.
Found 2577 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.

In [39]:
save_array(results_path + 'conv_features.bc', conv_features)
save_array(results_path + 'conv_val_feat.bc', conv_val_feat)

In [41]:
??utils.get_batches

Train FCNet on ConvNet features


In [16]:
# each epoch on GTX870M ~18 minutes
# wow, no, ~18 min for CNN. FCNet only is ~ 1 minute!
# FC_model.fit(conv_features, trn_labels, batch_size=batch_size, 
#              nb_epoch=1, validation_data=(conv_val_feat, val_labels))
FC_model.optimizer.lr=1e-1
FC_model.fit(conv_features, trn_labels, batch_size=batch_size, 
             nb_epoch=1, validation_data=(conv_val_feat, val_labels))
FC_model.optimizer.lr=1e-2
FC_model.fit(conv_features, trn_labels, batch_size=batch_size, 
             nb_epoch=1, validation_data=(conv_val_feat, val_labels))


Train on 19847 samples, validate on 2577 samples
Epoch 1/1
19847/19847 [==============================] - 81s - loss: 7.5692 - acc: 0.5260 - val_loss: 8.2475 - val_acc: 0.4835
Train on 19847 samples, validate on 2577 samples
Epoch 1/1
19847/19847 [==============================] - 81s - loss: 7.1666 - acc: 0.5529 - val_loss: 8.7738 - val_acc: 0.4497
Out[16]:
<keras.callbacks.History at 0x7f13a8759e10>

In [18]:
??utils.to_categorical

In [19]:
from utils import to_categorical as onehot
FC_model.optimizer.lr=1e-4
FC_model.fit(conv_features, onehot(trn_batches.classes), batch_size=batch_size, 
             nb_epoch=4, validation_data=(conv_val_feat, onehot(val_batches.classes)))
FC_model.optimizer.lr=1e-4
FC_model.fit(conv_features, onehot(trn_batches.classes), batch_size=batch_size, 
             nb_epoch=8, validation_data=(conv_val_feat, onehot(val_batches.classes)))
FC_model.optimizer.lr=1e-6
FC_model.fit(conv_features, onehot(trn_batches.classes), batch_size=batch_size, 
             nb_epoch=12, validation_data=(conv_val_feat, onehot(val_batches.classes)))


Train on 19847 samples, validate on 2577 samples
Epoch 1/4
19847/19847 [==============================] - 81s - loss: 6.6582 - acc: 0.5843 - val_loss: 7.9138 - val_acc: 0.5072
Epoch 2/4
19847/19847 [==============================] - 81s - loss: 6.6488 - acc: 0.5862 - val_loss: 7.4051 - val_acc: 0.5382
Epoch 3/4
19847/19847 [==============================] - 81s - loss: 6.9273 - acc: 0.5688 - val_loss: 7.5799 - val_acc: 0.5289
Epoch 4/4
19847/19847 [==============================] - 81s - loss: 6.3319 - acc: 0.6065 - val_loss: 7.5594 - val_acc: 0.5301
Train on 19847 samples, validate on 2577 samples
Epoch 1/8
19847/19847 [==============================] - 81s - loss: 6.3226 - acc: 0.6069 - val_loss: 8.6045 - val_acc: 0.4649
Epoch 2/8
19847/19847 [==============================] - 81s - loss: 6.0437 - acc: 0.6240 - val_loss: 7.9359 - val_acc: 0.5072
Epoch 3/8
19847/19847 [==============================] - 80s - loss: 6.2177 - acc: 0.6134 - val_loss: 7.5561 - val_acc: 0.5305
Epoch 4/8
19847/19847 [==============================] - 80s - loss: 6.2718 - acc: 0.6100 - val_loss: 8.3782 - val_acc: 0.4796
Epoch 5/8
19847/19847 [==============================] - 80s - loss: 6.3415 - acc: 0.6058 - val_loss: 7.9296 - val_acc: 0.5068
Epoch 6/8
19847/19847 [==============================] - 80s - loss: 6.0521 - acc: 0.6238 - val_loss: 7.7263 - val_acc: 0.5204
Epoch 7/8
19847/19847 [==============================] - 80s - loss: 5.8950 - acc: 0.6339 - val_loss: 8.1752 - val_acc: 0.4924
Epoch 8/8
19847/19847 [==============================] - 80s - loss: 5.9502 - acc: 0.6304 - val_loss: 7.6494 - val_acc: 0.5250
Train on 19847 samples, validate on 2577 samples
Epoch 1/12
19847/19847 [==============================] - 80s - loss: 6.1565 - acc: 0.6176 - val_loss: 7.6712 - val_acc: 0.5231
Epoch 2/12
19847/19847 [==============================] - 81s - loss: 6.3666 - acc: 0.6046 - val_loss: 7.6187 - val_acc: 0.5270
Epoch 3/12
19847/19847 [==============================] - 80s - loss: 6.1146 - acc: 0.6200 - val_loss: 7.3156 - val_acc: 0.5460
Epoch 4/12
19847/19847 [==============================] - 81s - loss: 6.1163 - acc: 0.6200 - val_loss: 7.1232 - val_acc: 0.5576
Epoch 5/12
19847/19847 [==============================] - 80s - loss: 5.8694 - acc: 0.6355 - val_loss: 6.8897 - val_acc: 0.5720
Epoch 6/12
19847/19847 [==============================] - 80s - loss: 6.0104 - acc: 0.6267 - val_loss: 7.5773 - val_acc: 0.5297
Epoch 7/12
19847/19847 [==============================] - 80s - loss: 6.0409 - acc: 0.6247 - val_loss: 7.8233 - val_acc: 0.5142
Epoch 8/12
19847/19847 [==============================] - 80s - loss: 5.7253 - acc: 0.6445 - val_loss: 6.7477 - val_acc: 0.5813
Epoch 9/12
19847/19847 [==============================] - 80s - loss: 5.7190 - acc: 0.6449 - val_loss: 8.2757 - val_acc: 0.4858
Epoch 10/12
19847/19847 [==============================] - 80s - loss: 5.8177 - acc: 0.6389 - val_loss: 8.5814 - val_acc: 0.4676
Epoch 11/12
19847/19847 [==============================] - 81s - loss: 5.7796 - acc: 0.6412 - val_loss: 7.8501 - val_acc: 0.5126
Epoch 12/12
19847/19847 [==============================] - 81s - loss: 6.0719 - acc: 0.6231 - val_loss: 8.4187 - val_acc: 0.4777
Out[19]:
<keras.callbacks.History at 0x7f13a8704bd0>

Non-Augmented batch generator for test-data


In [ ]:
gen = image.ImageDataGeneratro()
tst_batches = gen.flow_from_directory(test_path, batch_size=batch_size,
                                      shuffle=False, class_mode=None)

Run test batches through ConvNet, run ConvNet test features through FCNet


In [ ]:
conv_tst_feat = Conv_model.predict_generator(tst_batches, tst_batches.nb_sample)
preds = FC_model.predict(conv_tst_feat, batch_size=batch_size*2)

Save results


In [ ]:
filenames = tst_batches.filenames
classes = sorted(trn_batches.class_indices, key=trn_batches.class_indices.get)
submission = pd.DataFrame(preds, columns=classes)
submission.insert(0, 'img', [f[8:] for f in filenames])
submission.head()
submission.to_csv(results_path + 'submission.csv', index=False, compression=None)

from IPython.display import FileLink
FileLink(results_path + 'submission.csv')

Once the above is working:

Ensemble

Work-flow:

To save GPU memory (no easy way to free it yet in JNB) init a single Convolutional and Fully-Connected Net model. The convolutional layer weights will not be trained, only used to produce features, so only the FC-model will have it's weights saved/reinitialized (if I can re-init without increasing memory load, I'll do that).

For each iteration of the ensemble, a randomly-augmented set of data will be passed through the ConvNet, and those features will be used to train a fresh FC Net. After the training phase, The Conv & FC nets will be used to create a set of predictions which will be stored in an array and returned. That array is then averaged into a single list of predictions, clipped & renormalized, then saved to be uploaded as the final submission.


In [ ]:
def train_FCNet():
    FC_model.fit(conv_features, trn_batches.labels, batch_size=batch_size, 
                 nb_epoch=1, validation_data=(conv_val_feat, val_batches.labels))
    FC_model.optimizer.lr=1e-1
    FC_model.fit(conv_features, trn_batches.labels, batch_size=batch_size, 
                 nb_epoch=1, validation_data=(conv_val_feat, val_batches.labels))
    FC_model.optimizer.lr=1e-2
    FC_model.fit(conv_features, trn_batches.labels, batch_size=batch_size, 
                 nb_epoch=2, validation_data=(conv_val_feat, val_batches.labels))
    FC_model.optimizer.lr=1e-4
    FC_model.fit(conv_features, trn_batches.labels, batch_size=batch_size, 
                 nb_epoch=4, validation_data=(conv_val_feat, val_batches.labels))
    FC_model.optimizer.lr=1e-6
    FC_model.fit(conv_features, trn_batches.labels, batch_size=batch_size, 
                 nb_epoch=8, validation_data=(conv_val_feat, val_batches.labels))

In [ ]:
def Ensemble(num_models=1):
    VGGbn = Vgg16BN()
    last_conv_idx = [index, for index, layer in enumerate(VGGbn.model.layers) \
                                            if type(layer) is Convolution2D][-1]
    Conv_layers = VGGbn.model.layers[:last_conv_idx + 1]
    Conv_model = Sequential(Conv_layers)
    
    gen = image.ImageDataGenerator(rotation_range=10, width_shift_range=0.05, 
                                       height_shift_range=0.05, zoom_range=0.1, 
                                           shear_range=0.1, channel_shift_range=10)
    trn_batches = gen.flow_from_directory(train_path, target_size=target_size, 
                    batch_size=batch_size, shuffle=False, class_mode='categorical')
    val_batches = gen.flow_from_directory(valid_path, target_size=target_size, 
                    batch_size=batch_size, shuffle=False, class_mode='categorical')
    
    gen_t = image.ImageDataGeneratro()
    tst_batches = gen.flow_from_directory(test_path, batch_size=batch_size,
                                          shuffle=False, class_mode=None)
    
    pred_array = []
    for i in xrange(num_models):
        # need to fix set_valid's bug
        oserr = 1
        while oserr:
            oserr = 0
            reset_valid()
            try:
                set_valid(number=3)
            except OSError:
                oserr = 1
        
        conv_features = Conv_model.predict_generator(trn_batches, trn_batches.nb_sample)
        conv_val_feat = Conv_model.predict_generator(val_batches, val_batches.nb_sample)
        
        FC_model = Sequential(create_FCbn_layers(p=0.3))
        FC_model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
        train_FCNet()
        
        conv_tst_feat = Conv_model.predict_generator(tst_batches, tst_batches.nb_sample)
        preds = FC_model.predict(conv_tst_feat, batch_size=batch_size*2)
        pred_array.append(preds)
        return pred_array

In [ ]:
prediction_array = Ensemble(num_models=3)
save_array(results_path + 'ensemble_predictions.dat', prediction_array)

In [ ]:
emsemble_preds = np.stack([pred for pred in prediction_array])
preds = ensemble_preds.mean(axis=0)

In [ ]:
filenames = tst_batches.filenames
classes = sorted(trn_batches.class_indices, key=trn_batches.class_indices.get)
submission = pd.DataFrame(preds, columns=classes)
submission.insert(0, 'img', [f[8:] for f in filenames])
submission.head()
submission.to_csv(results_path + 'submission.csv', index=False, compression=None)

from IPython.display import FileLink
FileLink(results_path + 'submission.csv')