Enter State Farm


In [1]:
from __future__ import division, print_function
%matplotlib inline
#path = "data/state/"
path = "data/state/sample/"
from importlib import reload  # Python 3
import utils; reload(utils)
from utils import *
from IPython.display import FileLink


Using cuDNN version 5105 on context None
Mapped name None to device cuda0: GeForce GTX TITAN X (0000:04:00.0)
Using Theano backend.

In [2]:
batch_size=64

Set up batches


In [3]:
batches = get_batches(path+'train', batch_size=batch_size)
val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)
steps_per_epoch = int(np.ceil(batches.samples/batch_size))
validation_steps = int(np.ceil(val_batches.samples/(batch_size*2)))


Found 1500 images belonging to 10 classes.
Found 1000 images belonging to 10 classes.

In [4]:
(val_classes, trn_classes, val_labels, trn_labels, 
    val_filenames, filenames, test_filenames) = get_classes(path)


Found 1500 images belonging to 10 classes.
Found 1000 images belonging to 10 classes.
Found 1000 images belonging to 1 classes.

Rather than using batches, we could just load all the data into an array up front to save some processing time. (In most examples I'm using the batches, however - just because that's how I happened to start out.)


In [5]:
trn = get_data(path+'train')
val = get_data(path+'valid')


Found 1500 images belonging to 10 classes.
Found 1000 images belonging to 10 classes.

In [6]:
save_array(path+'results/val.dat', val)
save_array(path+'results/trn.dat', trn)

In [7]:
val = load_array(path+'results/val.dat')
trn = load_array(path+'results/trn.dat')

Re-run sample experiments on full dataset

We should find that everything that worked on the sample (see statefarm-sample.ipynb) works on the full dataset too - only better, because now we have more data. So let's see how it goes - the models in this section are exact copies of the sample notebook models.

Single conv layer


In [8]:
def conv1(batches):
    model = Sequential([
            BatchNormalization(axis=1, input_shape=(3,224,224)),
            Conv2D(32,(3,3), activation='relu'),
            BatchNormalization(axis=1),
            MaxPooling2D((3,3)),
            Conv2D(64,(3,3), activation='relu'),
            BatchNormalization(axis=1),
            MaxPooling2D((3,3)),
            Flatten(),
            Dense(200, activation='relu'),
            BatchNormalization(),
            Dense(10, activation='softmax')
        ])

    model.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit_generator(batches, steps_per_epoch, epochs=2, validation_data=val_batches, 
                     validation_steps=validation_steps)
    model.optimizer.lr = 0.001
    model.fit_generator(batches, steps_per_epoch, epochs=4, validation_data=val_batches, 
                     validation_steps=validation_steps)
    return model
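
A note on the learning-rate changes in this function (and in the cells below): in Keras 2, assigning a plain float to model.optimizer.lr after the training function has already been compiled may not change the rate that's actually used, because the compiled updates reference the optimizer's backend variable rather than the Python attribute. A minimal sketch of the safer pattern, using the standard Keras backend API:

import keras.backend as K

def set_lr(model, lr):
    # Update the optimizer's learning-rate variable in place, so the
    # already-compiled training function picks up the new value.
    K.set_value(model.optimizer.lr, lr)

# e.g. set_lr(model, 0.001) rather than model.optimizer.lr = 0.001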

In [9]:
model = conv1(batches)


Epoch 1/2
24/24 [==============================] - 11s - loss: 1.6288 - acc: 0.5078 - val_loss: 2.1140 - val_acc: 0.3610
Epoch 2/2
24/24 [==============================] - 8s - loss: 0.3609 - acc: 0.9387 - val_loss: 1.9649 - val_acc: 0.2120
Epoch 1/4
24/24 [==============================] - 10s - loss: 0.0923 - acc: 0.9941 - val_loss: 2.1935 - val_acc: 0.1840
Epoch 2/4
24/24 [==============================] - 8s - loss: 0.0463 - acc: 0.9987 - val_loss: 2.4651 - val_acc: 0.2070
Epoch 3/4
24/24 [==============================] - 8s - loss: 0.0227 - acc: 1.0000 - val_loss: 2.7083 - val_acc: 0.1980
Epoch 4/4
24/24 [==============================] - 8s - loss: 0.0168 - acc: 1.0000 - val_loss: 2.8568 - val_acc: 0.1930

Interestingly, even with no regularization or augmentation we're getting some reasonable results from our simple convolutional model. So with augmentation, hopefully we'll see some very good results.

Data augmentation


In [10]:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, 
                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
batches = get_batches(path+'train', gen_t, batch_size=batch_size)


Found 1500 images belonging to 10 classes.

In [11]:
model = conv1(batches)


Epoch 1/2
24/24 [==============================] - 19s - loss: 2.5517 - acc: 0.2342 - val_loss: 3.0401 - val_acc: 0.0870
Epoch 2/2
24/24 [==============================] - 14s - loss: 1.8284 - acc: 0.3829 - val_loss: 2.4557 - val_acc: 0.1440
Epoch 1/4
24/24 [==============================] - 19s - loss: 1.5709 - acc: 0.4824 - val_loss: 1.9356 - val_acc: 0.2810
Epoch 2/4
24/24 [==============================] - 14s - loss: 1.3968 - acc: 0.5445 - val_loss: 2.0191 - val_acc: 0.3300
Epoch 3/4
24/24 [==============================] - 14s - loss: 1.2399 - acc: 0.5936 - val_loss: 2.1663 - val_acc: 0.2880
Epoch 4/4
24/24 [==============================] - 14s - loss: 1.1472 - acc: 0.6296 - val_loss: 2.2554 - val_acc: 0.3090

In [12]:
model.optimizer.lr = 0.0001
model.fit_generator(batches, steps_per_epoch, epochs=15, validation_data=val_batches, 
                 validation_steps=validation_steps)


Epoch 1/15
24/24 [==============================] - 19s - loss: 1.1066 - acc: 0.6585 - val_loss: 2.3529 - val_acc: 0.3090
Epoch 2/15
24/24 [==============================] - 14s - loss: 1.0195 - acc: 0.6851 - val_loss: 2.4139 - val_acc: 0.2690
Epoch 3/15
24/24 [==============================] - 14s - loss: 0.9877 - acc: 0.6766 - val_loss: 2.3048 - val_acc: 0.3080
Epoch 4/15
24/24 [==============================] - 14s - loss: 0.8891 - acc: 0.7122 - val_loss: 2.4900 - val_acc: 0.2860
Epoch 5/15
24/24 [==============================] - 14s - loss: 0.8721 - acc: 0.7083 - val_loss: 2.4084 - val_acc: 0.3080
Epoch 6/15
24/24 [==============================] - 14s - loss: 0.8127 - acc: 0.7443 - val_loss: 2.3771 - val_acc: 0.3320
Epoch 7/15
24/24 [==============================] - 14s - loss: 0.8007 - acc: 0.7498 - val_loss: 2.0113 - val_acc: 0.3810
Epoch 8/15
24/24 [==============================] - 15s - loss: 0.7332 - acc: 0.7606 - val_loss: 1.8840 - val_acc: 0.3860
Epoch 9/15
24/24 [==============================] - 14s - loss: 0.6935 - acc: 0.7858 - val_loss: 1.5289 - val_acc: 0.4890
Epoch 10/15
24/24 [==============================] - 14s - loss: 0.6480 - acc: 0.8039 - val_loss: 1.3313 - val_acc: 0.5580
Epoch 11/15
24/24 [==============================] - 14s - loss: 0.6735 - acc: 0.8042 - val_loss: 1.1348 - val_acc: 0.5850
Epoch 12/15
24/24 [==============================] - 14s - loss: 0.6330 - acc: 0.7978 - val_loss: 1.0516 - val_acc: 0.6220
Epoch 13/15
24/24 [==============================] - 14s - loss: 0.5796 - acc: 0.8244 - val_loss: 0.6730 - val_acc: 0.7860
Epoch 14/15
24/24 [==============================] - 14s - loss: 0.6057 - acc: 0.8105 - val_loss: 0.9067 - val_acc: 0.6850
Epoch 15/15
24/24 [==============================] - 14s - loss: 0.5237 - acc: 0.8474 - val_loss: 0.7181 - val_acc: 0.7350
Out[12]:
<keras.callbacks.History at 0x7fa71afc9860>

I'm shocked by how good these results are! We're regularly seeing 75-80% accuracy on the validation set, which puts us in the top third or better of the competition. With such a simple model and no dropout or semi-supervised learning, this really speaks to the power of data augmentation.

Three conv/pooling pairs + dropout

Unfortunately, the results are still very unstable - the validation accuracy jumps from epoch to epoch. Perhaps a deeper model with some dropout would help.


In [13]:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, 
                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
batches = get_batches(path+'train', gen_t, batch_size=batch_size)


Found 1500 images belonging to 10 classes.

In [14]:
model = Sequential([
        BatchNormalization(axis=1, input_shape=(3,224,224)),
        Conv2D(32,(3,3), activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D(),
        Conv2D(64,(3,3), activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D(),
        Conv2D(128,(3,3), activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D(),
        Flatten(),
        Dense(200, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(200, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])

In [15]:
model.compile(Adam(lr=10e-5), loss='categorical_crossentropy', metrics=['accuracy'])

In [16]:
model.fit_generator(batches, steps_per_epoch, epochs=2, validation_data=val_batches, 
                 validation_steps=validation_steps)


Epoch 1/2
24/24 [==============================] - 19s - loss: 3.4312 - acc: 0.1272 - val_loss: 2.2827 - val_acc: 0.1440
Epoch 2/2
24/24 [==============================] - 14s - loss: 3.2486 - acc: 0.1519 - val_loss: 2.2143 - val_acc: 0.1660
Out[16]:
<keras.callbacks.History at 0x7fa7349d15c0>

In [17]:
model.optimizer.lr=0.001

In [18]:
model.fit_generator(batches, steps_per_epoch, epochs=10, validation_data=val_batches, 
                 validation_steps=validation_steps)


Epoch 1/10
24/24 [==============================] - 19s - loss: 2.9768 - acc: 0.1977 - val_loss: 2.1982 - val_acc: 0.2180
Epoch 2/10
24/24 [==============================] - 14s - loss: 2.9122 - acc: 0.2020 - val_loss: 2.2579 - val_acc: 0.1840
Epoch 3/10
24/24 [==============================] - 14s - loss: 2.6762 - acc: 0.2319 - val_loss: 2.3272 - val_acc: 0.1800
Epoch 4/10
24/24 [==============================] - 14s - loss: 2.6483 - acc: 0.2394 - val_loss: 2.5627 - val_acc: 0.1350
Epoch 5/10
24/24 [==============================] - 14s - loss: 2.4357 - acc: 0.2881 - val_loss: 2.7177 - val_acc: 0.1350
Epoch 6/10
24/24 [==============================] - 14s - loss: 2.4340 - acc: 0.3014 - val_loss: 2.7404 - val_acc: 0.1350
Epoch 7/10
24/24 [==============================] - 14s - loss: 2.3677 - acc: 0.2964 - val_loss: 2.5092 - val_acc: 0.1740
Epoch 8/10
24/24 [==============================] - 14s - loss: 2.2160 - acc: 0.3417 - val_loss: 2.2802 - val_acc: 0.2190
Epoch 9/10
24/24 [==============================] - 14s - loss: 2.1294 - acc: 0.3573 - val_loss: 2.0614 - val_acc: 0.2820
Epoch 10/10
24/24 [==============================] - 14s - loss: 2.1194 - acc: 0.3764 - val_loss: 1.8203 - val_acc: 0.3200
Out[18]:
<keras.callbacks.History at 0x7fa7349d1ba8>

In [19]:
model.optimizer.lr=0.00001

In [20]:
model.fit_generator(batches, steps_per_epoch, epochs=10, validation_data=val_batches, 
                 validation_steps=validation_steps)


Epoch 1/10
24/24 [==============================] - 19s - loss: 2.0358 - acc: 0.3955 - val_loss: 1.6286 - val_acc: 0.3970
Epoch 2/10
24/24 [==============================] - 14s - loss: 2.0127 - acc: 0.3948 - val_loss: 1.5521 - val_acc: 0.4410
Epoch 3/10
24/24 [==============================] - 14s - loss: 1.9452 - acc: 0.4147 - val_loss: 1.4783 - val_acc: 0.4750
Epoch 4/10
24/24 [==============================] - 14s - loss: 1.8391 - acc: 0.4318 - val_loss: 1.3409 - val_acc: 0.5510
Epoch 5/10
24/24 [==============================] - 14s - loss: 1.8084 - acc: 0.4394 - val_loss: 1.1594 - val_acc: 0.6340
Epoch 6/10
24/24 [==============================] - 14s - loss: 1.8225 - acc: 0.4251 - val_loss: 1.0878 - val_acc: 0.6550
Epoch 7/10
24/24 [==============================] - 14s - loss: 1.7267 - acc: 0.4604 - val_loss: 0.9712 - val_acc: 0.6970
Epoch 8/10
24/24 [==============================] - 14s - loss: 1.6769 - acc: 0.4865 - val_loss: 0.9680 - val_acc: 0.6720
Epoch 9/10
24/24 [==============================] - 14s - loss: 1.6258 - acc: 0.4758 - val_loss: 0.8944 - val_acc: 0.7010
Epoch 10/10
24/24 [==============================] - 14s - loss: 1.6113 - acc: 0.4774 - val_loss: 0.7997 - val_acc: 0.7540
Out[20]:
<keras.callbacks.History at 0x7fa7349d1fd0>

This is looking quite a bit better - the accuracy is similar, but the stability is higher. There's still some way to go however...

Imagenet conv features

Since we have so little data, and it is similar to imagenet images (full color photos), using pre-trained VGG weights is likely to be helpful - in fact it seems likely that we won't need to fine-tune the convolutional layer weights much, if at all. So we can pre-compute the output of the last convolutional layer, as we did in lesson 3 when we experimented with dropout. (However, this means that we can't use full data augmentation, since we can't pre-compute something that changes every time an image is seen.)


In [21]:
vgg = Vgg16()
model=vgg.model
last_conv_idx = [i for i,l in enumerate(model.layers) if type(l) is Convolution2D][-1]
conv_layers = model.layers[:last_conv_idx+1]

In [22]:
conv_model = Sequential(conv_layers)

In [23]:
(val_classes, trn_classes, val_labels, trn_labels, 
    val_filenames, filenames, test_filenames) = get_classes(path)


Found 1500 images belonging to 10 classes.
Found 1000 images belonging to 10 classes.
Found 1000 images belonging to 1 classes.

In [24]:
test_batches = get_batches(path+'test', batch_size=batch_size*2, shuffle=False)


Found 1000 images belonging to 1 classes.

In [25]:
conv_feat = conv_model.predict_generator(batches, int(np.ceil(batches.samples/batch_size)))
conv_val_feat = conv_model.predict_generator(val_batches, int(np.ceil(val_batches.samples/(batch_size*2))))
conv_test_feat = conv_model.predict_generator(test_batches, int(np.ceil(test_batches.samples/(batch_size*2))))
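
One thing to watch here: batches at this point is still the augmented, shuffled training generator created above, so the rows of conv_feat may not line up with trn_labels (which are in directory order). A minimal sketch of a cleaner setup, assuming get_batches accepts shuffle as in the earlier calls:

# Recreate a plain, non-shuffled training generator so the pre-computed
# features correspond row-for-row with trn_labels.
fixed_batches = get_batches(path+'train', batch_size=batch_size, shuffle=False)
conv_feat = conv_model.predict_generator(
    fixed_batches, int(np.ceil(fixed_batches.samples/batch_size)))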

In [26]:
save_array(path+'results/conv_val_feat.dat', conv_val_feat)
save_array(path+'results/conv_test_feat.dat', conv_test_feat)
save_array(path+'results/conv_feat.dat', conv_feat)

In [27]:
conv_feat = load_array(path+'results/conv_feat.dat')
conv_val_feat = load_array(path+'results/conv_val_feat.dat')
conv_val_feat.shape


Out[27]:
(1000, 512, 14, 14)

Batchnorm dense layers on pretrained conv layers

Since we've pre-computed the output of the last convolutional layer, we need to create a network that takes that as input, and predicts our 10 classes. Let's try using a simplified version of VGG's dense layers.


In [28]:
def get_bn_layers(p):
    return [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dropout(p/2),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(p/2),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(10, activation='softmax')
        ]

In [29]:
p=0.8

In [30]:
bn_model = Sequential(get_bn_layers(p))
bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [31]:
bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, epochs=1, 
             validation_data=(conv_val_feat, val_labels))


Train on 1500 samples, validate on 1000 samples
Epoch 1/1
1500/1500 [==============================] - 0s - loss: 4.8686 - acc: 0.1140 - val_loss: 5.4802 - val_acc: 0.0810
Out[31]:
<keras.callbacks.History at 0x7fa70e528da0>

In [32]:
bn_model.optimizer.lr=0.01

In [33]:
bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, epochs=2, 
             validation_data=(conv_val_feat, val_labels))


Train on 1500 samples, validate on 1000 samples
Epoch 1/2
1500/1500 [==============================] - 0s - loss: 4.2383 - acc: 0.1120 - val_loss: 3.8402 - val_acc: 0.0890
Epoch 2/2
1500/1500 [==============================] - 0s - loss: 3.9045 - acc: 0.1387 - val_loss: 3.2730 - val_acc: 0.0950
Out[33]:
<keras.callbacks.History at 0x7fa70e33e470>

In [34]:
bn_model.save_weights(path+'models/conv8.h5')

Looking good! Let's try pre-computing 5 epochs' worth of augmented data, so we can experiment with combining dropout and augmentation on the pre-trained model.

Pre-computed data augmentation + dropout

We'll use our usual data augmentation parameters:


In [35]:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, 
                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
da_batches = get_batches(path+'train', gen_t, batch_size=batch_size, shuffle=False)


Found 1500 images belonging to 10 classes.

We use those to create a dataset of convolutional features 5x bigger than the training set.


In [36]:
da_conv_feat = conv_model.predict_generator(da_batches, 5*int(np.ceil(da_batches.samples/batch_size)), workers=3)

In [37]:
save_array(path+'results/da_conv_feat2.dat', da_conv_feat)

In [38]:
da_conv_feat = load_array(path+'results/da_conv_feat2.dat')

Let's include the real training data as well in its non-augmented form.


In [39]:
da_conv_feat = np.concatenate([da_conv_feat, conv_feat])

Since we've now got a dataset 6x bigger than before, we'll need to copy our labels 6 times too. (This lines up because da_batches was created with shuffle=False, so each pass through the generator visits the images in the same order as trn_labels.)


In [40]:
da_trn_labels = np.concatenate([trn_labels]*6)
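
As a quick sanity check (not part of the original notebook), the augmented features and the tiled labels should now have the same number of rows:

assert da_conv_feat.shape[0] == da_trn_labels.shape[0] == 6*trn_labels.shape[0]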

Based on some experiments, the previous model architecture works well here too, just with bigger dense layers.


In [41]:
def get_bn_da_layers(p):
    return [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dropout(p),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(10, activation='softmax')
        ]

In [42]:
p=0.8

In [43]:
bn_model = Sequential(get_bn_da_layers(p))
bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

Now we can train the model as usual, with pre-computed augmented data.


In [44]:
bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, epochs=1, 
             validation_data=(conv_val_feat, val_labels))


Train on 9000 samples, validate on 1000 samples
Epoch 1/1
9000/9000 [==============================] - 1s - loss: 4.2507 - acc: 0.1182 - val_loss: 2.4722 - val_acc: 0.0640
Out[44]:
<keras.callbacks.History at 0x7fa70d9a1898>

In [45]:
bn_model.optimizer.lr=0.01

In [46]:
bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, epochs=4, 
             validation_data=(conv_val_feat, val_labels))


Train on 9000 samples, validate on 1000 samples
Epoch 1/4
9000/9000 [==============================] - 1s - loss: 3.1055 - acc: 0.1453 - val_loss: 2.4364 - val_acc: 0.0840
Epoch 2/4
9000/9000 [==============================] - 1s - loss: 2.5409 - acc: 0.1819 - val_loss: 2.4281 - val_acc: 0.1050
Epoch 3/4
9000/9000 [==============================] - 1s - loss: 2.2547 - acc: 0.2176 - val_loss: 2.4580 - val_acc: 0.1170
Epoch 4/4
9000/9000 [==============================] - 1s - loss: 2.0941 - acc: 0.2583 - val_loss: 2.4708 - val_acc: 0.0930
Out[46]:
<keras.callbacks.History at 0x7fa70d9a1e80>

In [47]:
bn_model.optimizer.lr=0.0001

In [48]:
bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, epochs=4, 
             validation_data=(conv_val_feat, val_labels))


Train on 9000 samples, validate on 1000 samples
Epoch 1/4
9000/9000 [==============================] - 1s - loss: 1.9996 - acc: 0.2886 - val_loss: 2.5000 - val_acc: 0.0620
Epoch 2/4
9000/9000 [==============================] - 1s - loss: 1.9379 - acc: 0.3199 - val_loss: 2.5293 - val_acc: 0.0610
Epoch 3/4
9000/9000 [==============================] - 1s - loss: 1.8729 - acc: 0.3428 - val_loss: 2.5577 - val_acc: 0.1120
Epoch 4/4
9000/9000 [==============================] - 1s - loss: 1.8278 - acc: 0.3627 - val_loss: 2.5695 - val_acc: 0.0330
Out[48]:
<keras.callbacks.History at 0x7fa70d9a1eb8>

Looks good - let's save those weights.


In [49]:
bn_model.save_weights(path+'models/da_conv8_1.h5')

Pseudo labeling

We're going to try using a combination of pseudo labeling and knowledge distillation to allow us to use unlabeled data (i.e. do semi-supervised learning). For our initial experiment we'll use the validation set as the unlabeled data, so that we can see that it is working without using the test set. At a later date we'll try using the test set.

To do this, we simply calculate the predictions of our model...


In [50]:
val_pseudo = bn_model.predict(conv_val_feat, batch_size=batch_size)

...concatenate them with our training labels...


In [51]:
comb_pseudo = np.concatenate([da_trn_labels, val_pseudo])

In [52]:
comb_feat = np.concatenate([da_conv_feat, conv_val_feat])

...and fine-tune our model using that data.


In [53]:
bn_model.load_weights(path+'models/da_conv8_1.h5')

In [54]:
bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, epochs=1, 
             validation_data=(conv_val_feat, val_labels))


Train on 10000 samples, validate on 1000 samples
Epoch 1/1
10000/10000 [==============================] - 1s - loss: 1.8196 - acc: 0.3931 - val_loss: 2.5734 - val_acc: 0.0900
Out[54]:
<keras.callbacks.History at 0x7fa70d7fa4e0>

In [55]:
bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, epochs=4, 
             validation_data=(conv_val_feat, val_labels))


Train on 10000 samples, validate on 1000 samples
Epoch 1/4
10000/10000 [==============================] - 1s - loss: 1.7807 - acc: 0.4056 - val_loss: 2.5874 - val_acc: 0.0590
Epoch 2/4
10000/10000 [==============================] - 1s - loss: 1.7470 - acc: 0.4218 - val_loss: 2.6081 - val_acc: 0.0980
Epoch 3/4
10000/10000 [==============================] - 1s - loss: 1.7326 - acc: 0.4333 - val_loss: 2.6495 - val_acc: 0.0950
Epoch 4/4
10000/10000 [==============================] - 1s - loss: 1.6867 - acc: 0.4437 - val_loss: 2.6618 - val_acc: 0.0980
Out[55]:
<keras.callbacks.History at 0x7fa70e528a90>

In [56]:
bn_model.optimizer.lr=0.00001

In [57]:
bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, epochs=4, 
             validation_data=(conv_val_feat, val_labels))


Train on 10000 samples, validate on 1000 samples
Epoch 1/4
10000/10000 [==============================] - 1s - loss: 1.6822 - acc: 0.4577 - val_loss: 2.6546 - val_acc: 0.0360
Epoch 2/4
10000/10000 [==============================] - 1s - loss: 1.6588 - acc: 0.4633 - val_loss: 2.6771 - val_acc: 0.0970
Epoch 3/4
10000/10000 [==============================] - 1s - loss: 1.6433 - acc: 0.4734 - val_loss: 2.6940 - val_acc: 0.1010
Epoch 4/4
10000/10000 [==============================] - 1s - loss: 1.6264 - acc: 0.4811 - val_loss: 2.6691 - val_acc: 0.0990
Out[57]:
<keras.callbacks.History at 0x7fa70d5a6048>

That's a distinct improvement - even though the validation set isn't very big. This looks encouraging for when we try this on the test set.


In [58]:
bn_model.save_weights(path+'models/bn-ps8.h5')

Submit

We'll find a good clipping amount using the validation set prior to submitting. Kaggle's multi-class log loss heavily penalizes confident predictions that turn out to be wrong, so clipping the predicted probabilities into the range [(1-mx)/9, mx] usually improves the score.


In [59]:
def do_clip(arr, mx): return np.clip(arr, (1-mx)/9, mx)

In [62]:
val_preds = bn_model.predict(conv_val_feat, batch_size=batch_size*2)

In [64]:
np.mean(keras.metrics.categorical_crossentropy(val_labels, do_clip(val_preds, 0.93)).eval())


Out[64]:
2.6576343823075295
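
Rather than eyeballing a single value, a small sweep over candidate clipping amounts makes it easier to pick one. Below is a minimal sketch using numpy only (avoiding the Theano eval above); it assumes val_labels and val_preds as computed in the cells just above:

def val_logloss(labels, preds, mx):
    # Clip, renormalise each row to sum to 1, and compute the multi-class log loss.
    clipped = do_clip(preds, mx)
    clipped = clipped / clipped.sum(axis=1, keepdims=True)
    return -np.mean(np.sum(labels*np.log(clipped), axis=1))

for mx in [0.85, 0.90, 0.93, 0.95, 0.98]:
    print(mx, val_logloss(val_labels, val_preds, mx))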

In [65]:
conv_test_feat = load_array(path+'results/conv_test_feat.dat')

In [66]:
preds = bn_model.predict(conv_test_feat, batch_size=batch_size*2)

In [67]:
subm = do_clip(preds,0.93)

In [68]:
subm_name = path+'results/subm.gz'

In [69]:
# Submission columns in the order the generator assigned class indices (c0..c9).
classes = sorted(batches.class_indices, key=batches.class_indices.get)

In [70]:
submission = pd.DataFrame(subm, columns=classes)
# Strip the leading directory prefix from each test filename, leaving just the image name.
submission.insert(0, 'img', [a[4:] for a in test_filenames])
submission.head()


Out[70]:
img c0 c1 c2 c3 c4 c5 c6 c7 c8 c9
0 img_4285.jpg 0.166412 0.128908 0.060377 0.032914 0.030211 0.041719 0.075996 0.080122 0.182443 0.200898
1 img_80798.jpg 0.070970 0.119195 0.037339 0.024987 0.023366 0.048560 0.213575 0.171950 0.196696 0.093363
2 img_21674.jpg 0.123932 0.309131 0.066590 0.023665 0.020624 0.077198 0.183129 0.072145 0.070190 0.053396
3 img_84804.jpg 0.229288 0.124297 0.029818 0.106480 0.081427 0.101015 0.106151 0.053900 0.084249 0.083375
4 img_27796.jpg 0.018616 0.036482 0.185603 0.018809 0.031853 0.041242 0.175815 0.177964 0.276384 0.037232

In [71]:
submission.to_csv(subm_name, index=False, compression='gzip')

In [72]:
FileLink(subm_name)




This gets 0.534 on the leaderboard.

The "things that didn't really work" section

You can safely ignore everything from here on, because these experiments didn't really help.

Finetune some conv layers too


In [73]:
# Using get_bn_layers(p) here gives a weight-shape error when copying the weights below,
# so get_bn_da_layers(p), which matches bn_model's architecture, is used instead.
#for l in get_bn_layers(p): conv_model.add(l)
for l in get_bn_da_layers(p): conv_model.add(l)

In [74]:
# Copy the trained dense-layer weights from bn_model into the
# corresponding newly-added layers of conv_model.
for l1,l2 in zip(bn_model.layers, conv_model.layers[last_conv_idx+1:]):
    l2.set_weights(l1.get_weights())

In [75]:
# Freeze all of conv_model's layers first...
for l in conv_model.layers: l.trainable = False

In [76]:
# ...then unfreeze just the newly-added dense layers after the last conv layer.
for l in conv_model.layers[last_conv_idx+1:]: l.trainable = True

In [77]:
comb = np.concatenate([trn, val])

In [78]:
# Rebuild comb_pseudo from the plain training labels plus the validation pseudo-labels,
# so its length matches comb when passed to gen_t.flow below.
comb_pseudo = np.concatenate([trn_labels, val_pseudo])

In [79]:
gen_t = image.ImageDataGenerator(rotation_range=8, height_shift_range=0.04, 
                shear_range=0.03, channel_shift_range=10, width_shift_range=0.08)

In [80]:
batches = gen_t.flow(comb, comb_pseudo, batch_size=batch_size)

In [81]:
val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)


Found 1000 images belonging to 10 classes.

In [82]:
conv_model.compile(Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])

In [83]:
conv_model.fit_generator(batches, steps_per_epoch, epochs=1, validation_data=val_batches, 
                 validation_steps=validation_steps)


Epoch 1/1
24/24 [==============================] - 24s - loss: 1.9373 - acc: 0.4036 - val_loss: 0.9014 - val_acc: 0.7530
Out[83]:
<keras.callbacks.History at 0x7fa70a534588>

In [84]:
conv_model.optimizer.lr = 0.0001

In [85]:
conv_model.fit_generator(batches, steps_per_epoch, epochs=3, validation_data=val_batches, 
                 validation_steps=validation_steps)


Epoch 1/3
24/24 [==============================] - 24s - loss: 1.9821 - acc: 0.3919 - val_loss: 0.9012 - val_acc: 0.7530
Epoch 2/3
24/24 [==============================] - 25s - loss: 1.9500 - acc: 0.3802 - val_loss: 0.9005 - val_acc: 0.7560
Epoch 3/3
24/24 [==============================] - 25s - loss: 1.9880 - acc: 0.3841 - val_loss: 0.9017 - val_acc: 0.7560
Out[85]:
<keras.callbacks.History at 0x7fa7012a60f0>

In [86]:
# Unfreeze some of the later layers as well.
for l in conv_model.layers[16:]: l.trainable = True

In [87]:
conv_model.optimizer.lr = 0.00001

In [88]:
conv_model.fit_generator(batches, steps_per_epoch, epochs=8, validation_data=val_batches, 
                 validation_steps=validation_steps)


Epoch 1/8
24/24 [==============================] - 25s - loss: 1.9115 - acc: 0.4165 - val_loss: 0.9011 - val_acc: 0.7570
Epoch 2/8
24/24 [==============================] - 24s - loss: 1.8996 - acc: 0.4087 - val_loss: 0.9018 - val_acc: 0.7620
Epoch 3/8
24/24 [==============================] - 107s - loss: 1.9268 - acc: 0.4102 - val_loss: 0.9032 - val_acc: 0.7610
Epoch 4/8
24/24 [==============================] - 18s - loss: 1.9858 - acc: 0.4013 - val_loss: 0.9023 - val_acc: 0.7660
Epoch 5/8
24/24 [==============================] - 18s - loss: 1.9454 - acc: 0.3991 - val_loss: 0.9034 - val_acc: 0.7660
Epoch 6/8
24/24 [==============================] - 19s - loss: 1.9289 - acc: 0.3882 - val_loss: 0.9029 - val_acc: 0.7710
Epoch 7/8
24/24 [==============================] - 19s - loss: 1.9780 - acc: 0.3759 - val_loss: 0.9028 - val_acc: 0.7710
Epoch 8/8
24/24 [==============================] - 19s - loss: 1.9681 - acc: 0.3874 - val_loss: 0.9031 - val_acc: 0.7730
Out[88]:
<keras.callbacks.History at 0x7fa7012a6438>

In [89]:
conv_model.save_weights(path+'models/conv8_ps.h5')

In [90]:
#conv_model.load_weights(path+'models/conv8_da.h5')  # conv8_da.h5 isn't saved anywhere in this notebook, so this stays commented out

In [91]:
val_pseudo = conv_model.predict(val, batch_size=batch_size*2)

In [92]:
save_array(path+'models/pseudo8_da.dat', val_pseudo)

Ensembling


In [93]:
drivers_ds = pd.read_csv(path+'driver_imgs_list.csv')
drivers_ds.head()


Out[93]:
subject classname img
0 p002 c0 img_44733.jpg
1 p002 c0 img_72999.jpg
2 p002 c0 img_25094.jpg
3 p002 c0 img_69092.jpg
4 p002 c0 img_92629.jpg

In [94]:
img2driver = drivers_ds.set_index('img')['subject'].to_dict()

In [95]:
driver2imgs = {k: g["img"].tolist() 
               for k,g in drivers_ds[['subject', 'img']].groupby("subject")}

In [98]:
# Note: this function isn't used in the rest of the notebook (the sketch after the driver split below shows one way it could be).
def get_idx(driver_list):
    return [i for i,f in enumerate(filenames) if img2driver[f[3:]] in driver_list]

In [99]:
# drivers = driver2imgs.keys()  # Python 2
drivers = list(driver2imgs)  # Python 3

In [101]:
rnd_drivers = np.random.permutation(drivers)

In [102]:
ds1 = rnd_drivers[:len(rnd_drivers)//2]
ds2 = rnd_drivers[len(rnd_drivers)//2:]
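
For reference, the otherwise-unused get_idx function above could be combined with this driver split to build driver-disjoint training and validation sets, so the same driver never appears on both sides. A rough sketch (not from the original notebook), assuming the rows of conv_feat are in the same order as filenames:

idx1 = get_idx(ds1)   # indices of images of drivers in the first half of the split
idx2 = get_idx(ds2)   # indices of images of drivers in the second half

# e.g. train on ds1's drivers and validate on ds2's drivers
feat1, labels1 = conv_feat[idx1], trn_labels[idx1]
feat2, labels2 = conv_feat[idx2], trn_labels[idx2]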

In [ ]:
# The following cells rely on preparation code that isn't included in this notebook
# (e.g. a fit_conv helper and the avg_val_preds array), so they won't run as-is.
models=[fit_conv([d]) for d in drivers]
models=[m for m in models if m is not None]

In [ ]:
all_preds = np.stack([m.predict(conv_test_feat, batch_size=128) for m in models])
avg_preds = all_preds.mean(axis=0)
avg_preds = avg_preds/np.expand_dims(avg_preds.sum(axis=1), 1)

In [ ]:
keras.metrics.categorical_crossentropy(val_labels, np.clip(avg_val_preds,0.01,0.99)).eval()

In [ ]:
keras.metrics.categorical_accuracy(val_labels, np.clip(avg_val_preds,0.01,0.99)).eval()
