In [1]:
from __future__ import division, print_function

from theano.sandbox import cuda

%matplotlib inline
import utils; reload(utils)
from utils import *

#path = "data/dogscats/sample/"
path = "data/"
model_path = path + 'models/'
if not os.path.exists(model_path): os.mkdir(model_path)

batch_size=64


Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)
/home/ubuntu/anaconda2/lib/python2.7/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.
  warnings.warn(warn)
Using Theano backend.

In [2]:
model = vgg_ft(2)
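
vgg_ft comes from the course's utils.py. As a rough sketch of what it is assumed to do (not the verbatim source), it builds the standard Vgg16 wrapper and swaps the 1000-way ImageNet classifier for a trainable 2-way softmax, freezing the remaining layers:

In [ ]:
# Assumed behaviour of vgg_ft(2), using the course's Vgg16 class:
#   vgg = Vgg16()
#   vgg.ft(2)          # pop Dense(1000), freeze the earlier layers, add Dense(2, activation='softmax')
#   model = vgg.model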

In [3]:
model.load_weights(model_path+'finetune3.h5')

In [4]:
layers = model.layers
last_conv_idx = [index for index,layer in enumerate(layers) 
                     if type(layer) is Convolution2D][-1]
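
As an optional sanity check (added here, not part of the original run), the index and output shape of that last convolutional layer can be printed:

In [ ]:
# For VGG16 at 224x224 input this should be a (512, 14, 14) feature map
print(last_conv_idx, layers[last_conv_idx].output_shape)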

In [5]:
conv_layers = layers[:last_conv_idx+1]
conv_model = Sequential(conv_layers)
# Dense layers - also known as fully connected or 'FC' layers
fc_layers = layers[last_conv_idx+1:]

In [6]:
batches = get_batches(path+'train', shuffle=False, batch_size=batch_size)
val_batches = get_batches(path+'valid', shuffle=False, batch_size=batch_size)

val_classes = val_batches.classes
trn_classes = batches.classes
val_labels = onehot(val_classes)
trn_labels = onehot(trn_classes)


Found 21000 images belonging to 2 classes.
Found 4000 images belonging to 2 classes.

In [7]:
val_features = conv_model.predict_generator(val_batches, val_batches.nb_sample)

In [8]:
trn_features = conv_model.predict_generator(batches, batches.nb_sample)

In [9]:
save_array(model_path + 'train_convlayer_features.bc', trn_features)
save_array(model_path + 'valid_convlayer_features.bc', val_features)

In [10]:
trn_features = load_array(model_path+'train_convlayer_features.bc')
val_features = load_array(model_path+'valid_convlayer_features.bc')
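
save_array and load_array also come from the course's utils.py; the sketch below shows (as an assumption about those helpers, not the verbatim source) the bcolz-based persistence they are generally implemented with, so the conv features can be reloaded without re-running the conv layers:

In [ ]:
# Assumed shape of utils.save_array / utils.load_array (bcolz-backed):
#   import bcolz
#   def save_array(fname, arr):
#       c = bcolz.carray(arr, rootdir=fname, mode='w'); c.flush()
#   def load_array(fname):
#       return bcolz.open(fname)[:]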

In [11]:
# Copy the weights from the pre-trained model.
# NB: Since we're removing dropout, we want to halve the weights
def proc_wgts(layer): return [o/2 for o in layer.get_weights()]
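
A quick numeric check of that rule (a sketch added here, not part of the original run): with non-inverted dropout at rate p, a unit's expected input is scaled by (1-p), so removing dropout while scaling the following layer's weights by (1-p) leaves the expected pre-activation unchanged.

In [ ]:
import numpy as np

# Compare a linear layer with (non-inverted) dropout on its inputs, averaged
# over a batch, against the same layer with no dropout but weights scaled by (1-p)
rng = np.random.RandomState(0)
x = rng.rand(20000, 64)                     # activations feeding a Dense layer
W = rng.randn(64, 32)
p = 0.5                                     # VGG's FC dropout rate
mask = rng.binomial(1, 1-p, size=x.shape)   # dropout mask
with_dropout = (x*mask).dot(W).mean(axis=0)
scaled_wgts  = x.dot(W*(1-p)).mean(axis=0)
# Difference should be a few hundredths, versus outputs of order 1-10
print(np.abs(with_dropout - scaled_wgts).max())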

In [12]:
# Such a finely tuned model needs to be updated very slowly!
opt = RMSprop(lr=0.00001, rho=0.7)

In [13]:
def get_fc_model():
    model = Sequential([
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dense(4096, activation='relu'),
        Dropout(0.),
        Dense(4096, activation='relu'),
        Dropout(0.),
        Dense(2, activation='softmax')
        ])

    for l1,l2 in zip(model.layers, fc_layers): l1.set_weights(proc_wgts(l2))

    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [14]:
gen = image.ImageDataGenerator(rotation_range=15, width_shift_range=0.1, 
                               height_shift_range=0.1, zoom_range=0.1, horizontal_flip=True)



batches = get_batches(path+'train', gen, batch_size=batch_size)
# NB: We don't want to augment or shuffle the validation set
val_batches = get_batches(path+'valid', shuffle=False, batch_size=batch_size)


fc_model = get_fc_model()


Found 21000 images belonging to 2 classes.
Found 4000 images belonging to 2 classes.
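
To eyeball what the augmentation is doing, something like the sketch below can be run (an optional check added here, not part of the original run; it assumes utils.plots from the course's utils.py and the Theano channel ordering configured above):

In [ ]:
# Draw a few augmented variants of a single training image
imgs, labels = next(batches)
img = np.expand_dims(imgs[0], 0)            # one image, Theano ordering: (1, 3, 224, 224)
aug_iter = gen.flow(img, batch_size=1)
aug_imgs = [next(aug_iter)[0] for i in range(4)]
plots(aug_imgs)                             # utils.plots transposes to channels-last for display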

In [15]:
fc_model.fit(trn_features, trn_labels, nb_epoch=8, 
             batch_size=batch_size, validation_data=(val_features, val_labels))


Train on 21000 samples, validate on 4000 samples
Epoch 1/8
21000/21000 [==============================] - 31s - loss: 0.0421 - acc: 0.9860 - val_loss: 0.0617 - val_acc: 0.9840
Epoch 2/8
21000/21000 [==============================] - 31s - loss: 0.0101 - acc: 0.9973 - val_loss: 0.0679 - val_acc: 0.9850
Epoch 3/8
21000/21000 [==============================] - 31s - loss: 0.0027 - acc: 0.9995 - val_loss: 0.1116 - val_acc: 0.9822
Epoch 4/8
21000/21000 [==============================] - 31s - loss: 0.0025 - acc: 0.9998 - val_loss: 0.1271 - val_acc: 0.9845
Epoch 5/8
21000/21000 [==============================] - 31s - loss: 0.0026 - acc: 0.9998 - val_loss: 0.1279 - val_acc: 0.9830
Epoch 6/8
21000/21000 [==============================] - 31s - loss: 0.0022 - acc: 0.9998 - val_loss: 0.1370 - val_acc: 0.9832
Epoch 7/8
21000/21000 [==============================] - 31s - loss: 0.0021 - acc: 0.9998 - val_loss: 0.1310 - val_acc: 0.9828
Epoch 8/8
21000/21000 [==============================] - 31s - loss: 0.0021 - acc: 0.9998 - val_loss: 0.1359 - val_acc: 0.9830
Out[15]:
<keras.callbacks.History at 0x7faa7b762650>

In [16]:
fc_model.save_weights(model_path+'no_dropout.h5')

In [17]:
fc_model = get_fc_model()

In [18]:
for layer in conv_model.layers: layer.trainable = False
# Look how easy it is to connect two models together!
conv_model.add(fc_model)

In [19]:
conv_model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [20]:
conv_model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=8,                        
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)


Epoch 1/8
21000/21000 [==============================] - 623s - loss: 0.0533 - acc: 0.9816 - val_loss: 0.0871 - val_acc: 0.9732
Epoch 2/8
21000/21000 [==============================] - 624s - loss: 0.0414 - acc: 0.9870 - val_loss: 0.0618 - val_acc: 0.9828
Epoch 3/8
21000/21000 [==============================] - 622s - loss: 0.0345 - acc: 0.9894 - val_loss: 0.0642 - val_acc: 0.9832
Epoch 4/8
21000/21000 [==============================] - 624s - loss: 0.0272 - acc: 0.9906 - val_loss: 0.0634 - val_acc: 0.9840
Epoch 5/8
21000/21000 [==============================] - 625s - loss: 0.0279 - acc: 0.9921 - val_loss: 0.0659 - val_acc: 0.9845
Epoch 6/8
21000/21000 [==============================] - 628s - loss: 0.0223 - acc: 0.9934 - val_loss: 0.0793 - val_acc: 0.9810
Epoch 7/8
21000/21000 [==============================] - 628s - loss: 0.0185 - acc: 0.9940 - val_loss: 0.0782 - val_acc: 0.9835
Epoch 8/8
21000/21000 [==============================] - 629s - loss: 0.0177 - acc: 0.9952 - val_loss: 0.0751 - val_acc: 0.9845
Out[20]:
<keras.callbacks.History at 0x7faa8addfa50>

In [21]:
conv_model.save_weights(model_path + 'aug1.h5')

In [22]:
def get_bn_layers(p):
    return [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dense(4096, activation='relu'),
        Dropout(p),
        BatchNormalization(),
        Dense(4096, activation='relu'),
        Dropout(p),
        BatchNormalization(),
        Dense(1000, activation='softmax')
        ]

In [23]:
p=0.6

In [24]:
bn_model = Sequential(get_bn_layers(p))

In [25]:
def proc_wgts(layer, prev_p, new_p):
    scal = (1-prev_p)/(1-new_p)
    return [o*scal for o in layer.get_weights()]
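
This version of proc_wgts isn't called anywhere in this run; in the course's workflow it would be applied after loading FC weights trained with dropout 0.5, rescaling them for the new rate p=0.6. A hypothetical, commented-out example of that step:

In [ ]:
# Hypothetical usage (assumes FC weights trained with dropout 0.5 have already
# been loaded into bn_model):
#   for l in bn_model.layers:
#       if type(l) == Dense: l.set_weights(proc_wgts(l, 0.5, p))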

In [26]:
bn_model.pop()
for layer in bn_model.layers: layer.trainable=False

In [27]:
bn_model.add(Dense(2,activation='softmax'))

In [28]:
bn_model.compile(Adam(), 'categorical_crossentropy', metrics=['accuracy'])

In [29]:
bn_model.fit(trn_features, trn_labels, nb_epoch=8, validation_data=(val_features, val_labels))


Train on 21000 samples, validate on 4000 samples
Epoch 1/8
21000/21000 [==============================] - 13s - loss: 0.3203 - acc: 0.8682 - val_loss: 0.2359 - val_acc: 0.9028
Epoch 2/8
21000/21000 [==============================] - 13s - loss: 0.2763 - acc: 0.8977 - val_loss: 0.3251 - val_acc: 0.8815
Epoch 3/8
21000/21000 [==============================] - 13s - loss: 0.2915 - acc: 0.8987 - val_loss: 0.2332 - val_acc: 0.9107
Epoch 4/8
21000/21000 [==============================] - 13s - loss: 0.2982 - acc: 0.8980 - val_loss: 0.2119 - val_acc: 0.9165
Epoch 5/8
21000/21000 [==============================] - 13s - loss: 0.2963 - acc: 0.9003 - val_loss: 0.2341 - val_acc: 0.9113
Epoch 6/8
21000/21000 [==============================] - 13s - loss: 0.3100 - acc: 0.8978 - val_loss: 0.2610 - val_acc: 0.9077
Epoch 7/8
21000/21000 [==============================] - 13s - loss: 0.3198 - acc: 0.8950 - val_loss: 0.2179 - val_acc: 0.9165
Epoch 8/8
21000/21000 [==============================] - 13s - loss: 0.3107 - acc: 0.8969 - val_loss: 0.2180 - val_acc: 0.9203
Out[29]:
<keras.callbacks.History at 0x7faa72e3cad0>

In [30]:
bn_model.save_weights(model_path+'bn.h5')

In [31]:
bn_layers = get_bn_layers(0.6)
bn_layers.pop()
bn_layers.append(Dense(2,activation='softmax'))

In [32]:
final_model = Sequential(conv_layers)
for layer in final_model.layers: layer.trainable = False
for layer in bn_layers: final_model.add(layer)

In [33]:
for l1,l2 in zip(bn_model.layers, bn_layers):
    l2.set_weights(l1.get_weights())

In [34]:
final_model.compile(optimizer=Adam(), 
                    loss='categorical_crossentropy', metrics=['accuracy'])

In [35]:
final_model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=1, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)


Epoch 1/1
21000/21000 [==============================] - 632s - loss: 0.3065 - acc: 0.9530 - val_loss: 0.1097 - val_acc: 0.9718
Out[35]:
<keras.callbacks.History at 0x7faa66444810>

In [36]:
final_model.save_weights(model_path + 'final1.h5')

In [37]:
final_model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=4, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)


Epoch 1/4
21000/21000 [==============================] - 635s - loss: 0.1141 - acc: 0.9656 - val_loss: 0.0657 - val_acc: 0.9795
Epoch 2/4
21000/21000 [==============================] - 637s - loss: 0.0828 - acc: 0.9718 - val_loss: 0.0779 - val_acc: 0.9762
Epoch 3/4
21000/21000 [==============================] - 637s - loss: 0.0633 - acc: 0.9776 - val_loss: 0.0632 - val_acc: 0.9822
Epoch 4/4
21000/21000 [==============================] - 636s - loss: 0.0644 - acc: 0.9774 - val_loss: 0.0521 - val_acc: 0.9828
Out[37]:
<keras.callbacks.History at 0x7faa65d9e950>

In [56]:
final_model.save('final_model.h5')

In [62]:
test_path = os.path.join(path, 'test')
test_batches = get_batches(test_path, shuffle=False, batch_size=batch_size)

# final_model ends in a 2-way softmax, so predict_generator returns class
# probabilities for the test set (not conv features)
test_preds = final_model.predict_generator(test_batches, test_batches.nb_sample)

save_array('test_preds.dat', test_preds)


Found 12500 images belonging to 1 classes.

In [73]:
# Alternative (not used here): compute test-set conv features with conv_model
# and predict with bn_model instead:
#   conv_test_feat = conv_model.predict_generator(test_batches, test_batches.nb_sample)
#   preds = bn_model.predict(conv_test_feat, batch_size=batch_size*2)
#   isdog = preds[:,1]

# Column 1 of the softmax output is the probability of 'dog'
isdog = test_preds[:,1]
# Clip predictions away from exactly 0 and 1 so the Kaggle log loss stays finite
isdog = isdog.clip(min=0.00000001, max=0.99999999)
filenames = test_batches.filenames
# f[8:] drops the 8-character subdirectory prefix (e.g. 'unknown/') to recover the numeric id
ids = np.array([int(f[8:f.find('.')]) for f in filenames])
subm = np.stack([ids,isdog], axis=1)
subm
submission_file_name = 'submission2.csv'
np.savetxt(submission_file_name, subm, fmt='%d,%.5f', header='id,label', comments='')
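
Optionally, IPython's FileLink (standard IPython.display API) can render a download link for the submission file directly in the notebook:

In [ ]:
from IPython.display import FileLink
FileLink(submission_file_name)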

In [ ]: