In [1]:
from theano.sandbox import cuda
%matplotlib inline
from __future__ import division, print_function
import utils; reload(utils)
from utils import *
#path = "data/dogscats/sample/"
path = "data/"
model_path = path + 'models/'
if not os.path.exists(model_path): os.mkdir(model_path)
batch_size=64
In [2]:
model = vgg_ft(2)   # VGG16 with the top layer replaced by a 2-way classifier (from utils)
In [3]:
model.load_weights(model_path+'finetune3.h5')
In [4]:
layers = model.layers
last_conv_idx = [index for index, layer in enumerate(layers)
                 if type(layer) is Convolution2D][-1]
In [5]:
conv_layers = layers[:last_conv_idx+1]
conv_model = Sequential(conv_layers)
# Dense layers - also known as fully connected or 'FC' layers
fc_layers = layers[last_conv_idx+1:]
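A quick sanity check on the split (optional; the exact shape depends on the backend's image ordering):

print(last_conv_idx, type(layers[last_conv_idx]))  # index and type of the final conv layer
print(conv_layers[-1].output_shape)                # e.g. (None, 512, 14, 14) with Theano's channels-first ordering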
In [6]:
batches = get_batches(path+'train', shuffle=False, batch_size=batch_size)
val_batches = get_batches(path+'valid', shuffle=False, batch_size=batch_size)
val_classes = val_batches.classes
trn_classes = batches.classes
val_labels = onehot(val_classes)
trn_labels = onehot(trn_classes)
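Here onehot comes from the course's utils; a minimal numpy equivalent, for reference only (a sketch, not the actual implementation):

def onehot_sketch(x):
    # e.g. [0, 1, 0] -> [[1., 0.], [0., 1.], [1., 0.]]
    return np.eye(np.max(x) + 1)[x]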
In [7]:
val_features = conv_model.predict_generator(val_batches, val_batches.nb_sample)
In [8]:
trn_features = conv_model.predict_generator(batches, batches.nb_sample)
In [9]:
save_array(model_path + 'train_convlayer_features.bc', trn_features)
save_array(model_path + 'valid_convlayer_features.bc', val_features)
In [10]:
trn_features = load_array(model_path+'train_convlayer_features.bc')
val_features = load_array(model_path+'valid_convlayer_features.bc')
In [11]:
# Copy the weights from the pre-trained model.
# NB: Since we're removing dropout, we want to halve the weights
def proc_wgts(layer): return [o/2 for o in layer.get_weights()]
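A rough numeric illustration of the rationale (it assumes the dropout layers do not rescale activations at training time): with p=0.5, about half of each unit's inputs are zeroed, so a dropout-free copy of the layer sees roughly double the expected activation, and halving the weights compensates.

x = np.random.rand(10000)
mask = np.random.rand(10000) > 0.5        # simulate p=0.5 dropout on the inputs
print((x * mask).sum(), (0.5 * x).sum())  # the two sums roughly agree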
In [12]:
# Such a finely tuned model needs to be updated very slowly!
opt = RMSprop(lr=0.00001, rho=0.7)
In [13]:
def get_fc_model():
    model = Sequential([
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dense(4096, activation='relu'),
        Dropout(0.),
        Dense(4096, activation='relu'),
        Dropout(0.),
        Dense(2, activation='softmax')
    ])
    for l1, l2 in zip(model.layers, fc_layers):
        l1.set_weights(proc_wgts(l2))
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
In [14]:
gen = image.ImageDataGenerator(rotation_range=15, width_shift_range=0.1,
                               height_shift_range=0.1, zoom_range=0.1, horizontal_flip=True)
batches = get_batches(path+'train', gen, batch_size=batch_size)
# NB: We don't want to augment or shuffle the validation set
val_batches = get_batches(path+'valid', shuffle=False, batch_size=batch_size)
fc_model = get_fc_model()
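To eyeball what the augmentation does, a quick sketch (assumes plt is available via the utils import, and Theano's channels-first images):

imgs, labels = next(batches)
for i in range(4):
    plt.subplot(1, 4, i + 1)
    plt.imshow(imgs[i].transpose(1, 2, 0).astype('uint8'))
    plt.axis('off')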
In [15]:
fc_model.fit(trn_features, trn_labels, nb_epoch=8,
             batch_size=batch_size, validation_data=(val_features, val_labels))
In [16]:
fc_model.save_weights(model_path+'no_dropout.h5')
In [17]:
fc_model = get_fc_model()
In [18]:
for layer in conv_model.layers: layer.trainable = False
# Look how easy it is to connect two models together!
conv_model.add(fc_model)
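An optional one-line sanity check on the stacked model:

print(conv_model.output_shape)  # expected: (None, 2), two class probabilities per image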
In [19]:
conv_model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
In [20]:
conv_model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=8,
                         validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
In [21]:
conv_model.save_weights(model_path + 'aug1.h5')
In [22]:
def get_bn_layers(p):
    return [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dense(4096, activation='relu'),
        Dropout(p),
        BatchNormalization(),
        Dense(4096, activation='relu'),
        Dropout(p),
        BatchNormalization(),
        # 1000-way head matching VGG's ImageNet classifier; swapped for a 2-way head below
        Dense(1000, activation='softmax')
    ]
In [23]:
p=0.6
In [24]:
bn_model = Sequential(get_bn_layers(p))
In [25]:
# Rescale a layer's weights to account for changing dropout from prev_p to new_p
def proc_wgts(layer, prev_p, new_p):
    scal = (1 - prev_p) / (1 - new_p)
    return [o * scal for o in layer.get_weights()]
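Note that this version of proc_wgts is never applied in this copy of the notebook. The intended usage, after copying FC weights trained with dropout 0.5 into bn_model (that loading step isn't shown here), would be a rescale to the new p=0.6 (a sketch):

for l in bn_model.layers:
    if type(l) is Dense:
        l.set_weights(proc_wgts(l, 0.5, 0.6))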
In [26]:
bn_model.pop()   # drop the 1000-way ImageNet softmax
for layer in bn_model.layers: layer.trainable = False
In [27]:
bn_model.add(Dense(2,activation='softmax'))
In [28]:
bn_model.compile(Adam(), 'categorical_crossentropy', metrics=['accuracy'])
In [29]:
bn_model.fit(trn_features, trn_labels, nb_epoch=8, validation_data=(val_features, val_labels))
In [30]:
bn_model.save_weights(model_path+'bn.h5')
In [31]:
bn_layers = get_bn_layers(p)
bn_layers.pop()                                    # drop the 1000-way head
bn_layers.append(Dense(2, activation='softmax'))   # replace it with a 2-way head
In [32]:
final_model = Sequential(conv_layers)
for layer in final_model.layers: layer.trainable = False
for layer in bn_layers: final_model.add(layer)
In [33]:
for l1, l2 in zip(bn_model.layers, bn_layers):
    l2.set_weights(l1.get_weights())
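An optional check that the copy took effect:

assert np.allclose(bn_model.layers[-1].get_weights()[0],
                   final_model.layers[-1].get_weights()[0])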
In [34]:
final_model.compile(optimizer=Adam(),
                    loss='categorical_crossentropy', metrics=['accuracy'])
In [35]:
final_model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=1,
                          validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
In [36]:
final_model.save_weights(model_path + 'final1.h5')
In [37]:
final_model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=4,
                          validation_data=val_batches, nb_val_samples=val_batches.nb_sample)
In [56]:
final_model.save('final_model.h5')
In [62]:
test_path = os.path.join(path, 'test')
test_batches = get_batches(test_path, shuffle=False, batch_size=batch_size)
# These are the final model's class probabilities (not conv features)
test_preds = final_model.predict_generator(test_batches, test_batches.nb_sample)
save_array('test_preds.dat', test_preds)
In [73]:
isdog = test_preds[:, 1]
# Clip away exact 0s and 1s: log loss is unbounded for a confidently wrong prediction
isdog = isdog.clip(min=0.00000001, max=0.99999999)
filenames = test_batches.filenames
# Filenames look like 'unknown/1234.jpg'; the first 8 characters are the directory prefix
ids = np.array([int(f[8:f.find('.')]) for f in filenames])
subm = np.stack([ids, isdog], axis=1)
subm
submission_file_name = 'submission2.csv'
np.savetxt(submission_file_name, subm, fmt='%d,%.5f', header='id,label', comments='')
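Before uploading, it's worth eyeballing the first few rows of the file:

with open(submission_file_name) as f:
    for line in f.readlines()[:5]:
        print(line.strip())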