In [2]:
from theano.sandbox import cuda
cuda.use('gpu0')
In [3]:
%matplotlib inline
from __future__ import print_function, division
from importlib import reload
import utils; reload(utils)
from utils import *
from IPython.display import FileLink
In [4]:
LESSON_HOME_DIR='/home/ubuntu/fastai-notes/deeplearning1/nbs/'
path = LESSON_HOME_DIR+'data/state/'
batch_size=64
In [5]:
batches = get_batches(path+'train', batch_size=batch_size)
val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)
In [6]:
(val_classes, trn_classes, val_labels, trn_labels,
 val_filenames, filenames, test_filenames) = get_classes(path)
Rather than using batches, we could just import all the data into an array to save some processing time. (In most examples I'm using the batches, however - just because that's how I happened to start out.)
In [8]:
trn = get_data(path+'train')
val = get_data(path+'valid')
In [9]:
save_array(path+'results/val.dat', val)
save_array(path+'results/trn.dat', trn)
In [10]:
??get_data
"""
def get_data(path, target_size=(224,224)):
    batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
    return np.concatenate([batches.next() for i in range(batches.nb_sample)])
"""
In [11]:
??save_array
"""
def save_array(fname, arr):
    c = bcolz.carray(arr, rootdir=fname, mode='w')
    c.flush()
"""
In [18]:
val = load_array(path+'results/val.dat')
trn = load_array(path+'results/trn.dat')
We should find that everything that worked on the sample (see statefarm-sample.ipynb) works on the full dataset too - only better, since now we have more data. So let's see how they go - the models in this section are exact copies of the sample notebook models.
In [7]:
def conv1(batches):
    model = Sequential([
        BatchNormalization(axis=1, input_shape=(3,224,224)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D((3,3)),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D((3,3)),
        Flatten(),
        Dense(200, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
    ])
    # model.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
    # model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches,
    #                     nb_val_samples=val_batches.nb_sample)
    model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit_generator(batches, batches.nb_sample, nb_epoch=4, validation_data=val_batches,
                        nb_val_samples=val_batches.nb_sample)
    return model
In [8]:
model = conv1(batches)
In [12]:
model.save_weights(path+'models/model1.h1')
In [1]:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05,
        shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
batches = get_batches(path+'train', gen_t, batch_size=batch_size)
In [12]:
model = conv1(batches)
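With augmentation in place, the model can typically train for longer before it starts overfitting. One plausible follow-on (a sketch only - the learning rate and epoch count are illustrative assumptions, not values from the original run) is to lower the learning rate and fit a few more epochs on the augmented batches:
In [ ]:
# assumes `model` is the compiled model returned by conv1() above;
# with the Theano backend, Adam's lr is a shared variable we can update in place
model.optimizer.lr.set_value(1e-4)
model.fit_generator(batches, batches.nb_sample, nb_epoch=4, validation_data=val_batches,
                    nb_val_samples=val_batches.nb_sample)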
Since we have so little data, and it is similar to ImageNet images (full-color photos), using pre-trained VGG weights is likely to be helpful - in fact it seems likely that we won't need to fine-tune the convolutional layer weights much, if at all. So we can pre-compute the output of the last convolutional layer, as we did in lesson 3 when we experimented with dropout. (However, this means that we can't use full data augmentation, since we can't pre-compute something that changes every image.)
In [6]:
vgg = Vgg16()
model = vgg.model
# index of the last convolutional layer in VGG16
last_conv_idx = [i for i,l in enumerate(model.layers) if type(l) is Convolution2D][-1]
conv_layers = model.layers[:last_conv_idx+1]
In [7]:
conv_model = Sequential(conv_layers)
In [8]:
# shuffle must be False when pre-computing features, so the saved features stay aligned with the labels
batches = get_batches(path+'train', batch_size=batch_size, shuffle=False)
In [17]:
(val_classes, trn_classes, val_labels, trn_labels,
 val_filenames, filenames, test_filenames) = get_classes(path)
In [18]:
conv_feat = conv_model.predict_generator(batches, batches.nb_sample)
conv_val_feat = conv_model.predict_generator(val_batches, val_batches.nb_sample)
In [ ]:
test_batches = get_batches(path+'test', batch_size=batch_size, shuffle=False)
conv_test_feat = conv_model.predict_generator(test_batches, test_batches.nb_sample)
In [20]:
# save_array(path+'results/conv_val_feat.dat', conv_val_feat)
save_array(path+'results/conv_test_feat.dat', conv_test_feat)
# save_array(path+'results/conv_feat.dat', conv_feat)
In [ ]:
conv_feat = load_array(path+'results/conv_feat.dat')
conv_val_feat = load_array(path+'results/conv_val_feat.dat')
conv_val_feat.shape
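With the convolutional features precomputed, we only need to train a small dense model on top of them, which is very fast. Here's a minimal sketch of such a top model (the layer sizes, dropout level, and epoch count are assumptions to tune, not values from the original notebook):
In [ ]:
p = 0.5  # hypothetical dropout level - tune on the validation set

# small fully-connected model trained on the precomputed VGG conv features
bn_model = Sequential([
    MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
    Flatten(),
    Dropout(p),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(p),
    Dense(10, activation='softmax')
])
bn_model.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, nb_epoch=3,
             validation_data=(conv_val_feat, val_labels))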
We'll find a good clipping amount using the validation set prior to submitting. Kaggle's log loss metric heavily penalizes confident wrong predictions, so clipping probabilities away from 0 and 1 generally improves the score.
In [10]:
# clip probabilities into [(1-mx)/9, mx]; with 10 classes each row still sums to ~1
def do_clip(arr, mx): return np.clip(arr, (1-mx)/9, mx)
In [17]:
val_preds = model.predict(val, batch_size=batch_size)
In [21]:
keras.metrics.categorical_crossentropy(val_labels, do_clip(val_preds, 0.96)).eval()
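Rather than checking a single value, we can sweep a few candidates and keep the one with the lowest validation log loss (a quick sketch reusing val_preds from above; the candidate list is arbitrary):
In [ ]:
# evaluate validation log loss for several clip ceilings
for mx in [0.90, 0.93, 0.96, 0.99]:
    loss = keras.metrics.categorical_crossentropy(
        val_labels, do_clip(val_preds, mx)).eval()
    print(mx, loss)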
In [ ]:
# test_batches = get_batches(path+'test', batch_size=batch_size, shuffle=False)
test = get_data(path+'test')
preds = model.predict(test, batch_size=batch_size*2)
In [ ]:
subm = do_clip(preds, 0.96)
In [ ]:
subm_name = path+'results/subm.gz'
In [ ]:
submission = pd.DataFrame(subm, columns=classes)
submission.insert(0, 'img', [a[4:] for a in test_filenames])
submission.head()
In [ ]:
submission.to_csv(subm_name, index=False, compression='gzip')
FileLink(subm_name)