In [1]:
%matplotlib inline
import os, sys
sys.path.insert(1, os.path.join(sys.path[0], '../utils'))
from utils import *
from PIL import Image
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from sklearn.metrics import confusion_matrix
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
current_dir = os.getcwd()
LESSON_HOME_DIR = current_dir
DATA_HOME_DIR = current_dir+'/data'
categories = sorted([os.path.basename(x) for x in glob(DATA_HOME_DIR+'/train/*')])
categories
Out[2]:
In [3]:
def plot_history(h):
    # summarize history for accuracy
    plt.plot(h['acc'])
    plt.plot(h['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    # summarize history for loss
    plt.plot(h['loss'])
    plt.plot(h['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
def underfit(train_err, test_error):
    # flag epochs where training error is less than two-thirds of validation error
    return train_err < test_error * 0.667
def overfit(train_acc, test_acc):
    # flag epochs where training accuracy exceeds validation accuracy
    return train_acc > test_acc
def validation_histogram():
    # compare the distribution of predicted classes against the true validation labels
    probs = bn_model.predict(conv_val_feat, batch_size=batch_size)
    expected_labels = val_batches.classes
    our_labels = np.argmax(probs, axis=1)
    data = np.vstack([expected_labels, our_labels]).T
    plt.style.use('seaborn-deep')
    plt.hist(data, range(11), alpha=0.7, label=['expected', 'ours'])  # 11 bin edges for 10 classes
    plt.legend(loc='upper right')
    plt.show()
def validation_np_histogram():
    probs = bn_model.predict(conv_val_feat, batch_size=batch_size)
    expected_labels = val_batches.classes
    our_labels = np.argmax(probs, axis=1)
    print np.histogram(our_labels, range(11))[0]
def validation_confusion():
    probs = bn_model.predict(conv_val_feat, batch_size=batch_size)
    expected_labels = val_batches.classes
    our_labels = np.argmax(probs, axis=1)
    cm = confusion_matrix(expected_labels, our_labels)
    plot_confusion_matrix(cm, val_batches.class_indices)
In [4]:
%cd $DATA_HOME_DIR
#Set path to sample/ path if desired
path = DATA_HOME_DIR + '/'
#path = DATA_HOME_DIR + '/sample/'
test_path = DATA_HOME_DIR + '/test/' #We use all the test data
results_path = DATA_HOME_DIR + '/results/'
train_path = path + 'train/'
valid_path = path + 'valid/'
histories = {}
In [5]:
batch_size=64
(val_classes, trn_classes, val_labels, trn_labels,
    val_filenames, filenames, test_filenames) = get_classes(path)
val_batches = get_batches(path+'valid', batch_size=batch_size, shuffle=False)
In [6]:
# use the batch normalization VGG
#vgg = Vgg16BN()
# No. He's not using BN yet, so let's just stop that
vgg = Vgg16()
In [7]:
model=vgg.model
last_conv_idx = [i for i,l in enumerate(model.layers) if type(l) is Convolution2D][-1]
conv_layers = model.layers[:last_conv_idx+1]
print "input shape to dense layer", conv_layers[-1].output_shape[1:]
In [8]:
conv_model = Sequential(conv_layers)
In [9]:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05,
                                 shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
# Hmmm, don't we want to train with more augmented images?
#batches = get_batches(path+'train', gen_t, batch_size=batch_size)
batches = get_batches(path+'train', batch_size=batch_size)
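# Note on the question above: with precomputed conv features, per-epoch augmentation
# would mean recomputing the features every epoch (or precomputing several augmented
# copies), which is presumably why gen_t isn't passed here.
# Caution (assumption about utils.get_batches): if it defaults to shuffle=True, the
# rows of conv_feat may not line up with trn_labels from get_classes; passing
# shuffle=False here would be safer.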
val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)
#test_batches = get_batches(path+'test', batch_size=batch_size*2, shuffle=False)
conv_feat = conv_model.predict_generator(batches, batches.nb_sample)
conv_val_feat = conv_model.predict_generator(val_batches, val_batches.nb_sample)
# With 8GB I get memory error on this. 79k images is too many
# Trying 16GB...bah! I *still* get a memory error. WTF? I see 10.6GB out of 16GB used.
#conv_test_feat = conv_model.predict_generator(test_batches, test_batches.nb_sample)
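If the full test-set conv features won't fit in memory (the 79k-image error above), one workaround is to predict a chunk of batches at a time and append the results to an on-disk bcolz array (save_array/load_array in the course utils are bcolz-backed). A rough sketch, not part of utils; it assumes the test batches were created with shuffle=False so predictions stay in filename order:
In [ ]:
import math
import bcolz

def precompute_in_chunks(model, batches, rootdir, batches_per_chunk=100):
    # predict a limited number of batches at a time and append the features to an
    # on-disk bcolz carray, so they never all have to sit in RAM at once
    n_batches = int(math.ceil(batches.nb_sample / float(batches.batch_size)))
    arr, done = None, 0
    while done < n_batches:
        take = min(batches_per_chunk, n_batches - done)
        imgs = []
        for _ in range(take):
            b = next(batches)
            imgs.append(b[0] if isinstance(b, tuple) else b)  # drop labels if the generator yields them
        feats = model.predict(np.concatenate(imgs), batch_size=batches.batch_size)
        if arr is None:
            arr = bcolz.carray(feats, rootdir=rootdir, mode='w')
        else:
            arr.append(feats)
        done += take
    arr.flush()
    return arr
#conv_test_feat = precompute_in_chunks(conv_model, test_batches, path+'results/conv_test_feat.dat')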
In [10]:
save_array(path+'results/conv_feat.dat', conv_feat)
save_array(path+'results/conv_val_feat.dat', conv_val_feat)
#save_array(path+'results/conv_test_feat.dat', conv_test_feat)
In [ ]:
conv_feat = load_array(path+'results/conv_feat.dat')
conv_val_feat = load_array(path+'results/conv_val_feat.dat')
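After reloading, a quick shape check guards against stale feature files; the (512, 14, 14) per-image shape assumes VGG16 conv-layer output on 224x224 inputs:
In [ ]:
# expect (n_train, 512, 14, 14) and (n_valid, 512, 14, 14), matching the label counts
print conv_feat.shape, trn_labels.shape
print conv_val_feat.shape, val_labels.shape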
In [ ]:
def get_bn_layers(p, input_shape):
    return [
        MaxPooling2D(input_shape=input_shape),
        Flatten(),
        Dropout(p/2),
        #Dense(128, activation='relu'),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(p/2),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(10, activation='softmax')
    ]
In [ ]:
p=0.5 # wow isn't this high?
In [ ]:
bn_model = Sequential(get_bn_layers(p,conv_val_feat.shape[1:]))
bn_model.compile(Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])
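For reference, with (512, 14, 14) conv features the head's MaxPooling2D takes that to (512, 7, 7), Flatten gives 25088 units, and the dense layers step down through 256 and 128 to the 10-way softmax. The layer shapes can be confirmed with a summary:
In [ ]:
bn_model.summary()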
In [ ]:
# starting with super-small lr first
bn_model.optimizer.lr=0.000001
bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, nb_epoch=1,
             validation_data=(conv_val_feat, val_labels))
# okay, at least with 16GB this is much quicker: 6s vs. 200+s due to swapping. I'm at 10.6GiB memory.
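One caveat (an assumption about Keras 1.x behaviour, not something from the lesson): assigning a plain float to optimizer.lr may not take effect once the optimizer's update graph has been built, so if the learning-rate changes don't seem to bite, the backend setter is the safer route:
In [ ]:
# safer in-place learning-rate change: update the backend variable directly
from keras import backend as K
K.set_value(bn_model.optimizer.lr, 1e-6)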
In [ ]:
validation_histogram()
In [ ]:
validation_histogram()
In [ ]:
validation_histogram()
Okay, I get a completely different result from what he gets. I'm using the samples, though.
Let's try with the full dataset.
In [ ]:
bn_model.optimizer.lr=0.0001
bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, nb_epoch=3,
             validation_data=(conv_val_feat, val_labels))
In [ ]:
validation_histogram()
In [ ]:
validation_histogram()
In [ ]:
validation_histogram()
In [ ]:
validation_confusion()
In [ ]:
hist = bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, nb_epoch=5,
                    validation_data=(conv_val_feat, val_labels))
In [ ]:
#Set constants. You can experiment with no_of_epochs to improve the model
batch_size=64
no_of_epochs=30
In [ ]:
# Augment the data
gen = image.ImageDataGenerator(rotation_range=15,        #=0
                               height_shift_range=0.05,  #=0.1
                               width_shift_range=0.1,
                               shear_range=0.05,          #=0
                               channel_shift_range=20,    #=0
                               #zoom_range=0.1,
                               #horizontal_flip=True
                               )
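To eyeball what these settings actually do to an image, one trick is to push a single training image through the generator a few times and plot the results (illustrative only: it grabs the first file of the first category found above, and the transpose handles Theano-style channels-first arrays):
In [ ]:
img_path = glob(train_path + categories[0] + '/*')[0]
img = image.img_to_array(image.load_img(img_path, target_size=(224,224)))
aug_iter = gen.flow(np.expand_dims(img, 0), batch_size=1)
fig, axes = plt.subplots(1, 4, figsize=(12, 3))
for ax in axes:
    aug_img = next(aug_iter)[0]
    if aug_img.shape[0] == 3:          # channels-first -> channels-last for imshow
        aug_img = aug_img.transpose(1, 2, 0)
    ax.imshow(np.clip(aug_img, 0, 255).astype('uint8'))
    ax.axis('off')
plt.show()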
In [ ]:
# Finetune the model
# just add gen as 2nd parameter to batches & not val_batches
batches = vgg.get_batches(train_path, gen, batch_size=batch_size)
val_batches = vgg.get_batches(valid_path, batch_size=batch_size*2)
vgg.finetune(batches)
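If Vgg16.finetune behaves as in the lessons (drop the old 1000-way softmax, freeze the remaining layers, and add a fresh softmax over the batches' classes), only that last layer should now be trainable; a quick check:
In [ ]:
print [(l.name, l.trainable) for l in vgg.model.layers[-3:]]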
In [ ]:
INIT_LR0=0.00001
INIT_LR=0.001
EPOCHS_DROP=5.0
DROP=0.5
def step_decay0(epoch, initial_lrate=INIT_LR0, epochs_drop=EPOCHS_DROP, drop=DROP):
    lrate = initial_lrate * math.pow(drop, math.floor((1+epoch)/epochs_drop))
    return lrate
def step_decay(epoch, initial_lrate=INIT_LR, epochs_drop=EPOCHS_DROP, drop=DROP):
    lrate = initial_lrate * math.pow(drop, math.floor((1+epoch)/epochs_drop))
    return lrate
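With DROP=0.5 and EPOCHS_DROP=5.0 the main schedule holds 0.001 for epochs 0-3, halves to 0.0005 for epochs 4-8, and so on; printing a few values is a cheap sanity check:
In [ ]:
print [round(step_decay(e), 6) for e in range(12)]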
In [ ]:
#latest_weights_filename="weights-02-04-1.00.hdf5"
#vgg.model.load_weights(results_path+latest_weights_filename)
#run_index=0 # restarting fresh
run_index+=1
filepath=results_path+"run-%02d-weights-{epoch:02d}-{val_acc:.2f}.hdf5"%(run_index)
history_filepath=results_path+"run-%02d-history.csv"%(run_index)
checkpoint = ModelCheckpoint(filepath,
                             #monitor='val_acc', mode='max',
                             monitor='val_loss', mode='min',
                             verbose=1,
                             save_weights_only=True, save_best_only=True)
lr_scheduler0 = LearningRateScheduler(step_decay0)
lr_scheduler = LearningRateScheduler(step_decay)
callbacks = [checkpoint,lr_scheduler]
In [ ]:
# he says to start with a super-low learning rate first, just to get things going
history0 = vgg.fit(batches, val_batches, 3, [checkpoint,lr_scheduler0])
# then continue with a more reasonable learning rate
history = vgg.fit(batches, val_batches, no_of_epochs, callbacks)
history_df = pd.DataFrame(history.history)
history_df.to_csv(history_filepath)
histories[run_index] = history_df
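Because each run's history goes to CSV, earlier runs can be reloaded for comparison even after a kernel restart (the run number below is just an example):
In [ ]:
histories[10] = pd.read_csv(results_path + "run-10-history.csv", index_col=0)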
In [ ]:
histories.keys()
In [ ]:
history_df["underfit"] = map(underfit, history_df["loss"], history_df["val_loss"])
history_df["overfit"] = map(overfit, history_df["acc"], history_df["val_acc"])
plot_history(histories[11])
plot_history(histories[10])
In [ ]:
history_df["underfit"] = map(underfit, history_df["loss"], history_df["val_loss"])
history_df["overfit"] = map(overfit, history_df["acc"], history_df["val_acc"])
plot_history(history_df)
In [ ]:
history_df
In [ ]:
batch_size=64
batches = get_batches(path+'train', batch_size=batch_size)
val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)
(val_classes, trn_classes, val_labels, trn_labels,
    val_filenames, filenames, test_filenames) = get_classes(path)
In [ ]:
def conv1():
    # small from-scratch convnet: BN on the inputs, two conv/pool blocks, small dense head
    model = Sequential([
        BatchNormalization(axis=1, input_shape=(3,224,224)),
        Convolution2D(32,3,3, activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D((3,3)),
        Convolution2D(64,3,3, activation='relu'),
        BatchNormalization(axis=1),
        MaxPooling2D((3,3)),
        Flatten(),
        Dense(200, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
    # warm up for a couple of epochs at the low rate, then raise it
    model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches,
                        nb_val_samples=val_batches.nb_sample)
    model.optimizer.lr = 0.001
    model.fit_generator(batches, batches.nb_sample, nb_epoch=4, validation_data=val_batches,
                        nb_val_samples=val_batches.nb_sample)
    return model
In [ ]:
model = conv1()
In [ ]:
# that worked!
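validation_confusion() above is hard-wired to bn_model and the precomputed features, so checking this from-scratch model means going through the raw validation batches instead (a sketch reusing the helpers already imported; val_batches was created with shuffle=False, so the labels line up):
In [ ]:
probs = model.predict_generator(val_batches, val_batches.nb_sample)
cm = confusion_matrix(val_batches.classes, np.argmax(probs, axis=1))
plot_confusion_matrix(cm, val_batches.class_indices)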