In [ ]:
# floyd run --mode jupyter --gpu --env theano:py2 --data <path>:data
In [ ]:
# If importing bcolz raises an error, uncomment and run:
# !pip install bcolz
# If importing l2 from keras raises an error, uncomment and run the following:
# !pip uninstall -y keras
# !pip install keras==1.2.2
In [ ]:
In [25]:
import os
import sys

# The directory the notebook was started from is the lesson home; the
# competition data is expected in data/dogscatsredux/ underneath it.
LESSON_HOME_DIR = current_dir = os.getcwd()
DATA_HOME_DIR = '{0}/data/dogscatsredux/'.format(current_dir)
In [ ]:
sys.path.insert(1, os.path.join(sys.path[0], '..'))
#import modules
from utils import *
from vgg16 import Vgg16
#Instantiate plotting tool
#In Jupyter notebooks, you will need to run this command before doing any plotting
%matplotlib inline
In [4]:
# Echo the resolved data directory as a quick sanity check
DATA_HOME_DIR
Out[4]:
In [8]:
#Create directories
%cd $DATA_HOME_DIR
%mkdir valid
%mkdir results
%mkdir -p sample/train
%mkdir -p sample/test
%mkdir -p sample/valid
%mkdir -p sample/results
%mkdir -p test/unknown
!ls -ltr
In [11]:
# The __future__ import has to stay the first statement of this cell
from __future__ import division,print_function
import os, json
from glob import glob
import numpy as np
# Print arrays with 4 digits of precision and lines up to 100 characters wide
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
In [12]:
%cd $DATA_HOME_DIR/train
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(2000):
os.rename(shuf[i], DATA_HOME_DIR+'/valid/' + shuf[i])
In [13]:
from shutil import copyfile
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(200):
copyfile(shuf[i], DATA_HOME_DIR+'/sample/train/' + shuf[i])
In [14]:
%cd $DATA_HOME_DIR/valid
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(50):
copyfile(shuf[i], DATA_HOME_DIR+'/sample/valid/' + shuf[i])
In [ ]:
from shutil import copyfile
DATA_HOME_DIR
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(200):
copyfile(shuf[i], DATA_HOME_DIR+'/sample/test/' + shuf[i])
In [15]:
#Divide cat/dog images into separate directories
%cd $DATA_HOME_DIR/sample/train
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/
In [16]:
%cd $DATA_HOME_DIR/sample/valid
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/
%cd $DATA_HOME_DIR/valid
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/
%cd $DATA_HOME_DIR/train
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/
In [17]:
# Create single 'unknown' class for test set
%cd $DATA_HOME_DIR/test
# Move every test image into unknown/ (the directory is created by the
# "%mkdir -p test/unknown" step in the directory-creation cell)
%mv *.jpg unknown/
In [37]:
%cd $DATA_HOME_DIR
#Set path to sample/ path if desired
path = DATA_HOME_DIR #'/sample/'
path = DATA_HOME_DIR + 'sample/'
test_path = path + 'test/'
results_path=DATA_HOME_DIR + 'results/'
train_path=path + 'train/'
valid_path=path + 'valid/'
In [29]:
# Instantiate the Vgg16 class imported from the vgg16 module
vgg = Vgg16()
In [63]:
%%time
#Finetune the model
batch_size=64
# Batch objects for the training and validation directories; validation uses
# twice the training batch size.
batches = vgg.get_batches(train_path, batch_size=batch_size)
val_batches = vgg.get_batches(valid_path, batch_size=batch_size*2)
# presumably adapts the model to the classes found in `batches` - confirm in vgg16.py
vgg.finetune(batches)
# Train for a single epoch
vgg.fit(batches, val_batches, nb_epoch=1)
#Not sure if we set this for all fits
# NOTE(review): this assignment runs after the only fit() call in this cell, so
# it cannot influence the epoch trained above. If 0.01 was meant to apply to
# that epoch it has to be set before fit() - confirm the intent.
vgg.model.optimizer.lr = 0.01
# Save the model weights into the results directory
latest_weights_filename = 'ft0d.h5'
vgg.model.save_weights(results_path+latest_weights_filename)
In [70]:
Out[70]:
In [71]:
# Run the model over the test directory. Note that `batches` is rebound here
# and no longer refers to the training batches from the fine-tuning cell.
batches, preds = vgg.test(test_path, batch_size = batch_size*2)
In [79]:
# File names as reported by the test batches object; preview the first five
filenames = batches.filenames
filenames[:5]
Out[79]:
In [80]:
# Preview the first five rows of the predictions
preds[:5]
Out[80]:
In [81]:
#You can verify the column ordering by viewing some images
from PIL import Image
# Open the third test image so it can be compared with preds[2]
Image.open(test_path + filenames[2])
Out[81]:
In [82]:
# Persist the raw predictions and the matching file names in the results
# directory, one file each.
preds_file = results_path + 'test_preds.dat'
names_file = results_path + 'filenames.dat'
save_array(preds_file, preds)
save_array(names_file, filenames)
In [90]:
# Use column 1 of the predictions as the "is dog" score and preview five values
# NOTE(review): assumes column 1 is the dog class - confirm the column ordering
isDog = preds[:,1]
isDog[:5]
Out[90]:
In [85]:
# Numeric image id of every test file, e.g. 'unknown/123.jpg' -> 123.
# basename/splitext strip the directory and the extension, which removes the
# hard-coded offset 8 (the length of 'unknown/') and no longer depends on
# where the first '.' happens to be in the path.
ids = np.array([int(os.path.splitext(os.path.basename(f))[0]) for f in filenames])
In [89]:
# Preview the first five parsed ids
ids[:5]
Out[89]:
In [88]:
# Build the submission table: one row per image, columns (id, isDog)
subm = np.column_stack((ids, isDog))
subm[:5]
Out[88]:
In [91]:
%cd $DATA_HOME_DIR
submission_file_name = 'submission1.csv'
np.savetxt(submission_file_name, subm, fmt='%d,%.5f', header='id,label', comments='')
In [93]:
from IPython.display import FileLink
# Go back to the lesson directory so the relative path below resolves from there
%cd $LESSON_HOME_DIR
# Show a link to the submission file written in the previous step
FileLink('data/dogscatsredux/'+submission_file_name)
Out[93]:
In [ ]: