In [1]:
# step to run on theano. Data is mounted at /data
# floyd run --mode jupyter --gpu --env theano:py2 --data matsaleh/datasets/vgg16_model/1:data

In [20]:
# if bcolz gives error, uncomment and run
# !pip install bcolz
# if keras gives error on importing l2, uncomment and run following
# !pip uninstall -y keras
# !pip install keras==1.2.2

In [ ]:
# !wget https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/download/sample_submission.csv -P data
# !ls -ltr /output/data

In [21]:
# !mkdir data
# !wget https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/download/test.zip -P data
# !wget https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/download/train.zip -P data
# !unzip /output/data/test.zip
# !unzip /output/data/train.zip


/bin/sh: wget: command not found
/bin/sh: wget: command not found

In [4]:
%matplotlib inline

In [5]:
#Create references to important directories we will use over and over
import os, sys
current_dir = os.getcwd()
LESSON_HOME_DIR = current_dir
DATA_HOME_DIR = current_dir+'/data/'

In [6]:
#Create directories
%cd $DATA_HOME_DIR
%mkdir valid
%mkdir results
%mkdir -p sample/train
%mkdir -p sample/test
%mkdir -p sample/valid
%mkdir -p sample/results
%mkdir -p test/unknown


[Errno 2] No such file or directory: '/Users/nikhil/workspace/www/ml/fastai/course-floydhub/deeplearning1/nbs/data/redux'
/Users/nikhil/workspace/www/ml/fastai/course-floydhub/deeplearning1/nbs

In [7]:
%cd $DATA_HOME_DIR/train
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(2000): 
    os.rename(shuf[i], DATA_HOME_DIR+'/valid/' + shuf[i])
    
from shutil import copyfile
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(200): 
    copyfile(shuf[i], DATA_HOME_DIR+'/sample/train/' + shuf[i])

%cd $DATA_HOME_DIR/valid
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(50): 
    copyfile(shuf[i], DATA_HOME_DIR+'/sample/valid/' + shuf[i])
    
#Divide cat/dog images into separate directories

%cd $DATA_HOME_DIR/sample/train
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

%cd $DATA_HOME_DIR/sample/valid
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

%cd $DATA_HOME_DIR/valid
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

%cd $DATA_HOME_DIR/train
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

# Create single 'unknown' class for test set
%cd $DATA_HOME_DIR/test
%mv *.jpg unknown/


[Errno 2] No such file or directory: '/Users/nikhil/workspace/www/ml/fastai/course-floydhub/deeplearning1/nbs/data/redux/train'
/Users/nikhil/workspace/www/ml/fastai/course-floydhub/deeplearning1/nbs
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-7-403b789fd07d> in <module>()
      1 get_ipython().magic(u'cd $DATA_HOME_DIR/train')
----> 2 g = glob('*.jpg')
      3 shuf = np.random.permutation(g)
      4 for i in range(2000):
      5     os.rename(shuf[i], DATA_HOME_DIR+'/valid/' + shuf[i])

NameError: name 'glob' is not defined

In [8]:
%cd $DATA_HOME_DIR

#Set path to sample/ path if desired
path = DATA_HOME_DIR + '/' #'/sample/'
test_path = DATA_HOME_DIR + '/test/' #We use all the test data
results_path=DATA_HOME_DIR + '/results/'
train_path=path + '/train/'
valid_path=path + '/valid/'


[Errno 2] No such file or directory: '/Users/nikhil/workspace/www/ml/fastai/course-floydhub/deeplearning1/nbs/data/redux'
/Users/nikhil/workspace/www/ml/fastai/course-floydhub/deeplearning1/nbs

In [9]:
from __future__ import division,print_function

import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt

In [10]:
import utils; reload(utils)
from utils import plots


Using Theano backend.

In [13]:
# As large as you can, but no larger than 64 is recommended. 
# If you have an older or cheaper GPU, you'll run out of memory, so will have to decrease this.
batch_size=64
no_of_epochs=3

In [12]:
# Import our class, and instantiate
import vgg16; reload(vgg16)
from vgg16 import Vgg16

In [14]:
%%time
#Finetune the model
batches = vgg.get_batches(train_path, batch_size=batch_size)
val_batches = vgg.get_batches(valid_path, batch_size=batch_size*2)
vgg.finetune(batches)

#Not sure if we set this for all fits
vgg.model.optimizer.lr = 0.01
latest_weights_filename = 'ft0d.h5'
vgg.model.save_weights(results_path+latest_weights_filename)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-14-5935ea56d849> in <module>()
      1 #Finetune the model
----> 2 batches = vgg.get_batches(train_path, batch_size=batch_size)
      3 val_batches = vgg.get_batches(valid_path, batch_size=batch_size*2)
      4 vgg.finetune(batches)
      5 

NameError: name 'vgg' is not defined

In [15]:
batches, preds = vgg.test(test_path, batch_size = batch_size*2)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-662f30f813d7> in <module>()
----> 1 batches, preds = vgg.test(test_path, batch_size = batch_size*2)

NameError: name 'vgg' is not defined

In [16]:
#For every image, vgg.test() generates two probabilities 
#based on how we've ordered the cats/dogs directories.
#It looks like column one is cats and column two is dogs
print preds[:5]

filenames = batches.filenames
print filenames[:5]


  File "<ipython-input-16-a04bd1fb4660>", line 4
    print preds[:5]
              ^
SyntaxError: invalid syntax

In [17]:
#Save our test results arrays so we can use them again later
save_array(results_path + 'test_preds.dat', preds)
save_array(results_path + 'filenames.dat', filenames)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-a683906127e6> in <module>()
      1 #Save our test results arrays so we can use them again later
----> 2 save_array(results_path + 'test_preds.dat', preds)
      3 save_array(results_path + 'filenames.dat', filenames)

NameError: name 'save_array' is not defined

In [18]:
#Load our test predictions from file
preds = load_array(results_path + 'test_preds.dat')
filenames = load_array(results_path + 'filenames.dat')


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-18-da0fca3c6bc6> in <module>()
      1 #Load our test predictions from file
----> 2 preds = load_array(results_path + 'test_preds.dat')
      3 filenames = load_array(results_path + 'filenames.dat')

NameError: name 'load_array' is not defined

In [ ]: