In [1]:
# step to run on theano. Data is mounted at /data
# floyd run --mode jupyter --gpu --env theano:py2 --data matsaleh/datasets/vgg16_model/1:data
In [20]:
# if bcolz gives error, uncomment and run
# !pip install bcolz
# if keras gives error on importing l2, uncomment and run following
# !pip uninstall -y keras
# !pip install keras==1.2.2
In [ ]:
# !wget https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/download/sample_submission.csv -P data
# !ls -ltr /output/data
In [21]:
# !mkdir data
# !wget https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/download/test.zip -P data
# !wget https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/download/train.zip -P data
# !unzip /output/data/test.zip
# !unzip /output/data/train.zip
In [4]:
%matplotlib inline
In [5]:
#Create references to important directories we will use over and over
import os, sys
# Notebook working directory; the dogs-vs-cats data lives under ./data/.
current_dir = os.getcwd()
LESSON_HOME_DIR = current_dir
# NOTE: the trailing '/' is already included here, so downstream string
# concatenation should not append another separator.
DATA_HOME_DIR = current_dir+'/data/'
In [6]:
#Create directories
%cd $DATA_HOME_DIR
# valid/ holds the held-out validation split; results/ stores weights and
# prediction arrays; sample/ mirrors the full layout for fast iteration.
%mkdir valid
%mkdir results
%mkdir -p sample/train
%mkdir -p sample/test
%mkdir -p sample/valid
%mkdir -p sample/results
# Keras batch loaders require a class subdirectory even for unlabeled
# test images, hence test/unknown.
%mkdir -p test/unknown
In [7]:
%cd $DATA_HOME_DIR/train
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(2000):
os.rename(shuf[i], DATA_HOME_DIR+'/valid/' + shuf[i])
from shutil import copyfile
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(200):
copyfile(shuf[i], DATA_HOME_DIR+'/sample/train/' + shuf[i])
%cd $DATA_HOME_DIR/valid
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(50):
copyfile(shuf[i], DATA_HOME_DIR+'/sample/valid/' + shuf[i])
#Divide cat/dog images into separate directories
%cd $DATA_HOME_DIR/sample/train
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/
%cd $DATA_HOME_DIR/sample/valid
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/
%cd $DATA_HOME_DIR/valid
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/
%cd $DATA_HOME_DIR/train
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/
# Create single 'unknown' class for test set
%cd $DATA_HOME_DIR/test
%mv *.jpg unknown/
In [8]:
%cd $DATA_HOME_DIR
#Set path to sample/ path if desired
path = DATA_HOME_DIR + '/' #'/sample/'
test_path = DATA_HOME_DIR + '/test/' #We use all the test data
results_path=DATA_HOME_DIR + '/results/'
train_path=path + '/train/'
valid_path=path + '/valid/'
In [9]:
# Core imports for the whole notebook.
# NOTE(review): this cell (In [9]) was executed AFTER the data-split cell
# (In [7]) that already uses np and glob -- move this cell above it so a
# fresh Restart & Run All works without NameErrors.
from __future__ import division,print_function
import os, json
from glob import glob
import numpy as np
# Compact, readable numpy array printing for prediction previews.
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
In [10]:
# fast.ai course helpers; reload() picks up edits to utils.py without a
# kernel restart (reload is a builtin on Python 2 only).
# NOTE(review): later cells call save_array/load_array from utils, but
# only `plots` is imported here -- they need to be in scope too.
import utils; reload(utils)
from utils import plots
In [13]:
# Batch size: as large as your GPU memory allows, but 64 is the
# recommended ceiling. On an older or cheaper GPU, reduce this to avoid
# running out of memory.
batch_size = 64
# Number of fine-tuning epochs to run.
no_of_epochs = 3
In [12]:
# Import our class, and instantiate
import vgg16; reload(vgg16)  # reload() is a py2 builtin (theano:py2 env)
from vgg16 import Vgg16
# BUG FIX: `vgg` was never created, so the fine-tuning cell below raised
# NameError on a fresh kernel. Instantiate the pretrained VGG16 model here.
vgg = Vgg16()
In [14]:
%%time
#Finetune the model
batches = vgg.get_batches(train_path, batch_size=batch_size)
val_batches = vgg.get_batches(valid_path, batch_size=batch_size*2)
vgg.finetune(batches)
#Not sure if we set this for all fits
vgg.model.optimizer.lr = 0.01
latest_weights_filename = 'ft0d.h5'
vgg.model.save_weights(results_path+latest_weights_filename)
In [15]:
batches, preds = vgg.test(test_path, batch_size = batch_size*2)
In [16]:
#For every image, vgg.test() generates two probabilities
#based on how we've ordered the cats/dogs directories.
#It looks like column one is cats and column two is dogs
# BUG FIX: `print preds[:5]` is a py2 print STATEMENT, but the imports
# cell ran `from __future__ import print_function`, which makes the
# statement form a SyntaxError in this session -- use print() calls.
print(preds[:5])
filenames = batches.filenames
print(filenames[:5])
In [17]:
#Save our test results arrays so we can use them again later
# BUG FIX: save_array was never brought into scope (only `plots` was
# imported from utils), so this cell raised NameError -- import it here.
from utils import save_array
save_array(results_path + 'test_preds.dat', preds)
save_array(results_path + 'filenames.dat', filenames)
In [18]:
#Load our test predictions from file
# BUG FIX: load_array was never brought into scope (only `plots` was
# imported from utils), so this cell raised NameError -- import it here.
from utils import load_array
preds = load_array(results_path + 'test_preds.dat')
filenames = load_array(results_path + 'filenames.dat')
In [ ]: