In [1]:

    
# Show matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline









    



/home/ubuntu/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')



In [2]:

    
from __future__ import division,print_function
import shutil
import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt

Prepare data



In [3]:

    
# List the notebook's working directory before any data folders are created.
%ls









    



hw1.ipynb  utils.py  vgg16.py



In [5]:

    
%mkdir ~/nbs/hw1/data
%cd ~/nbs/hw1/data
%mkdir valid sample
%mkdir sample/valid sample/train









    



/home/ubuntu/nbs/hw1/data



In [7]:

    
%cd ~/nbs/hw1/data/train
imgs = glob('*.jpg')
rand = np.random.permutation(imgs)
for i in range(1999): os.rename(rand[i], '../valid/'+rand[i])
%ls -l ../valid | wc -l









    



/home/ubuntu/nbs/hw1/data/train
2000



In [8]:

    
from shutil import copyfile



In [9]:

    
# Build the small validation sample from the held-out images in ../valid.
# The working directory is still train/, so globbing here would copy training
# images into sample/valid and let the two sample sets overlap.
img = sorted(os.path.basename(p) for p in glob('../valid/*.jpg'))
# Private seeded RNG: reproducible pick, global NumPy random state untouched.
img_rand = np.random.RandomState(43).permutation(img)
# Slicing copies at most 49 files and does not fail if fewer are available.
for fname in img_rand[:49]: copyfile('../valid/' + fname, '../sample/valid/' + fname)
# Count the entries directly: `ls -l | wc -l` would also count the "total" header line.
len(os.listdir('../sample/valid'))



In [10]:

    
# Build the small training sample from the images in train/ (the working directory).
# Skip any file name already present in ../sample/valid so the two sample sets
# can never share an image.
img = sorted(f for f in glob('*.jpg') if not os.path.exists('../sample/valid/' + f))
# Private seeded RNG: reproducible pick, global NumPy random state untouched.
img_rand = np.random.RandomState(44).permutation(img)
# Slicing copies at most 199 files and does not fail if fewer are available.
for fname in img_rand[:199]: copyfile(fname, '../sample/train/' + fname)
# Count the entries directly: `ls -l | wc -l` would also count the "total" header line.
len(os.listdir('../sample/train'))



In [11]:

    
%cd ~/nbs/hw1/data/train
%mkdir cats dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/









    



/home/ubuntu/nbs/hw1/data/train



In [12]:

    
%cd ~/nbs/hw1/data/valid
%mkdir cats dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/









    



/home/ubuntu/nbs/hw1/data/valid



In [13]:

    
%cd ~/nbs/hw1/data/sample/train
%mkdir cats dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/









    



/home/ubuntu/nbs/hw1/data/sample/train



In [14]:

    
%cd ~/nbs/hw1/data/sample/valid
%mkdir cats dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/









    



/home/ubuntu/nbs/hw1/data/sample/valid

Train model



In [15]:

    
# Return to the project root so the relative `path` used by the training cells resolves correctly.
%cd ~/nbs/hw1









    



/home/ubuntu/nbs/hw1



In [16]:

    
# Root of the image folders, relative to the project directory. Keep the
# trailing slash: later cells build sub-paths by plain string concatenation
# (e.g. path+'train').
path = "data/"
# Uncomment to iterate quickly on the small sample set instead of the full data.
#path = "data/sample/"



In [17]:

    
# Import the local helper module and reload it so that edits to utils.py are
# picked up without restarting the kernel.
# NOTE(review): in the recorded run this cell raised ImportError. utils.py
# (line 52) does `from vgg16bn import *`, and the earlier directory listing
# shows only utils.py and vgg16.py. Add vgg16bn.py next to utils.py, or drop
# that import, before relying on `plots`.
import utils; reload(utils)
from utils import plots









    



WARNING (theano.sandbox.cuda): CUDA is installed, but device gpu is not available  (error: Unable to get the number of gpus available: no CUDA-capable device is detected)
Using Theano backend.






    



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-17-834d59d32016> in <module>()
----> 1 import utils; reload(utils)
      2 from utils import plots

/home/ubuntu/nbs/hw1/utils.py in <module>()
     50 
     51 from vgg16 import *
---> 52 from vgg16bn import *
     53 np.set_printoptions(precision=4, linewidth=100)
     54 

ImportError: No module named vgg16bn



In [ ]:

    
# Mini-batch size for the training generator; the validation and test
# generators below use twice this value.
# Bigger is better up to the recommended ceiling of 64. Lower it if an older
# or cheaper GPU runs out of memory.
batch_size = 64



In [ ]:

    
# Import our class, and instantiate
from vgg16 import Vgg16



In [70]:

    
# Instantiate the Vgg16 wrapper class imported from the local vgg16.py.
vgg = Vgg16()



In [71]:

    
# Build the batch generators from the class-per-folder directories under `path`.
# The validation generator uses twice the training batch size.
# Recorded run: 22001 training and 1999 validation images in 2 classes.
batches = vgg.get_batches(path+'train', batch_size=batch_size)
val_batches = vgg.get_batches(path+'valid', batch_size=batch_size*2)









    



Found 22001 images belonging to 2 classes.
Found 1999 images belonging to 2 classes.



In [72]:

    
# Prepare the model for fine-tuning on the classes found in `batches`.
# NOTE(review): presumably swaps the final layer for one sized to these classes — confirm in vgg16.py.
vgg.finetune(batches)



In [ ]:

    
# Train for a single epoch, evaluating against the validation generator.
# NOTE(review): the recorded output stops at 1664/22001 samples with an ETA of
# ~8836 s, and the earlier Theano warning reports no CUDA device, so this
# presumably ran on CPU and was not left to finish — confirm before trusting
# the saved weights.
vgg.fit(batches, val_batches, nb_epoch=1)









    



Epoch 1/1
 1664/22001 [=>............................] - ETA: 8836s - loss: 0.4535 - acc: 0.8870



In [30]:

    
# Save the fine-tuned weights under <path>results/.
# Create the directory first: no visible earlier cell creates it (for either the
# full or the sample `path`), and save_weights cannot write into a directory
# that does not exist.
results_dir = path + 'results/'
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)
vgg.model.save_weights(results_dir + 'ft1.h5')

Predict on the new test data for Kaggle



In [8]:

    
# Restore the fine-tuned weights from <path>results/ft1.h5 into the existing model.
# NOTE(review): Keras' load_weights updates the model in place, so `ft1`
# presumably ends up as None — confirm before using it for anything.
# NOTE(review): the execution count (In [8] after In [30]) suggests this ran in
# a restarted kernel; `vgg` and `path` must be re-created before this cell.
ft1 = vgg.model.load_weights(path+'results/ft1.h5')



In [ ]:

    
# Run the wrapper's test helper over the images under <path>test, with twice
# the training batch size (recorded run: 12500 images in a single class folder).
# NOTE(review): this rebinds `batches`, which earlier held the training
# generator; re-running the fit cell after this one would use the test data.
# NOTE(review): confirm what vgg.test returns — if it returns more than the
# generator (e.g. a (batches, predictions) tuple), `batches.nb_sample` in the
# prediction cell below will fail.
batches = vgg.test(path+'test', batch_size=batch_size*2)









    



Found 12500 images belonging to 1 classes.



In [ ]:

    
# Inspect the object returned by vgg.test (shows its repr when the cell is run).
batches



In [ ]:

    
# Predict over the test generator; the second argument is the number of samples
# to draw, here every sample the generator reports.
# NOTE(review): assumes `batches` is a Keras 1.x directory iterator exposing `nb_sample` — confirm.
predictions = vgg.model.predict_generator(batches, batches.nb_sample)



In [ ]: