In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline


/home/ubuntu/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

In [2]:
from __future__ import division,print_function
import shutil
import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt

Prepare data


In [3]:
# List the current directory to check which project files are present.
%ls


hw1.ipynb  utils.py  vgg16.py

In [5]:
%mkdir ~/nbs/hw1/data
%cd ~/nbs/hw1/data
%mkdir valid sample
%mkdir sample/valid sample/train


/home/ubuntu/nbs/hw1/data

In [7]:
%cd ~/nbs/hw1/data/train
imgs = glob('*.jpg')
rand = np.random.permutation(imgs)
for i in range(1999): os.rename(rand[i], '../valid/'+rand[i])
%ls -l ../valid | wc -l


/home/ubuntu/nbs/hw1/data/train
2000

In [8]:
from shutil import copyfile

In [9]:
img = glob('*.jpg')
img_rand = np.random.permutation(img)
for i in range(49): copyfile(img_rand[i], '../sample/valid/'+img_rand[i])
%ls -l ../sample/valid | wc -l


51

In [10]:
img = glob('*.jpg')
img_rand = np.random.permutation(img)
for i in range(199): copyfile(img_rand[i], '../sample/train/'+img_rand[i])
%ls -l ../sample/train | wc -l


200

In [11]:
# Sort the training images into one sub-directory per class, using the
# "cat." / "dog." filename prefix as the label.
%cd ~/nbs/hw1/data/train
%mkdir cats dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/


/home/ubuntu/nbs/hw1/data/train

In [12]:
# Sort the validation images into one sub-directory per class, using the
# "cat." / "dog." filename prefix as the label.
%cd ~/nbs/hw1/data/valid
%mkdir cats dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/


/home/ubuntu/nbs/hw1/data/valid

In [13]:
# Sort the sample training images into one sub-directory per class, using the
# "cat." / "dog." filename prefix as the label.
%cd ~/nbs/hw1/data/sample/train
%mkdir cats dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/


/home/ubuntu/nbs/hw1/data/sample/train

In [14]:
# Sort the sample validation images into one sub-directory per class, using the
# "cat." / "dog." filename prefix as the label.
%cd ~/nbs/hw1/data/sample/valid
%mkdir cats dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/


/home/ubuntu/nbs/hw1/data/sample/valid

Train model


In [15]:
# Return to the project root; the relative `path` used for data below is resolved from here.
%cd ~/nbs/hw1


/home/ubuntu/nbs/hw1

In [16]:
# Root of the dataset used by the cells below (relative to the project root).
# Swap in the commented-out line to run on the small sample set instead.
path = "data/"
#path = "data/sample/"

In [17]:
# Import the helper module and reload it so that edits to utils.py are picked up.
# NOTE(review): utils.py executes `from vgg16bn import *`, so vgg16bn.py must be
# importable (e.g. placed next to utils.py); otherwise this cell raises ImportError.
import utils; reload(utils)
from utils import plots


WARNING (theano.sandbox.cuda): CUDA is installed, but device gpu is not available  (error: Unable to get the number of gpus available: no CUDA-capable device is detected)
Using Theano backend.
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-17-834d59d32016> in <module>()
----> 1 import utils; reload(utils)
      2 from utils import plots

/home/ubuntu/nbs/hw1/utils.py in <module>()
     50 
     51 from vgg16 import *
---> 52 from vgg16bn import *
     53 np.set_printoptions(precision=4, linewidth=100)
     54 

ImportError: No module named vgg16bn

In [ ]:
# Mini-batch size: as large as fits in GPU memory, but no larger than 64 is recommended.
# On an older or cheaper GPU you will run out of memory and must decrease this.
batch_size=64

In [ ]:
# Import the Vgg16 class from the local vgg16.py module (it is instantiated in the next cell).
from vgg16 import Vgg16

In [70]:
# Create the Vgg16 object that the fine-tuning, training and prediction cells below operate on.
vgg = Vgg16()

In [71]:
# Batch generators over the training and validation directories under `path`.
# The validation generator uses twice the training batch size.
batches = vgg.get_batches(path+'train', batch_size=batch_size)
val_batches = vgg.get_batches(path+'valid', batch_size=batch_size*2)


Found 22001 images belonging to 2 classes.
Found 1999 images belonging to 2 classes.

In [72]:
# Prepare the model for fine-tuning on the classes found in `batches`.
# NOTE(review): presumably this replaces the final classification layer -- confirm in vgg16.py.
vgg.finetune(batches)

In [ ]:
# Train for a single epoch on `batches`, passing `val_batches` as the validation data.
# NOTE: the earlier import log reports that no GPU is available, so this epoch is slow.
vgg.fit(batches, val_batches, nb_epoch=1)


Epoch 1/1
 1664/22001 [=>............................] - ETA: 8836s - loss: 0.4535 - acc: 0.8870

In [30]:
# Save the fine-tuned weights under <path>/results/.
# Guard against a missing results directory: the weights file cannot be
# written into a directory that does not exist.
results_dir = path + 'results/'
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)
vgg.model.save_weights(results_dir + 'ft1.h5')

Predict on new test data for Kaggle


In [8]:
# Restore the fine-tuned weights from <path>/results/ft1.h5 into the existing model.
# load_weights updates vgg.model in place and returns None, so its result is
# deliberately not bound to a name (the previous `ft1` was always None).
vgg.model.load_weights(path+'results/ft1.h5')

In [ ]:
# Call vgg.test on the test directory with twice the training batch size.
# NOTE(review): this rebinds `batches`, which until now held the training batches.
# TODO confirm what vgg.test returns (a batch generator, or a (generator, predictions)
# pair); the later cell that reads `batches.nb_sample` assumes a generator.
batches = vgg.test(path+'test', batch_size=batch_size*2)


Found 12500 images belonging to 1 classes.

In [ ]:
# Display the object returned by vgg.test to inspect its type.
batches

In [ ]:
# Predict over the test batches; `batches.nb_sample` is passed as the number of samples to draw.
# NOTE(review): assumes `batches` is a generator exposing nb_sample -- confirm against vgg.test.
predictions = vgg.model.predict_generator(batches, batches.nb_sample)

In [ ]: