notebook.community

Edit and run



In [2]:

    
%matplotlib inline



In [3]:

    
path = "data/dogscats-redux/"



In [4]:

    
from __future__ import division,print_function

import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt



In [5]:

    
import utils; reload(utils)
from utils import plots









    



Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)
/home/ubuntu/anaconda2/lib/python2.7/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.
  warnings.warn(warn)
Using Theano backend.



In [6]:

    
batch_size=64



In [7]:

    
# Import our class, and instantiate
import vgg16; reload(vgg16)
from vgg16 import Vgg16



In [8]:

    
vgg = Vgg16()



In [9]:

    
batches = vgg.get_batches(path+'train', batch_size=4)









    



Found 22776 images belonging to 2 classes.



In [10]:

    
imgs,labels = next(batches)
plots(imgs, titles=labels)



In [11]:

    
vgg.predict(imgs, True)









    Out[11]:





(array([ 0.5245,  0.1812,  0.1423,  0.9877], dtype=float32),
 array([207, 281, 332, 284]),
 [u'golden_retriever', u'tabby', u'Angora', u'Siamese_cat'])



In [12]:

    
vgg.finetune(batches)



In [13]:

    
val_batches = vgg.get_batches(path+'valid', batch_size=batch_size)









    



Found 2200 images belonging to 2 classes.



In [14]:

    
vgg.fit(batches, val_batches, nb_epoch=1)









    



Epoch 1/1
22776/22776 [==============================] - 664s - loss: 0.2304 - acc: 0.9705 - val_loss: 0.1941 - val_acc: 0.9805



In [15]:

    
vgg.predict(imgs, True)









    Out[15]:





(array([ 1.,  1.,  1.,  1.], dtype=float32),
 array([1, 0, 0, 0]),
 ['dogs', 'cats', 'cats', 'cats'])



In [32]:

    
batches = vgg.get_batches(path+'train', batch_size=32, shuffle=False)
imgs,labels = next(batches)
results = vgg.predict(imgs, True)









    



Found 22776 images belonging to 2 classes.



In [33]:

    
plots(imgs[0:4], titles=labels[0:4])



In [41]:

    
batches, preds = vgg.test(path+'test', batch_size = 32)









    



Found 12500 images belonging to 1 classes.



In [44]:

    
#Save our test results arrays so we can use them again later
filenames = batches.filenames
np.save(path + 'test_preds.dat', preds)
np.save(path + 'filenames.dat', filenames)



In [49]:

    
preds









    Out[49]:





array([[  1.0000e+00,   1.2281e-36],
       [  9.9999e-01,   1.0085e-05],
       [  1.0000e+00,   3.2340e-37],
       ..., 
       [  1.0000e+00,   2.2421e-44],
       [  1.0000e+00,   1.4013e-45],
       [  4.5341e-20,   1.0000e+00]], dtype=float32)



In [59]:

    
batches.filenames
ids = np.array([int(f[7:f.find('.')]) for f in filenames])
print(ids[:5])
print(filenames[:5])









    



[ 9292 12026  9688  4392   779]
['unkown/9292.jpg', 'unkown/12026.jpg', 'unkown/9688.jpg', 'unkown/4392.jpg', 'unkown/779.jpg']



In [63]:

    
isdog = preds[:,1]



In [64]:

    
subm = np.stack([ids,isdog], axis=1)
subm[:5]









    Out[64]:





array([[  9.2920e+03,   1.2281e-36],
       [  1.2026e+04,   1.0085e-05],
       [  9.6880e+03,   3.2340e-37],
       [  4.3920e+03,   2.8026e-45],
       [  7.7900e+02,   1.0000e+00]])



In [65]:

    
np.savetxt(path+'submission.csv', subm, fmt='%d,%.5f', header='id,label', comments='')



In [ ]: