notebook.community

Edit and run



In [3]:

    
%matplotlib inline



In [11]:

    
##For sample data
path = "data/distracted-driving/sample/"
results_path = 'distracted-driving-results/sample'

##For full data
#path = "data/distracted-driving/"
#results_path = 'distracted-driving-results/sample'



In [5]:

    
from __future__ import division,print_function

import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt



In [6]:

    
import utils; reload(utils)
from utils import plots









    



Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)
/home/ubuntu/anaconda2/lib/python2.7/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.
  warnings.warn(warn)
Using Theano backend.



In [6]:

    
batch_size=64



In [7]:

    
# Import our class, and instantiate
import vgg16; reload(vgg16)
from vgg16 import Vgg16



In [8]:

    
vgg = Vgg16()



In [9]:

    
batches = vgg.get_batches(path+'train', batch_size=4)









    



Found 19487 images belonging to 10 classes.



In [10]:

    
imgs,labels = next(batches)
plots(imgs, titles=labels)



In [11]:

    
vgg.predict(imgs, True)









    Out[11]:





(array([ 0.8783,  0.4636,  0.4592,  0.5569], dtype=float32),
 array([785, 785, 656, 785]),
 [u'seat_belt', u'seat_belt', u'minivan', u'seat_belt'])



In [12]:

    
vgg.finetune(batches)



In [13]:

    
val_batches = vgg.get_batches(path+'valid', batch_size=batch_size)









    



Found 2237 images belonging to 10 classes.



In [14]:

    
vgg.fit(batches, val_batches, nb_epoch=1)









    



Epoch 1/1
19487/19487 [==============================] - 564s - loss: 2.4660 - acc: 0.5121 - val_loss: 0.5290 - val_acc: 0.8418



In [15]:

    
vgg.predict(imgs, True)









    Out[15]:





(array([ 0.9858,  0.9999,  0.4853,  0.9999], dtype=float32),
 array([4, 4, 4, 5]),
 ['c4', 'c4', 'c4', 'c5'])



In [17]:

    
batches, preds = vgg.test(path+'test', batch_size = 32)









    



Found 79726 images belonging to 1 classes.



In [18]:

    
#Save our test results arrays so we can use them again later
filenames = batches.filenames
np.save(path + 'test_preds.dat', preds)
np.save(path + 'filenames.dat', filenames)



In [12]:

    
#Load our test predictions from file
preds = np.load(path + '/test_preds.dat.npy')
filenames = np.load(path + '/filenames.dat.npy')



In [13]:

    
preds









    Out[13]:





array([[  1.6076e-02,   9.0095e-06,   2.0508e-10, ...,   2.6182e-05,   1.1057e-05,   9.1267e-01],
       [  5.5489e-01,   4.4217e-02,   1.8653e-08, ...,   8.3028e-02,   2.9943e-01,   6.5743e-03],
       [  1.3442e-05,   6.7272e-08,   5.2875e-06, ...,   9.6147e-10,   6.5628e-01,   2.4434e-06],
       ..., 
       [  2.4027e-02,   1.2791e-04,   5.7995e-10, ...,   1.6099e-01,   5.8294e-01,   1.3516e-07],
       [  9.2921e-01,   5.6042e-06,   2.1042e-07, ...,   9.1156e-04,   3.5594e-04,   6.1193e-04],
       [  7.9547e-01,   5.5362e-06,   5.1936e-09, ...,   3.9074e-07,   4.3125e-05,   1.2740e-03]], dtype=float32)



In [20]:

    
file_ids = np.array([f[7:] for f in filenames])
file_ids.T









    Out[20]:





array(['img_81601.jpg', 'img_14887.jpg', 'img_62885.jpg', ..., 'img_98750.jpg', 'img_42858.jpg',
       'img_98905.jpg'], 
      dtype='|S14')



In [18]:

    
import pandas as pd

df = pd.DataFrame(data=preds, columns=['c0','c1','c2','c3','c4','c5','c6','c7','c8','c9'])
df.head()









    Out[18]:






  
    
      
      c0
      c1
      c2
      c3
      c4
      c5
      c6
      c7
      c8
      c9
    
  
  
    
      0
      1.607646e-02
      9.009521e-06
      2.050786e-10
      1.484431e-05
      0.000071
      4.667617e-04
      0.070657
      2.618243e-05
      0.000011
      0.912667
    
    
      1
      5.548896e-01
      4.421740e-02
      1.865346e-08
      2.454511e-04
      0.000436
      1.922752e-03
      0.009259
      8.302806e-02
      0.299428
      0.006574
    
    
      2
      1.344189e-05
      6.727246e-08
      5.287497e-06
      1.516627e-03
      0.322349
      1.065140e-05
      0.019819
      9.614698e-10
      0.656283
      0.000002
    
    
      3
      1.064366e-07
      1.235221e-05
      5.170768e-04
      1.782592e-07
      0.001629
      2.265113e-08
      0.006913
      1.562694e-05
      0.990537
      0.000376
    
    
      4
      6.393413e-02
      5.363804e-03
      1.834831e-05
      7.161833e-04
      0.000056
      1.169559e-03
      0.729950
      1.341563e-02
      0.171388
      0.013988



In [24]:

    
filename_series = pd.Series(file_ids)
df['img'] = filename_series
cols = df.columns.tolist()
cols = cols[-1:] + cols[:-1]
df = df[cols] 
df.head()









    Out[24]:






  
    
      
      img
      c0
      c1
      c2
      c3
      c4
      c5
      c6
      c7
      c8
      c9
    
  
  
    
      0
      img_81601.jpg
      1.607646e-02
      9.009521e-06
      2.050786e-10
      1.484431e-05
      0.000071
      4.667617e-04
      0.070657
      2.618243e-05
      0.000011
      0.912667
    
    
      1
      img_14887.jpg
      5.548896e-01
      4.421740e-02
      1.865346e-08
      2.454511e-04
      0.000436
      1.922752e-03
      0.009259
      8.302806e-02
      0.299428
      0.006574
    
    
      2
      img_62885.jpg
      1.344189e-05
      6.727246e-08
      5.287497e-06
      1.516627e-03
      0.322349
      1.065140e-05
      0.019819
      9.614698e-10
      0.656283
      0.000002
    
    
      3
      img_45125.jpg
      1.064366e-07
      1.235221e-05
      5.170768e-04
      1.782592e-07
      0.001629
      2.265113e-08
      0.006913
      1.562694e-05
      0.990537
      0.000376
    
    
      4
      img_22633.jpg
      6.393413e-02
      5.363804e-03
      1.834831e-05
      7.161833e-04
      0.000056
      1.169559e-03
      0.729950
      1.341563e-02
      0.171388
      0.013988



In [26]:

    
df.to_csv('submission.csv', index=False)



In [ ]:

	c0	c1	c2	c3	c4	c5	c6	c7	c8	c9
0	1.607646e-02	9.009521e-06	2.050786e-10	1.484431e-05	0.000071	4.667617e-04	0.070657	2.618243e-05	0.000011	0.912667
1	5.548896e-01	4.421740e-02	1.865346e-08	2.454511e-04	0.000436	1.922752e-03	0.009259	8.302806e-02	0.299428	0.006574
2	1.344189e-05	6.727246e-08	5.287497e-06	1.516627e-03	0.322349	1.065140e-05	0.019819	9.614698e-10	0.656283	0.000002
3	1.064366e-07	1.235221e-05	5.170768e-04	1.782592e-07	0.001629	2.265113e-08	0.006913	1.562694e-05	0.990537	0.000376
4	6.393413e-02	5.363804e-03	1.834831e-05	7.161833e-04	0.000056	1.169559e-03	0.729950	1.341563e-02	0.171388	0.013988

	img	c0	c1	c2	c3	c4	c5	c6	c7	c8	c9
0	img_81601.jpg	1.607646e-02	9.009521e-06	2.050786e-10	1.484431e-05	0.000071	4.667617e-04	0.070657	2.618243e-05	0.000011	0.912667
1	img_14887.jpg	5.548896e-01	4.421740e-02	1.865346e-08	2.454511e-04	0.000436	1.922752e-03	0.009259	8.302806e-02	0.299428	0.006574
2	img_62885.jpg	1.344189e-05	6.727246e-08	5.287497e-06	1.516627e-03	0.322349	1.065140e-05	0.019819	9.614698e-10	0.656283	0.000002
3	img_45125.jpg	1.064366e-07	1.235221e-05	5.170768e-04	1.782592e-07	0.001629	2.265113e-08	0.006913	1.562694e-05	0.990537	0.000376
4	img_22633.jpg	6.393413e-02	5.363804e-03	1.834831e-05	7.161833e-04	0.000056	1.169559e-03	0.729950	1.341563e-02	0.171388	0.013988