In [3]:
%matplotlib inline

In [11]:
##For sample data
path = "data/distracted-driving/sample/"
results_path = 'distracted-driving-results/sample'

##For full data
#path = "data/distracted-driving/"
#results_path = 'distracted-driving-results/sample'

In [5]:
from __future__ import division,print_function

import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt

In [6]:
import utils; reload(utils)
from utils import plots


Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)
/home/ubuntu/anaconda2/lib/python2.7/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.
  warnings.warn(warn)
Using Theano backend.

In [6]:
batch_size=64

In [7]:
# Import our class, and instantiate
import vgg16; reload(vgg16)
from vgg16 import Vgg16

In [8]:
vgg = Vgg16()

In [9]:
batches = vgg.get_batches(path+'train', batch_size=4)


Found 19487 images belonging to 10 classes.

In [10]:
imgs,labels = next(batches)
plots(imgs, titles=labels)



In [11]:
vgg.predict(imgs, True)


Out[11]:
(array([ 0.8783,  0.4636,  0.4592,  0.5569], dtype=float32),
 array([785, 785, 656, 785]),
 [u'seat_belt', u'seat_belt', u'minivan', u'seat_belt'])

In [12]:
vgg.finetune(batches)

In [13]:
val_batches = vgg.get_batches(path+'valid', batch_size=batch_size)


Found 2237 images belonging to 10 classes.

In [14]:
vgg.fit(batches, val_batches, nb_epoch=1)


Epoch 1/1
19487/19487 [==============================] - 564s - loss: 2.4660 - acc: 0.5121 - val_loss: 0.5290 - val_acc: 0.8418

In [15]:
vgg.predict(imgs, True)


Out[15]:
(array([ 0.9858,  0.9999,  0.4853,  0.9999], dtype=float32),
 array([4, 4, 4, 5]),
 ['c4', 'c4', 'c4', 'c5'])

In [17]:
batches, preds = vgg.test(path+'test', batch_size = 32)


Found 79726 images belonging to 1 classes.

In [18]:
#Save our test results arrays so we can use them again later
filenames = batches.filenames
np.save(path + 'test_preds.dat', preds)
np.save(path + 'filenames.dat', filenames)

In [12]:
#Load our test predictions from file
preds = np.load(path + '/test_preds.dat.npy')
filenames = np.load(path + '/filenames.dat.npy')

In [13]:
preds


Out[13]:
array([[  1.6076e-02,   9.0095e-06,   2.0508e-10, ...,   2.6182e-05,   1.1057e-05,   9.1267e-01],
       [  5.5489e-01,   4.4217e-02,   1.8653e-08, ...,   8.3028e-02,   2.9943e-01,   6.5743e-03],
       [  1.3442e-05,   6.7272e-08,   5.2875e-06, ...,   9.6147e-10,   6.5628e-01,   2.4434e-06],
       ..., 
       [  2.4027e-02,   1.2791e-04,   5.7995e-10, ...,   1.6099e-01,   5.8294e-01,   1.3516e-07],
       [  9.2921e-01,   5.6042e-06,   2.1042e-07, ...,   9.1156e-04,   3.5594e-04,   6.1193e-04],
       [  7.9547e-01,   5.5362e-06,   5.1936e-09, ...,   3.9074e-07,   4.3125e-05,   1.2740e-03]], dtype=float32)

In [20]:
file_ids = np.array([f[7:] for f in filenames])
file_ids.T


Out[20]:
array(['img_81601.jpg', 'img_14887.jpg', 'img_62885.jpg', ..., 'img_98750.jpg', 'img_42858.jpg',
       'img_98905.jpg'], 
      dtype='|S14')

In [18]:
import pandas as pd

df = pd.DataFrame(data=preds, columns=['c0','c1','c2','c3','c4','c5','c6','c7','c8','c9'])
df.head()


Out[18]:
c0 c1 c2 c3 c4 c5 c6 c7 c8 c9
0 1.607646e-02 9.009521e-06 2.050786e-10 1.484431e-05 0.000071 4.667617e-04 0.070657 2.618243e-05 0.000011 0.912667
1 5.548896e-01 4.421740e-02 1.865346e-08 2.454511e-04 0.000436 1.922752e-03 0.009259 8.302806e-02 0.299428 0.006574
2 1.344189e-05 6.727246e-08 5.287497e-06 1.516627e-03 0.322349 1.065140e-05 0.019819 9.614698e-10 0.656283 0.000002
3 1.064366e-07 1.235221e-05 5.170768e-04 1.782592e-07 0.001629 2.265113e-08 0.006913 1.562694e-05 0.990537 0.000376
4 6.393413e-02 5.363804e-03 1.834831e-05 7.161833e-04 0.000056 1.169559e-03 0.729950 1.341563e-02 0.171388 0.013988

In [24]:
filename_series = pd.Series(file_ids)
df['img'] = filename_series
cols = df.columns.tolist()
cols = cols[-1:] + cols[:-1]
df = df[cols] 
df.head()


Out[24]:
img c0 c1 c2 c3 c4 c5 c6 c7 c8 c9
0 img_81601.jpg 1.607646e-02 9.009521e-06 2.050786e-10 1.484431e-05 0.000071 4.667617e-04 0.070657 2.618243e-05 0.000011 0.912667
1 img_14887.jpg 5.548896e-01 4.421740e-02 1.865346e-08 2.454511e-04 0.000436 1.922752e-03 0.009259 8.302806e-02 0.299428 0.006574
2 img_62885.jpg 1.344189e-05 6.727246e-08 5.287497e-06 1.516627e-03 0.322349 1.065140e-05 0.019819 9.614698e-10 0.656283 0.000002
3 img_45125.jpg 1.064366e-07 1.235221e-05 5.170768e-04 1.782592e-07 0.001629 2.265113e-08 0.006913 1.562694e-05 0.990537 0.000376
4 img_22633.jpg 6.393413e-02 5.363804e-03 1.834831e-05 7.161833e-04 0.000056 1.169559e-03 0.729950 1.341563e-02 0.171388 0.013988

In [26]:
df.to_csv('submission.csv', index=False)

In [ ]: