In [140]:
##Render matplotlib figures inline, directly below the cell that produces them
%matplotlib inline

In [141]:
##Print data directory structure for reference

In [142]:
%%bash
# -d restricts the listing to directories, so only the split/class folders are shown
tree -d data/distracted-driving/


data/distracted-driving/
├── sample
│   ├── test
│   │   └── unkown
│   ├── train
│   │   ├── c0
│   │   ├── c1
│   │   ├── c2
│   │   ├── c3
│   │   ├── c4
│   │   ├── c5
│   │   ├── c6
│   │   ├── c7
│   │   ├── c8
│   │   └── c9
│   └── valid
│       ├── c0
│       ├── c1
│       ├── c2
│       ├── c3
│       ├── c4
│       ├── c5
│       ├── c6
│       ├── c7
│       ├── c8
│       └── c9
├── test
│   └── unkown
├── train
│   ├── c0
│   ├── c1
│   ├── c2
│   ├── c3
│   ├── c4
│   ├── c5
│   ├── c6
│   ├── c7
│   ├── c8
│   └── c9
└── valid
    ├── c0
    ├── c1
    ├── c2
    ├── c3
    ├── c4
    ├── c5
    ├── c6
    ├── c7
    ├── c8
    └── c9

49 directories

In [143]:
##List files in current directory for reference
##(the utils and vgg16 modules imported further down are expected to be here)
%ls


Create_Dataset_Directories.ipynb  kevin_lesson1.ipynb  vgg16bn.pyc
data/                             lesson1.ipynb        vgg16.py*
distracted_driving_2.ipynb        utils.py*            vgg16.pyc
distracted_driving.ipynb          utils.pyc
distracted-driving-results/       vgg16bn.py*

In [144]:
##Set directory to either sample or full
##Flip USE_SAMPLE instead of commenting lines in and out; both paths follow the flag.
USE_SAMPLE = False  # True -> run against the small sample/ copy of the data

#'sample/' is appended to both locations when the sample set is selected
subset = 'sample/' if USE_SAMPLE else ''

directory = 'data/distracted-driving/' + subset        # root holding train/ valid/ test/
results_path = 'distracted-driving-results/' + subset  # where weights, predictions and csv go

In [145]:
##Import and initialize vgg
# utils and vgg16 are local modules (utils.py / vgg16.py sit next to this notebook)
import utils
# reload so edits to vgg16.py are picked up without restarting the kernel
# NOTE(review): bare reload() is a Python 2 builtin; this cell fails on Python 3 as written
import vgg16; reload(vgg16)
from vgg16 import Vgg16
# single model instance shared by every later cell
vgg = Vgg16()

In [146]:
##Get batches for training
# reads the 'train' subfolder of the configured data directory with batch_size=32
batches = vgg.get_batches(directory+'train', batch_size=32)


Found 19487 images belonging to 10 classes.

In [147]:
##Finetune the model for distracted driving competition
# presumably adapts the model's output to the classes found in `batches` -- confirm in vgg16.py
vgg.finetune(batches)

In [148]:
##Get batches for validation
# no batch_size is passed here, so get_batches falls back to its own default
valid_batches = vgg.get_batches(directory+'valid')


Found 2237 images belonging to 10 classes.

In [ ]:
##Fit model for distracted driving
# a single epoch over the training batches; valid_batches supplies the val_loss / val_acc figures
vgg.fit(batches, valid_batches, nb_epoch=1)


Epoch 1/1
19487/19487 [==============================] - 537s - loss: 1.7996 - acc: 0.4972 - val_loss: 0.4421 - val_acc: 0.8677

In [ ]:
##Save the weights so we don't have to refit if something happens
import datetime
import os

#timestamp reused by the later cells to name every archived artifact from this run
now_string = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

#make sure the archive folder exists before writing (this also creates results_path itself);
#without it the save fails on a fresh results folder, after the expensive fit has already run
archive_path = results_path+'archive/'
if not os.path.isdir(archive_path):
    os.makedirs(archive_path)

#save as current
vgg.model.save_weights(results_path+'current_ft.h5')
#save as archive
vgg.model.save_weights(archive_path+'current_ft_'+now_string+'.h5')

In [ ]:
##Test our newly fitted model
# returns the batch iterator over the 'test' folder together with the model's outputs;
# the iterator is kept because its .filenames are saved and used for the submission later
test_batches, results = vgg.test(directory+'test', batch_size=64)


Found 79726 images belonging to 1 classes.

In [ ]:
##Save filenames and results for later so we don't need to rerun if something happens
import numpy as np

# NOTE: np.save appends '.npy' when the name does not already end in it,
# so these land on disk as e.g. 'predictions.dat.npy' -- load them with np.load under that name.

#Save as current
np.save(results_path+'predictions.dat',results)
np.save(results_path+'filenames.dat',test_batches.filenames)
#Save as archive
#(relies on now_string from the weight-saving cell and on the archive/ folder already existing)
np.save(results_path+'archive/predictions_'+now_string+'.dat',results)
np.save(results_path+'archive/filenames_'+now_string+'.dat',test_batches.filenames)

In [ ]:
##Show the results array
# bare last expression, so the notebook displays the array's repr as the cell output
results

In [ ]:
##Create a pd series of the filenames in the test directory
import pandas as pd
import os

#entries of test_batches.filenames carry a folder prefix in front of the image name
filenames = pd.Series(test_batches.filenames, name='img')
##Remove directory from filename. os.path.basename follows the platform's path
##separator, whereas splitting on a hard-coded '/' leaves the folder prefix in place
##wherever paths are joined with a different separator.
filenames = filenames.map(os.path.basename)
filenames.head()

In [ ]:
##Assemble the submission dataframe: image filename first, then one column per class

#one column per class, named c0..c9
class_labels = ['c%d' % i for i in range(10)]
df = pd.DataFrame(results, columns=class_labels)

#attach the filename column
df['img'] = filenames

#reorder so that 'img' leads and the class columns follow in their original order
cols = ['img'] + class_labels
df = df[cols]

df.head()

In [ ]:
##Write the submission csv twice: the "current" copy and a timestamped archive copy
submission_targets = [
    results_path+'submission.csv',
    results_path+'archive/submission_'+now_string+'.csv',
]
for target in submission_targets:
    #index=False keeps the dataframe's row index out of the file
    df.to_csv(target, index=False)

In [ ]:
##Print results directory for reference

In [ ]:
%%bash
tree distracted-driving-results/sample/

In [ ]: