notebook.community

Edit and run



In [140]:

    
##Make matplotlib inline
#IPython line magic: draw matplotlib figures inside the notebook output
#instead of opening a separate window
%matplotlib inline



In [141]:

    
##Print data directory structure for reference



In [142]:

    
%%bash
# Show only directories (-d) under the dataset root. %%bash has to stay on the
# first line of the cell, so this note lives below it as a shell comment.
tree -d data/distracted-driving/









    



data/distracted-driving/
├── sample
│   ├── test
│   │   └── unkown
│   ├── train
│   │   ├── c0
│   │   ├── c1
│   │   ├── c2
│   │   ├── c3
│   │   ├── c4
│   │   ├── c5
│   │   ├── c6
│   │   ├── c7
│   │   ├── c8
│   │   └── c9
│   └── valid
│       ├── c0
│       ├── c1
│       ├── c2
│       ├── c3
│       ├── c4
│       ├── c5
│       ├── c6
│       ├── c7
│       ├── c8
│       └── c9
├── test
│   └── unkown
├── train
│   ├── c0
│   ├── c1
│   ├── c2
│   ├── c3
│   ├── c4
│   ├── c5
│   ├── c6
│   ├── c7
│   ├── c8
│   └── c9
└── valid
    ├── c0
    ├── c1
    ├── c2
    ├── c3
    ├── c4
    ├── c5
    ├── c6
    ├── c7
    ├── c8
    └── c9

49 directories



In [143]:

    
##List files in current directory for reference
#%ls is an IPython line magic, so no shell-escape (!) prefix is needed
%ls









    



Create_Dataset_Directories.ipynb  kevin_lesson1.ipynb  vgg16bn.pyc
data/                             lesson1.ipynb        vgg16.py*
distracted_driving_2.ipynb        utils.py*            vgg16.pyc
distracted_driving.ipynb          utils.pyc
distracted-driving-results/       vgg16bn.py*



In [144]:

    
##Set directory to either sample or full

#Single switch for the whole notebook: True runs against the small sample
#subset, False (default) against the full data. Using one flag keeps
#`directory` and `results_path` from ever pointing at different datasets.
use_sample = False

data_root = "data/distracted-driving/"
results_root = 'distracted-driving-results/'

#Both paths keep their trailing slash because later cells build file paths by
#plain string concatenation (e.g. directory+'train')
sample_suffix = 'sample/' if use_sample else ''
directory = data_root + sample_suffix
results_path = results_root + sample_suffix



In [145]:

    
##Import and initialize vgg
import utils

#Re-execute vgg16.py each time this cell runs so that edits to the file are
#picked up without restarting the kernel (reload is the Python 2 builtin)
import vgg16
reload(vgg16)
from vgg16 import Vgg16

#Model wrapper used by every cell below
vgg = Vgg16()



In [146]:

    
##Get batches for training
#Reads from the 'train' subfolder of the configured data directory, 32 images
#per batch. The loader reports one class per subfolder (c0-c9 -> 10 classes).
batches = vgg.get_batches(directory+'train', batch_size=32)









    



Found 19487 images belonging to 10 classes.



In [147]:

    
##Finetune the model for distracted driving competition
#NOTE(review): presumably adapts the model's output layer to the classes found
#in `batches` (10 here) -- the implementation lives in vgg16.py, confirm there
vgg.finetune(batches)



In [148]:

    
##Get batches for validation
#No batch_size is passed here, so get_batches uses its own default
#(the training cell explicitly asks for 32)
valid_batches = vgg.get_batches(directory+'valid')









    



Found 2237 images belonging to 10 classes.



In [ ]:

    
##Fit model for distracted driving
#Single epoch (nb_epoch=1). valid_batches is passed alongside the training
#batches; presumably it is the source of the val_loss / val_acc figures in the
#progress line.
vgg.fit(batches, valid_batches, nb_epoch=1)









    



Epoch 1/1
19487/19487 [==============================] - 537s - loss: 1.7996 - acc: 0.4972 - val_loss: 0.4421 - val_acc: 0.8677



In [ ]:

    
##Save the weights so we don't have to refit if something happens
import datetime
import os

#Timestamp reused by the later cells that write archive copies
now_string = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

#Create the archive folder (and results_path along with it) if it is missing.
#Saving into a non-existent folder would raise right after the long fit above.
#isdir + makedirs is used instead of exist_ok so this also runs on Python 2.
archive_path = results_path+'archive/'
if not os.path.isdir(archive_path):
    os.makedirs(archive_path)

#save as current
vgg.model.save_weights(results_path+'current_ft.h5')
#save as archive
vgg.model.save_weights(archive_path+'current_ft_'+now_string+'.h5')



In [ ]:

    
##Test our newly fitted model
#Unpacks two values: the batch iterator (kept for its .filenames) and the
#prediction array. All test images sit in one subfolder (spelled 'unkown' on
#disk), which is why the loader reports a single class.
test_batches, results = vgg.test(directory+'test', batch_size=64)









    



Found 79726 images belonging to 1 classes.



In [ ]:

    
##Save filenames and results for later so we don't need to rerun if something happens
import numpy as np

#NOTE: np.save appends '.npy' to any name that does not already end in it, so
#these land on disk as e.g. 'predictions.dat.npy'
#Targets are listed in write order: the two "current" copies first, then the
#two timestamped archive copies
save_targets = [
    (results_path+'predictions.dat', results),
    (results_path+'filenames.dat', test_batches.filenames),
    (results_path+'archive/predictions_'+now_string+'.dat', results),
    (results_path+'archive/filenames_'+now_string+'.dat', test_batches.filenames),
]
for target, payload in save_targets:
    np.save(target, payload)



In [ ]:

    
##Show the results array
#Bare expression on the cell's last line, so the notebook echoes its repr
results



In [ ]:

    
##Create a pd series of the filenames in the test directory
import os
import pandas as pd

#Build the Series and strip the directory part in one chain: split each path
#on '/' and keep only the last piece (the bare image filename)
filenames = (
    pd.Series(test_batches.filenames, name='img')
    .str.split('/')
    .str[-1]
)
filenames.head()



In [ ]:

    
##Create dataframe with results and filenames

#one column per class, in the same order as the columns of the results array
class_cols = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']

#final column order: filename first, then the class columns
cols = ['img'] + class_cols

#convert results array to df, attach the filenames, then reorder
df = pd.DataFrame(results, columns=class_cols).assign(img=filenames)[cols]

df.head()



In [ ]:

    
##Save submission file to current and archive
#Same frame written twice: the "current" copy first, then a timestamped
#archive copy. index=False keeps the row index out of the CSV.
submission_targets = (
    results_path+'submission.csv',
    results_path+'archive/submission_'+now_string+'.csv',
)
for csv_target in submission_targets:
    df.to_csv(csv_target, index=False)



In [ ]:

    
##Print results directory for reference



In [ ]:

    
%%bash
tree distracted-driving-results/sample/



In [ ]: