In [140]:
##Render matplotlib figures inline, directly in the notebook output below each cell
%matplotlib inline
In [141]:
##Print data directory structure for reference
In [142]:
%%bash
# Show only the directories (-d) under the competition data folder, for reference.
tree -d data/distracted-driving/
In [143]:
##List the files in the notebook's current working directory for reference
%ls
In [144]:
##Set directory to either sample or full
##Flip this single flag instead of commenting/uncommenting path pairs:
##  True  -> small sample set  (data/distracted-driving/sample/)
##  False -> full data set     (data/distracted-driving/)
use_sample = False
##Both the input data and the results live in a 'sample/' subfolder for sample runs
dataset_subdir = 'sample/' if use_sample else ''
##Root folder holding the train/valid/test image folders
directory = "data/distracted-driving/" + dataset_subdir
##Folder where weights, predictions and submissions are written
results_path = 'distracted-driving-results/' + dataset_subdir
In [145]:
##Import and initialize vgg
# NOTE(review): `utils` is not referenced by name in the visible cells; presumably it is
# imported for shared helpers or import-time setup -- confirm before removing.
import utils
# reload() re-executes vgg16.py so edits to that module are picked up without restarting the kernel.
# NOTE(review): the bare reload() builtin only exists on Python 2; on Python 3 it lives in importlib.
import vgg16; reload(vgg16)
from vgg16 import Vgg16
# Single model wrapper instance used by every later cell (get_batches, finetune, fit, test, model).
vgg = Vgg16()
In [146]:
##Get batches for training
# Reads from the 'train' subfolder of the configured data directory, with batch_size=32.
batches = vgg.get_batches(directory+'train', batch_size=32)
In [147]:
##Finetune the model for distracted driving competition
# finetune() receives the training batches; presumably it adapts the model's output to the
# classes found there -- TODO confirm against vgg16.py.
vgg.finetune(batches)
In [148]:
##Get batches for validation
# Reads from the 'valid' subfolder; no batch_size is passed, so get_batches' own default applies.
valid_batches = vgg.get_batches(directory+'valid')
In [ ]:
##Fit model for distracted driving
# Trains on `batches`, passing `valid_batches` as the second argument, for one epoch (nb_epoch=1).
vgg.fit(batches, valid_batches, nb_epoch=1)
In [ ]:
##Save the weights so we don't have to refit if something happens
import datetime
import os
# Timestamp that tags this run's archived artifacts; later cells reuse now_string
# so that weights, predictions and submission of one run share the same suffix.
now_string = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# Make sure the results folder and its archive/ subfolder exist before writing;
# otherwise the save fails on a fresh checkout where they were never created.
# (isdir + makedirs rather than exist_ok so this also runs on Python 2.)
archive_dir = results_path+'archive/'
if not os.path.isdir(archive_dir):
    os.makedirs(archive_dir)
#save as current (overwritten on every run)
vgg.model.save_weights(results_path+'current_ft.h5')
#save as archive (one timestamped copy per run)
vgg.model.save_weights(archive_dir+'current_ft_'+now_string+'.h5')
In [ ]:
##Test our newly fitted model
# vgg.test is pointed at the 'test' subfolder with batch_size=64 and returns a pair:
#   test_batches -- the batch object; its .filenames attribute is used in later cells
#   results      -- the output array that later becomes the c0..c9 columns of the submission
test_batches, results = vgg.test(directory+'test', batch_size=64)
In [ ]:
##Persist the test filenames and results so the test pass need not be rerun if something happens
import numpy as np
# NOTE: np.save appends '.npy' when the name does not already end with it,
# so these files end up on disk as '*.dat.npy'.
test_filenames = test_batches.filenames
archive_prefix = results_path+'archive/'
#Current copies (overwritten on every run)
np.save(results_path+'predictions.dat', results)
np.save(results_path+'filenames.dat', test_filenames)
#Archived copies, tagged with this run's timestamp
np.save(archive_prefix+'predictions_'+now_string+'.dat', results)
np.save(archive_prefix+'filenames_'+now_string+'.dat', test_filenames)
In [ ]:
##Show the results array
# NOTE(review): this displays the whole array as the cell output; a slice such as
# results[:5] would be enough for a sanity check.
results
In [ ]:
##Create a pd series of the filenames in the test directory
import pandas as pd
import os
# One entry per test file, named 'img' so it can be used directly as a column of that name.
filenames = pd.Series(test_batches.filenames, name='img')
##Remove directory from filename.
# os.path.basename honours the platform's path separator, whereas splitting on '/'
# would leave the folder prefix in place wherever paths are not '/'-separated.
filenames = filenames.map(os.path.basename)
filenames.head()
In [ ]:
##Build the submission frame: the filename column first, then one column per class
#names of the ten result columns, in the order they appear in the results array
class_cols = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']
#final column order: filename first, then the class columns
cols = ['img'] + class_cols
#convert results array to df
df = pd.DataFrame(results, columns=class_cols)
#attach the filenames (aligned on the row index)
df['img'] = filenames
#apply the final column order
df = df[cols]
df.head()
In [ ]:
##Write the submission file twice: a 'current' copy and a timestamped archive copy
submission_current = results_path+'submission.csv'
submission_archive = results_path+'archive/submission_'+now_string+'.csv'
#index=False keeps the row index out of the csv
for submission_file in (submission_current, submission_archive):
    df.to_csv(submission_file, index=False)
In [ ]:
##Print results directory for reference
In [ ]:
%%bash
tree distracted-driving-results/sample/
In [ ]: