In [47]:
import pandas as pd

from os import listdir

In [2]:
# Shell out to list the contents of the raw data directory.
!ls ../raw_data/


driver_imgs_list.csv  imgs                  sample_submission.csv

Data Summary

1) Sample Submission

  • Each row's class probabilities sum to 1: Sum(c0:c9) == 1
  • Columns c0 through c9 represent the predicted likelihood of each class
  • Expected Samples: 79,726

In [40]:
# Sample Data Raw
# Load the sample submission file: one row per image, one column per class (c0-c9).
sample_df = pd.read_csv('../raw_data/sample_submission.csv')
# Parenthesised single-argument print gives the same output on Python 2 and
# Python 3; the bare `print x` statement is a SyntaxError on Python 3.
print(len(sample_df))
# Last expression of the cell: display the first row as a rich table.
sample_df.head(1)


79726
Out[40]:
img c0 c1 c2 c3 c4 c5 c6 c7 c8 c9
0 img_1.jpg 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1

In [38]:
# Human-readable description for each class column of the sample submission.
col_map = {
    'c0': 'safe driving',
    'c1': 'texting - right',
    'c2': 'talking on the phone - right',
    'c3': 'texting - left',
    'c4': 'talking on the phone - left',
    'c5': 'operating the radio',
    'c6': 'drinking',
    'c7': 'reaching behind',
    'c8': 'hair and makeup',
    'c9': 'talking to passenger',
}

# Preview only: the renamed frame is displayed but not assigned, so
# sample_df keeps its original c0..c9 column names.
sample_df.rename(columns=col_map).head(1)


Out[38]:
img safe driving texting - right talking on the phone - right texting - left talking on the phone - left operating the radio drinking reaching behind hair and makeup talking to passenger
0 img_1.jpg 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1

2) Driver Images List

  • Training dataset listing: the subject (driver), class label, and image file name for each image in the labeled (training) subset
  • Train Size: 22,424

In [45]:
# Load the labeled image list (columns: subject, classname, img).
img_list_df = pd.read_csv('../raw_data/driver_imgs_list.csv')
# Parenthesised single-argument print gives the same output on Python 2 and
# Python 3; the bare `print x` statement is a SyntaxError on Python 3.
print(len(img_list_df))
# Last expression of the cell: display the first row as a rich table.
img_list_df.head(1)


22424
Out[45]:
subject classname img
0 p002 c0 img_44733.jpg

In [ ]:

3) Image Directory

  • Contains a train/ and test/ directory

In [56]:
# Shell out to list the top level of the image directory.
!ls ../raw_data/imgs/


test  train

Train Directory:

  • Contains a directory for each of the classes c0 through c9 (10 directories)

Class Directory Summary

  • c0 :: safe driving :: Length: 2490
  • c1 :: texting - right :: Length: 2267
  • c2 :: talking on the phone - right :: Length: 2317
  • c3 :: texting - left :: Length: 2346
  • c4 :: talking on the phone - left :: Length: 2326
  • c5 :: operating the radio :: Length: 2312
  • c6 :: drinking :: Length: 2325
  • c7 :: reaching behind :: Length: 2002
  • c8 :: hair and makeup :: Length: 1911
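  • c9 :: talking to passenger :: Length: 2129
  • Note: the ten lengths above sum to 22,425, one more than the 22,424 rows in the driver images list; one of the counts may include a stray non-image file such as .DS_Store (to be confirmed)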

In [62]:
# Root of the training images; the listing below is treated as the class names.
train_path = '../raw_data/imgs/train/'
# os.listdir returns entries in arbitrary order, so sort to get a
# reproducible class order across machines and filesystems.
train_files = sorted(listdir(train_path))
# Parenthesised single-argument print gives the same output on Python 2 and
# Python 3; the bare `print x` statement is a SyntaxError on Python 3.
print(len(train_files))
# Last expression of the cell: display the list of entries.
train_files


10
Out[62]:
['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9']

In [105]:
# Build one row per training image: its file path and the class directory it
# was found in.
#
# Rows are collected in a plain list and turned into a DataFrame once, rather
# than calling DataFrame.append inside the loop: append re-copies the whole
# frame on every iteration and was removed in pandas 2.0. Avoiding xrange and
# a bare zip() also keeps the cell runnable on both Python 2 and Python 3.
train_file_records = []
for clas in train_files:
    class_dir = train_path + clas + '/'
    # os.listdir order is arbitrary; sort so the row order is reproducible.
    for file_name in sorted(listdir(class_dir)):
        # Skip the macOS Finder metadata file; it is not an image.
        if file_name == '.DS_Store':
            continue
        train_file_records.append((class_dir + file_name, clas))

# A single construction also gives a unique 0..N-1 index; the per-class
# appends used to restart the index at 0 for every class, leaving duplicate
# index labels.
train_file_df = pd.DataFrame(train_file_records, columns=['file_path', 'class'])

In [108]:
# Display the first two rows of the file_path / class table.
train_file_df.head(2)


Out[108]:
file_path class
0 ../raw_data/imgs/train/c0/img_100026.jpg c0
1 ../raw_data/imgs/train/c0/img_10003.jpg c0

In [ ]:


In [ ]:

Test Directory

  • Contains a large number of images

In [ ]: