This project aims to develop a machine learning system that detects and classifies distracted states of car drivers. The main approach is to apply deep convolutional neural networks (CNNs). We will explore and experiment with various CNN architectures, leverage pre-trained networks (transfer learning) and pseudo-labelling, and potentially ensemble several models to find the best classifier. The results of this project may inform further research and could be applied as part of an in-car online monitoring system, in which a computer decides to take over control of the car if the driver is distracted and poses an accident risk.
In [8]:
from tensorflow.python.client import device_lib
# List all local devices, then show the description of each available GPU
print(device_lib.list_local_devices())
print([x.physical_device_desc for x in device_lib.list_local_devices()
       if x.device_type == 'GPU'])
In [9]:
import math, os, sys
import numpy as np
from numpy.random import random, permutation, randn, normal
from matplotlib import pyplot as plt
%matplotlib inline
import keras
from keras import backend as k
from keras.utils.data_utils import get_file
from keras.models import Sequential, Model
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers import Input, GlobalAveragePooling2D
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, RMSprop, Adam
from keras.preprocessing import image
from keras.layers.normalization import BatchNormalization
from keras.utils.np_utils import to_categorical
from keras.metrics import categorical_crossentropy
from keras.regularizers import l2,l1
import PIL
from PIL import Image
import bcolz
import pickle
from shutil import copyfile
from shutil import move
from glob import glob
In [10]:
%pwd
Out[10]:
In [11]:
current_dir = os.getcwd()
PROJECT_DIR = current_dir
path = current_dir + '/imgs/'
test_path = path + 'test/'      # we use all of the test data
train_path = path + 'train/'
result_path = path + 'results/'
valid_path = path + 'valid/'
In [12]:
'''
%cd $path
%mkdir valid
%mkdir results
%mkdir models'''
Out[12]:
In [13]:
"""# Creating validation set
%cd $valid_path
%mkdir c0
%mkdir c1
%mkdir c2
%mkdir c3
%mkdir c4
%mkdir c5
%mkdir c6
%mkdir c7
%mkdir c8
%mkdir c9
%cd $path"""
Out[13]:
In [14]:
os.listdir(train_path)
Out[14]:
In [15]:
class_modes = ['c0','c1','c2','c3','c4','c5','c6','c7','c8','c9']
for i in class_modes:
    n_images = len([name for name in os.listdir(train_path + i)
                    if os.path.isfile(os.path.join(train_path + i, name))])
    print('label {0} has {1:5d} images'.format(i, n_images))
In [16]:
total = sum(len([name for name in os.listdir(train_path + i)
                 if os.path.isfile(os.path.join(train_path + i, name))])
            for i in class_modes)
total
Out[16]:
There are roughly 2,000 images in each category, so it is probably a good idea to move about 20% of each category into a validation set (the commented cell below moves 500 images per class).
In [17]:
%cd $train_path
In [18]:
'''for label in class_modes:
    g = glob(label + "/*.jpg")
    shuffle = np.random.permutation(g)
    for i in range(500):
        move(shuffle[i], valid_path + shuffle[i])'''
Out[18]:
In [19]:
def get_batches(dirname,
                gen=image.ImageDataGenerator(),
                shuffle=True,
                batch_size=1,
                target_size=(224, 224),
                class_mode="categorical"):
    return gen.flow_from_directory(path + dirname,
                                   target_size=target_size,
                                   class_mode=class_mode,
                                   shuffle=shuffle,
                                   batch_size=batch_size)

def plots(ims, figsize=(12, 6), rows=1, titles=None, interp=False):
    if type(ims[0]) is np.ndarray:
        ims = np.array(ims).astype(np.uint8)
        if ims.shape[-1] != 3:
            ims = ims.transpose((0, 2, 3, 1))  # channels-first -> channels-last
    f = plt.figure(figsize=figsize)
    for i in range(len(ims)):
        sp = f.add_subplot(rows, len(ims) // rows, i + 1)
        sp.axis('off')
        if titles is not None:
            sp.set_title(titles[i], fontsize=16)
        plt.imshow(ims[i], interpolation=None if interp else 'none')

batches = get_batches("valid", batch_size=6)
imgs, labels = next(batches)
# random images from the validation set
plots(imgs, titles=labels, figsize=(20, 15), rows=2)
In [20]:
batches = get_batches("train", batch_size=6)
imgs, labels = next(batches)
# random images from the training set
plots(imgs, titles=labels, figsize=(20, 15), rows=2)
In [21]:
"""def get_data(path, target_size = (224,224)):
batches = get_batches(path, shuffle=False, batch_size=20, class_mode=None, target_size=target_size)
return np.concatenate([batches.next() for i in range (len(batches.classes))])"""
def get_data(path, target_size = (224,224)):
batches = get_batches(path, shuffle=False, batch_size=20, class_mode=None, target_size=target_size)
return np.concatenate([batches.next() for i in range (len(batches.classes))])
#p1 = Process(target=get_data, args=("train",))
#p2 = Process(target=get_data, args=("valid",))
#p1.start()
#p1.join()
#p2.start()
#p2.join()
train_data = get_data("train")
#del train_data
valid_data = get_date("valid")
#del valid_data
#train_data.flush()
#train_data.close()
#valid_data.flush()
#valid_data.close()
In [ ]:
def save_array(fname, arr):
    c = bcolz.carray(arr, rootdir=fname, mode='w')
    c.flush()

save_array(path + 'results/train_data.dat', train_data)
save_array(path + 'results/valid_data.dat', valid_data)
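The get_classes call in the next cell is not defined anywhere in this notebook; it appears to come from the fast.ai course utilities. A minimal sketch under that assumption, with the signature and return order inferred from the unpacking below:
In [ ]:
# Hypothetical helper (not defined elsewhere in this notebook), assumed to
# mirror the fast.ai-style get_classes. get_batches already prepends the
# global `path`, so the argument is kept only to match the expected signature.
def get_classes(path):
    train_batches = get_batches('train', shuffle=False, batch_size=1)
    valid_batches = get_batches('valid', shuffle=False, batch_size=1)
    return (valid_batches.classes, train_batches.classes,
            to_categorical(valid_batches.classes),
            to_categorical(train_batches.classes),
            valid_batches.filenames, train_batches.filenames)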
In [ ]:
def load_array(fname):
    return bcolz.open(fname)[:]

(valid_classes, train_classes, valid_labels, train_labels,
 valid_filenames, train_filenames) = get_classes(path)
valid_data = load_array(path + 'results/valid_data.dat')
train_data = load_array(path + 'results/train_data.dat')
In this section I will use a fully connected network with no hidden layers, i.e., a linear model. This provides a benchmark for the later experiments.
In [ ]:
Linear_model = Sequential([
    BatchNormalization(axis=-1, input_shape=(224, 224, 3)),
    Flatten(),
    Dense(10, activation='softmax')
])
Linear_model.compile(Adam(lr=0.000001), loss='categorical_crossentropy',
                     metrics=['accuracy'])
Linear_model.summary()
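With the image arrays and one-hot labels in memory, the benchmark can be trained directly with fit. A minimal sketch, where the batch size and epoch count are illustrative choices rather than tuned values:
In [ ]:
# Train the linear benchmark on the in-memory arrays; batch_size and epochs
# here are illustrative placeholders, not tuned hyperparameters.
Linear_model.fit(train_data, train_labels,
                 batch_size=64,
                 epochs=2,
                 validation_data=(valid_data, valid_labels))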