In [1]:
    
%matplotlib inline
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize
    
In [2]:
    
# Load the sample driving log. read_csv is left on its default header handling,
# so the file's first row supplies the column labels.
df1 = pd.read_csv('../data/raw/sample/driving_log.csv')
    
In [3]:
    
# Keep the sample log's column labels: the simulator logs are read with
# header=None and take their column names from this Index.
columns = df1.columns
    
In [4]:
    
# Number of rows in the sample log; feeds the totals table at the end.
df1_count = df1.shape[0]
df1_count
    
    Out[4]:
In [5]:
    
# Print the frame overview for the sample log (column dtypes and non-null counts).
df1.info()
    
    
In [6]:
    
# Preview the first rows of the sample log.
df1.head()
    
    Out[6]:
In [7]:
    
# Summary statistics for the sample log, transposed so that each described
# column becomes one row.
df1_summary = df1.describe().T
# After the transpose the index holds the described column labels; the
# normalisation cell selects exactly these columns.
df1_summary_cols = df1_summary.index
print(df1_summary_cols)
df1_summary
    
    
    Out[7]:
In [8]:
    
# Box-and-whisker plot of the sample log.
df1.plot.box()
    
    Out[8]:
    
In [10]:
    
# Kernel density estimate of the sample log's 'steering' column.
df1['steering'].plot.kde()
    
    Out[10]:
    
In [9]:
    
# Scale the described columns with sklearn's normalize and wrap the result
# back into a labelled frame for charting.
# NOTE(review): normalize defaults to per-row (axis=1) L2 scaling rather than
# per-column scaling - confirm that is what this exploration intends.
df1_norm = pd.DataFrame(normalize(df1[df1_summary_cols]),
                        columns=df1_summary_cols)
# Keep only the columns whose mean AND std are both non-zero
# (a column is dropped as soon as either statistic is zero).
df1_norm = df1_norm.loc[:, df1_norm.mean().ne(0.) & df1_norm.std().ne(0.)]
# One KDE subplot per remaining column.
df1_norm.plot.kde(subplots=True, figsize=(10, 5))
plt.tight_layout()
    
    
In [10]:
    
# Load the simulator log for track 1, default direction. header=None makes
# pandas treat the first row as data, so the column labels are supplied
# explicitly from the sample log's `columns`.
df2 = pd.read_csv('../data/raw/sim_track_1_direction_default/driving_log.csv', 
                  header=None, 
                  names=columns)
    
In [11]:
    
# Number of rows in the track 1 / default-direction log; feeds the totals table.
df2_count = df2.shape[0]
df2_count
    
    Out[11]:
In [12]:
    
# Print the frame overview for the track 1 / default-direction log.
df2.info()
    
    
In [13]:
    
# Preview the first rows of the track 1 / default-direction log.
df2.head()
    
    Out[13]:
In [14]:
    
# Show the 'center' value of the row labelled 0
# (presumably the centre-camera image path - confirm against the log).
print(df2.loc[0, 'center'])
    
    
In [15]:
    
# Shell escape: print the current working directory.
!pwd
    
    
In [16]:
    
# Summary statistics for the track 1 / default-direction log, transposed so
# that each described column becomes one row.
df2_summary = df2.describe().T
# The transposed index holds the described column labels; the normalisation
# cell selects exactly these columns.
df2_summary_cols = df2_summary.index
print(df2_summary_cols)
df2_summary
    
    
    Out[16]:
In [17]:
    
# Box-and-whisker plot of the track 1 / default-direction log.
df2.plot.box()
    
    Out[17]:
    
In [18]:
    
# Scale the described columns with sklearn's normalize and wrap the result
# back into a labelled frame for charting.
# NOTE(review): normalize defaults to per-row (axis=1) L2 scaling rather than
# per-column scaling - confirm that is what this exploration intends.
df2_norm = pd.DataFrame(normalize(df2[df2_summary_cols]),
                        columns=df2_summary_cols)
# Keep only the columns whose mean AND std are both non-zero
# (a column is dropped as soon as either statistic is zero).
df2_norm = df2_norm.loc[:, df2_norm.mean().ne(0.) & df2_norm.std().ne(0.)]
# One KDE subplot per remaining column.
df2_norm.plot.kde(subplots=True, figsize=(10, 5))
plt.tight_layout()
    
    
In [19]:
    
# Load the simulator log for track 1, reverse direction. header=None makes
# pandas treat the first row as data, so the column labels are supplied
# explicitly from the sample log's `columns`.
df3 = pd.read_csv('../data/raw/sim_track_1_direction_reverse/driving_log.csv', 
                  header=None, 
                  names=columns)
    
In [20]:
    
# Number of rows in the track 1 / reverse-direction log; feeds the totals table.
df3_count = df3.shape[0]
df3_count
    
    Out[20]:
In [21]:
    
# Print the frame overview for the track 1 / reverse-direction log.
df3.info()
    
    
In [22]:
    
# Preview the first rows of the track 1 / reverse-direction log.
df3.head()
    
    Out[22]:
In [23]:
    
# Summary statistics for the track 1 / reverse-direction log, transposed so
# that each described column becomes one row.
df3_summary = df3.describe().T
# The transposed index holds the described column labels; the normalisation
# cell selects exactly these columns.
df3_summary_cols = df3_summary.index
print(df3_summary_cols)
df3_summary
    
    
    Out[23]:
In [24]:
    
# Box-and-whisker plot of the track 1 / reverse-direction log.
df3.plot.box()
    
    Out[24]:
    
In [25]:
    
# Scale the described columns with sklearn's normalize and wrap the result
# back into a labelled frame for charting.
# NOTE(review): normalize defaults to per-row (axis=1) L2 scaling rather than
# per-column scaling - confirm that is what this exploration intends.
df3_norm = pd.DataFrame(normalize(df3[df3_summary_cols]),
                        columns=df3_summary_cols)
# Keep only the columns whose mean AND std are both non-zero
# (a column is dropped as soon as either statistic is zero).
df3_norm = df3_norm.loc[:, df3_norm.mean().ne(0.) & df3_norm.std().ne(0.)]
# One KDE subplot per remaining column.
df3_norm.plot.kde(subplots=True, figsize=(10, 5))
plt.tight_layout()
    
    
In [26]:
    
# Load the simulator log for track 2, default direction. header=None makes
# pandas treat the first row as data, so the column labels are supplied
# explicitly from the sample log's `columns`.
df4 = pd.read_csv('../data/raw/sim_track_2_direction_default/driving_log.csv', 
                  header=None, 
                  names=columns)
    
In [27]:
    
# Number of rows in the track 2 / default-direction log; feeds the totals table.
# Must be measured on df4 itself: counting df3 here would silently report the
# track 1 / reverse size and skew the track-2 and grand totals.
df4_count = len(df4)
df4_count
    
    Out[27]:
In [28]:
    
# Print the frame overview for the track 2 / default-direction log.
df4.info()
    
    
In [29]:
    
# Preview the first rows of the track 2 / default-direction log.
df4.head()
    
    Out[29]:
In [30]:
    
# Summary statistics for the track 2 / default-direction log, transposed so
# that each described column becomes one row.
df4_summary = df4.describe().T
# The transposed index holds the described column labels; the normalisation
# cell selects exactly these columns.
df4_summary_cols = df4_summary.index
print(df4_summary_cols)
df4_summary
    
    
    Out[30]:
In [31]:
    
# Box-and-whisker plot of the track 2 / default-direction log.
df4.plot.box()
    
    Out[31]:
    
In [32]:
    
# Scale the described columns of the track 2 / default-direction log with
# sklearn's normalize.
# NOTE(review): normalize defaults to per-row (axis=1) L2 scaling rather than
# per-column scaling - confirm that is what this exploration intends.
df4_norm = normalize(df4[df4_summary_cols][:])
# put into dataframe for easy charting
df4_norm = pd.DataFrame(df4_norm, columns=df4_summary_cols)
# Keep only the columns whose mean AND std are both non-zero. The mask has to
# be applied to df4_norm itself; indexing df3_norm here would chart the
# track 1 / reverse data under the track 2 heading.
df4_norm = df4_norm.loc[:, (df4_norm.mean() != 0.) & (df4_norm.std() != 0.)]
# plot kde, one subplot per remaining column
df4_norm.plot(kind='kde', 
              subplots=True, 
              figsize=(10, 5))
plt.tight_layout()
    
    
In [33]:
    
# Load the simulator log for track 2, reverse direction. header=None makes
# pandas treat the first row as data, so the column labels are supplied
# explicitly from the sample log's `columns`.
df5 = pd.read_csv('../data/raw/sim_track_2_direction_reverse/driving_log.csv', 
                  header=None, 
                  names=columns)
    
In [34]:
    
# Number of rows in the track 2 / reverse-direction log; feeds the totals table.
df5_count = df5.shape[0]
df5_count
    
    Out[34]:
In [35]:
    
# Print the frame overview for the track 2 / reverse-direction log.
df5.info()
    
    
In [36]:
    
# Preview the first rows of the track 2 / reverse-direction log.
df5.head()
    
    Out[36]:
In [37]:
    
# Summary statistics for the track 2 / reverse-direction log, transposed so
# that each described column becomes one row.
df5_summary = df5.describe().T
# The transposed index holds the described column labels; the normalisation
# cell selects exactly these columns.
df5_summary_cols = df5_summary.index
print(df5_summary_cols)
df5_summary
    
    
    Out[37]:
In [38]:
    
# Box-and-whisker plot of the track 2 / reverse-direction log.
df5.plot.box()
    
    Out[38]:
    
In [39]:
    
# Scale the described columns with sklearn's normalize and wrap the result
# back into a labelled frame for charting.
# NOTE(review): normalize defaults to per-row (axis=1) L2 scaling rather than
# per-column scaling - confirm that is what this exploration intends.
df5_norm = pd.DataFrame(normalize(df5[df5_summary_cols]),
                        columns=df5_summary_cols)
# Keep only the columns whose mean AND std are both non-zero
# (a column is dropped as soon as either statistic is zero).
df5_norm = df5_norm.loc[:, df5_norm.mean().ne(0.) & df5_norm.std().ne(0.)]
# One KDE subplot per remaining column.
df5_norm.plot.kde(subplots=True, figsize=(10, 5))
plt.tight_layout()
    
    
In [40]:
    
# Text table of row counts per data source and each source's share of all rows.
# Percentages are printed fixed-point with one decimal ({:>5.1f}) so they line
# up with the literal '100.0%' in the totals row; a bare '.3' precision on a
# float means "three significant digits", which gives a varying number of
# decimals and switches to exponent notation at 100.
track_1_count = df2_count + df3_count
track_2_count = df4_count + df5_count
total_count = df1_count + track_1_count + track_2_count
print('source\t\t\t\t\t     n\t  total\t       %')
print('====================================     =====   ======\t  ======\n')
print('sample: total \t\t\t\t{:>6,}\t\t  {:>5.1f}%'.format(df1_count, 
                                                           (df1_count / total_count) * 100))
print('----------------------------------------------------------------\n')
print('sim track 1: direction=default \t\t{:>6,}'.format(df2_count))
print('sim track 1: direction=reverse \t\t{:>6,}'.format(df3_count))
print('sim track 1: total \t\t\t\t {:>6,}\t  {:>5.1f}%'.format(track_1_count, 
                                                             (track_1_count / total_count) * 100))
print('----------------------------------------------------------------\n')
print('sim track 2: direction=default \t\t{:>6,}'.format(df4_count))
print('sim track 2: direction=reverse \t\t{:>6,}'.format(df5_count))
print('sim track 2: total \t\t\t\t {:>6,}\t  {:>5.1f}%'.format(track_2_count, 
                                                             (track_2_count / total_count) * 100))
print('----------------------------------------------------------------\n')
print('totals (x3 for centre + left + right)\t\t {:>6,}\t  100.0%'.format(total_count))
print('================================================================')
    
    
In [51]:
    
# Load one centre-camera frame from the sample set and display it.
# NOTE(review): absolute, machine-specific path - consider deriving it from the
# same '../data/raw/sample' root used for the CSV.
img_path = '/src/repos/udacity_carnd/project_3/data/raw/sample/IMG/center_2016_12_01_13_30_48_287.jpg'
# cv2.imread's second argument is an IMREAD_* read flag, not a colour-conversion
# code, so passing COLOR_BGR2RGB there does not reorder channels. Read the file
# (OpenCV yields BGR), then convert explicitly.
img_bgr = cv2.imread(img_path)
if img_bgr is None:
    # imread signals an unreadable/missing file by returning None rather than raising
    raise FileNotFoundError(img_path)
# matplotlib expects RGB channel order
img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
plt.imshow(img)
    
    Out[51]:
    
In [52]:
    
# Check what kind of object the image loader returned.
type(img)
    
    Out[52]:
In [53]:
    
# Keep indices 60..139 along the first axis (all of axes 2 and 3 retained).
# NOTE(review): this rebinds `img`, so re-running the cell crops the already
# cropped array again - re-run the load cell first.
img = img[60:140, :, :]
# Shape of the cropped array
img.shape
    
    Out[53]:
In [54]:
    
# Display the current (cropped) contents of `img`.
plt.imshow(img)
    
    Out[54]: