Get mean and std values from the dataset


In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.utils import shuffle

In [2]:
import keras
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator


Using TensorFlow backend.

Check data format


In [3]:
%ls ../data/csv/final


v1.csv  v2.csv  v3.csv  v4.csv  v5.csv  v5_train.csv  v5_valid.csv

In [4]:
# define path variables
cur_file = 'v5'
parent_path = os.path.dirname(os.getcwd())

data_path = os.path.join(parent_path, 'data')
img_front_dir_path = os.path.join(data_path, 'img', 'front')
model_path = os.path.join(parent_path, 'model')
log_path = os.path.join(model_path, 'log')


csv_dir_path = os.path.join(data_path, 'csv', 'final')
train_file = os.path.join(csv_dir_path, cur_file + '_train.csv')
valid_file = os.path.join(csv_dir_path, cur_file + '_valid.csv')

# divide by a constant to bound input and output to [0,1]
INPUT_NORMALIZATION = 255
OUTPUT_NORMALIZATION = 65535

In [5]:
df_train = pd.read_csv(os.path.join(data_path, train_file))
print("%d rows" % df_train.shape[0])
df_train.head(3)


208454 rows
Out[5]:
img wheel-axis clutch brake gas paddle-left paddle-right wheel-button-left-1 wheel-button-left-2 wheel-button-left-3 ... gear-1 gear-2 gear-3 gear-4 gear-5 gear-6 gear-R front side_left side_right
0 97802012_2017_08_08_20_47_09_88.jpg 1328 23221 0 34833 0 0 0 0 0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 97802012_2017_08_08_20_47_09_88_front.jpg 97802012_2017_08_08_20_47_09_88_left.jpg 97802012_2017_08_08_20_47_09_88_right.jpg
1 7a13935b_2017_07_30_15_35_17_02.jpg 288 0 0 65535 0 0 0 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 7a13935b_2017_07_30_15_35_17_02_front.jpg 7a13935b_2017_07_30_15_35_17_02_left.jpg 7a13935b_2017_07_30_15_35_17_02_right.jpg
2 3f80cca8_2017_08_08_14_14_12_98.jpg 36 15222 0 38703 0 0 0 0 0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 3f80cca8_2017_08_08_14_14_12_98_front.jpg 3f80cca8_2017_08_08_14_14_12_98_left.jpg 3f80cca8_2017_08_08_14_14_12_98_right.jpg

3 rows × 33 columns


In [6]:
df_val = pd.read_csv(os.path.join(data_path, valid_file))
print("%d rows" % df_val.shape[0])
df_val.head(3)


52114 rows
Out[6]:
img wheel-axis clutch brake gas paddle-left paddle-right wheel-button-left-1 wheel-button-left-2 wheel-button-left-3 ... gear-1 gear-2 gear-3 gear-4 gear-5 gear-6 gear-R front side_left side_right
0 3f80cca8_2017_08_08_14_52_41_43.jpg 140 17287 0 35349 0 0 0 0 0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 3f80cca8_2017_08_08_14_52_41_43_front.jpg 3f80cca8_2017_08_08_14_52_41_43_left.jpg 3f80cca8_2017_08_08_14_52_41_43_right.jpg
1 7d590ce8_2017_08_07_13_54_16_42.jpg -2209 27091 0 53410 0 0 0 0 0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 7d590ce8_2017_08_07_13_54_16_42_front.jpg 7d590ce8_2017_08_07_13_54_16_42_left.jpg 7d590ce8_2017_08_07_13_54_16_42_right.jpg
2 15be80cb_2017_07_28_23_24_53_41.jpg -2625 0 0 0 0 0 0 0 0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 15be80cb_2017_07_28_23_24_53_41_front.jpg 15be80cb_2017_07_28_23_24_53_41_left.jpg 15be80cb_2017_07_28_23_24_53_41_right.jpg

3 rows × 33 columns

Compute mean and std of the dataset


In [7]:
def img_to_arr(p):
    with image.load_img(p) as img:
        img = image.img_to_array(img)
    return img

In [8]:
datagen = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)

In [9]:
df = shuffle(df_train)
X_train_sample = np.array([img_to_arr(os.path.join(img_front_dir_path, p)) for p in df['front'][:2500]])

In [10]:
for i in range(0,3):
    print X_train_sample[:,:,:,i].mean(), X_train_sample[:,:,:,i].std()


89.5761 58.4214
97.5966 61.7917
88.3135 68.2043

In [ ]:

Test what the values will look like after subtracting the mean and dividing by the std.


In [15]:
sample_img = img_to_arr(os.path.join(img_front_dir_path, df['front'][1]))

In [16]:
sample_img.mean(), sample_img.std()


Out[16]:
(118.70493, 63.322655)

In [17]:
sample_img[:,:,0] -= 89.5761
sample_img[:,:,0] /= 58.4214

sample_img[:,:,1] -= 97.5966
sample_img[:,:,1] /= 61.7917

sample_img[:,:,2] -= 88.3135
sample_img[:,:,2] /= 68.2043

sample_img.mean(), sample_img.std()


Out[17]:
(0.4223775, 0.98472536)

In [ ]: