In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
In [2]:
import keras
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
In [3]:
# List the contents of ../data/csv/final (path is relative to the notebook's working directory).
%ls ../data/csv/final
In [4]:
# --- Path configuration -----------------------------------------------------
# Dataset version tag; it prefixes the train/validation CSV file names.
cur_file = 'v5'

# Every location below hangs off the parent of the current working directory.
parent_path = os.path.dirname(os.getcwd())
data_path = os.path.join(parent_path, 'data')
model_path = os.path.join(parent_path, 'model')

img_front_dir_path = os.path.join(data_path, 'img', 'front')
csv_dir_path = os.path.join(data_path, 'csv', 'final')
log_path = os.path.join(model_path, 'log')

# Full paths of the CSV files for the selected dataset version.
train_file = os.path.join(csv_dir_path, '%s_train.csv' % cur_file)
valid_file = os.path.join(csv_dir_path, '%s_valid.csv' % cur_file)

# --- Normalization constants ------------------------------------------------
# Dividing by these constants bounds the inputs and outputs to [0, 1]
# (255 == 2**8 - 1, 65535 == 2**16 - 1).
INPUT_NORMALIZATION = 255
OUTPUT_NORMALIZATION = 65535
In [5]:
# Load the training split. `train_file` is already a complete path (it is built
# from csv_dir_path, which itself starts at data_path), so it is handed to
# read_csv directly instead of being joined onto data_path a second time.
df_train = pd.read_csv(train_file)
print("%d rows" % len(df_train))
# Last expression of the cell: show the first three rows.
df_train.head(3)
Out[5]:
In [6]:
# Load the validation split. `valid_file` is already a complete path (it is
# built from csv_dir_path, which itself starts at data_path), so it is handed
# to read_csv directly instead of being joined onto data_path a second time.
df_val = pd.read_csv(valid_file)
print("%d rows" % len(df_val))
# Last expression of the cell: show the first three rows.
df_val.head(3)
Out[6]:
In [7]:
def img_to_arr(p):
    """Load the image file at path ``p`` and return it as an array.

    The file is opened with ``image.load_img`` inside a ``with`` block and
    converted with ``image.img_to_array`` before that block exits.
    """
    with image.load_img(p) as loaded:
        pixels = image.img_to_array(loaded)
    return pixels
In [8]:
# Generator with both dataset-wide ("featurewise") options switched on:
# mean-centering and division by the standard deviation.
# NOTE(review): Keras derives featurewise statistics from `datagen.fit(...)`;
# no such call is visible in this part of the notebook - confirm it happens
# before the generator is used.
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True
)
In [9]:
# Fixed seed so that "Restart & Run All" always draws the same sample and
# therefore reports the same channel statistics.
SAMPLE_SEED = 42
# Number of training images loaded into memory for the statistics estimate.
SAMPLE_SIZE = 2500

df = shuffle(df_train, random_state=SAMPLE_SEED)

# The 'front' column holds file names relative to img_front_dir_path.
# Assumes every image has the same shape, so np.array yields one 4-D array
# (the cells that follow index it with four axes) - TODO confirm.
X_train_sample = np.array([
    img_to_arr(os.path.join(img_front_dir_path, p))
    for p in df['front'][:SAMPLE_SIZE]
])
In [10]:
# Print the mean and standard deviation of each of the three channels (last
# axis) over the whole sample. The %-formatted print() call emits the same
# "mean std" text as the former Python 2 print statement and also runs on
# Python 3, matching the print(...) style used by the other cells.
for channel in range(3):
    channel_values = X_train_sample[:, :, :, channel]
    print("%s %s" % (channel_values.mean(), channel_values.std()))
In [ ]:
Test what the values will look like after subtracting the mean and dividing by the std.
In [15]:
# Pick one image to experiment on. NOTE: `df['front'][1]` is the same lookup
# the cell has always used; on an integer index it selects by index label, not
# by position in the shuffled frame - verify that this is the intended row.
sample_img_path = os.path.join(img_front_dir_path, df['front'][1])
sample_img = img_to_arr(sample_img_path)
In [16]:
# Overall (all channels pooled) mean and standard deviation of the sample image.
(sample_img.mean(), sample_img.std())
Out[16]:
In [17]:
# Per-channel statistics of the training sample, computed here rather than
# pasted in as literals from an earlier printout, so they always match the
# sample that is currently loaded. Reducing over axes (0, 1, 2) leaves one
# value per channel (the last axis).
channel_means = X_train_sample.mean(axis=(0, 1, 2))
channel_stds = X_train_sample.std(axis=(0, 1, 2))

# Standardize into a new array. `sample_img` itself is left untouched, so
# re-running this cell gives the same result instead of normalizing the
# already-normalized image a second time.
sample_img_norm = (sample_img - channel_means) / channel_stds

# Last expression of the cell: overall mean/std after standardization.
sample_img_norm.mean(), sample_img_norm.std()
Out[17]:
In [ ]: