In [1]:
# Anchor the notebook's frequently used directories to the directory the
# kernel is running in at the moment this cell executes.
import os, sys

SCRIPTS_HOME_DIR = os.getcwd()
current_dir = SCRIPTS_HOME_DIR
# The data folder is the 'data' sub-directory of the scripts directory.
DATA_HOME_DIR = SCRIPTS_HOME_DIR + '/data'
In [2]:
from glob import glob
import numpy as np
import _pickle as pickle
import PIL
from PIL import Image
from tqdm import tqdm
from PIL import ImageOps
from PIL import Image
from tqdm import tqdm
import bcolz
In [10]:
import seaborn as sns
import matplotlib as plt
%matplotlib inline
In [3]:
def folder_to_numpy(image_directory_full):
    """
    Read the pictures in a folder, sorted by filename, into one numpy array.

    USAGE:
        X_train = folder_to_numpy(DATA_HOME_DIR + '/train/test1')

    Args:
        image_directory_full (str): Path of the folder holding the pictures.
            It is passed to os.chdir() unchanged, so a relative path is
            resolved against the current working directory; nothing is
            prepended to it here.

    Returns:
        picture_arrays (np array): One entry per picture, in filename order.
            '*.png' files are used; '*.jpg' files are only looked for when
            the folder contains no '*.png' file at all. No channel axis is
            appended (the reshape near the end is disabled).
    """
    # Remember the starting directory so the caller's working directory is
    # restored afterwards; otherwise every later relative path (and any
    # re-run of an os.getcwd() cell) would silently point at the image folder.
    original_dir = os.getcwd()

    # change directory
    print("Moving to directory: " + image_directory_full)
    os.chdir(image_directory_full)
    try:
        # read in filenames from directory, falling back from png to jpg
        filenames = glob('*.png')
        if len(filenames) == 0:
            filenames = glob('*.jpg')
        print("Found {} pictures".format(len(filenames)))

        # sort filenames
        filenames.sort()

        # open and convert images to numpy array; the `with` block closes each
        # file as soon as its pixels have been copied out
        print("Starting pictures to numpy conversion")
        converted = []
        for image_path in filenames:
            with Image.open(image_path) as picture:
                converted.append(np.array(picture))
        picture_arrays = np.array(converted)

        # reshape to tensorflow format (disabled)
        # picture_arrays = picture_arrays.reshape(*picture_arrays.shape, 1)
        print("Shape of output: {}".format(picture_arrays.shape))
    finally:
        # restore the working directory even if a picture failed to load
        os.chdir(original_dir)

    # return array
    return picture_arrays
In [24]:
# folder_to_numpy() changes into exactly the path it is given, so the
# data-relative folder has to be anchored to DATA_HOME_DIR here; passed alone,
# '/train/binary/forward' would be treated as an absolute path from the
# filesystem root.
data_folder = '/train/binary/forward'
X_train = folder_to_numpy(DATA_HOME_DIR + data_folder)
In [5]:
# Placeholder labels: a running index 0..n-1 as a column vector. The length is
# taken from X_train rather than hard-coded so labels and pictures always have
# matching lengths when they are paired up later.
Y_train = np.arange(X_train.shape[0]).reshape(-1, 1)
# Y_train = np.random.rand(X_train.shape[0], 1)
# Y_train = genfromtxt('my_file.csv', delimiter=',')
In [6]:
# Show the shape of the label array as this cell's output.
Y_train.shape
Out[6]:
In [7]:
def save_array(fname, arr):
    """Store `arr` as a bcolz carray rooted at directory `fname` (mode 'w')."""
    on_disk = bcolz.carray(arr, rootdir=fname, mode='w')
    # flush so the data is written out to the root directory
    on_disk.flush()
def load_array(fname):
    """Open the bcolz store at `fname` and return its full contents via `[:]`."""
    stored = bcolz.open(fname)
    return stored[:]
In [8]:
# save_array('test.bc', X_train)
# X_train = load_array('test.bc')
In [9]:
# from keras.preprocessing import image
In [25]:
def flip4DArray(array):
    """
    Return `array` with its second-to-last axis reversed.

    For a batch laid out as (n, rows, cols, channels) this reverses the
    column axis. The original author noted `[:,:,::-1]` as an alternative
    that they found slower; that measurement has not been re-verified.
    """
    # Equivalent to array[..., ::-1, :], spelled out as an explicit index tuple.
    reversed_axis = (Ellipsis, slice(None, None, -1), slice(None))
    return array[reversed_axis]
In [26]:
# The helper defined in this notebook is flip4DArray; no flip3DArray exists,
# so the previous call raised NameError on a fresh kernel.
X_train_flip = flip4DArray(X_train)
In [27]:
# Show the shape of the flipped array as this cell's output.
X_train_flip.shape
Out[27]:
In [28]:
# Drop the trailing axis so a single picture is a 2-D grid for plotting.
# NOTE(review): presumably X_train is (n, rows, cols, 1) at this point - confirm.
# Only a 4-D array is reshaped, so re-running this cell is a no-op instead of
# trying to strip a spatial axis from the already-reduced array.
if X_train.ndim == 4:
    X_train = X_train.reshape(X_train.shape[:-1])
In [29]:
# Drop the trailing axis so a single flipped picture is a 2-D grid for plotting.
# NOTE(review): presumably X_train_flip is (n, rows, cols, 1) here - confirm.
# Only a 4-D array is reshaped, so re-running this cell is a no-op instead of
# trying to strip a spatial axis from the already-reduced array.
if X_train_flip.ndim == 4:
    X_train_flip = X_train_flip.reshape(X_train_flip.shape[:-1])
In [30]:
# Render picture number 10 of the original set as a grayscale heatmap.
sns.heatmap(data=X_train[10], cmap='gray')
Out[30]:
In [31]:
# Render picture number 10 of the flipped set for comparison with the original.
sns.heatmap(data=X_train_flip[10], cmap='gray')
Out[31]:
In [18]:
# No active import of `image` is visible in this notebook (the only one, in an
# earlier cell, is commented out), so import it here; without it this cell
# raises NameError on a fresh kernel.
from keras.preprocessing import image

gen = image.ImageDataGenerator()
In [ ]:
# Add a trailing axis of length 1 to the pictures and hand pictures + labels
# to the generator: batches of 64, shuffle=False.
train = gen.flow(
    np.expand_dims(X_train, axis=-1),
    Y_train,
    batch_size=64,
    shuffle=False,
)
In [ ]:
# Pull one batch from the iterator through the built-in next() and inspect it:
# first the shapes of pictures and labels, then the labels themselves.
x, y = next(train)
print(x.shape, y.shape)
print(y)
In [ ]: