Convert Pictures to Numpy


In [1]:
#Create references to important directories we will use over and over
import os, sys
current_dir = os.getcwd()
SCRIPTS_HOME_DIR = current_dir
DATA_HOME_DIR = current_dir+'/data'

In [2]:
from glob import glob
import numpy as np
import _pickle as pickle
import PIL
from PIL import Image

from tqdm import tqdm
from PIL import ImageOps
from PIL import Image

from tqdm import tqdm
import bcolz

In [10]:
import seaborn as sns
import matplotlib as plt
%matplotlib inline

Iterate through pictures in directory

Assuming X_train exists as ordered images
Y_train is csv file


In [3]:
def folder_to_numpy(image_directory_full):
    """
    Read sorted pictures (by filename) in a folder to a numpy array
    
    USAGE:
        data_folder = '/train/test1'
        X_train = folder_to_numpy(data_folder)
    
    Args:
        data_folder (str): The relative folder from DATA_HOME_DIR
        
    Returns:
        picture_array (np array): The numpy array in tensorflow format
    """
    # change directory
    print ("Moving to directory: " + image_directory_full)
    os.chdir(image_directory_full)
    
    # read in filenames from directory
    g = glob('*.png')
    if len(g) == 0:
        g = glob('*.jpg')
    print ("Found {} pictures".format(len(g)))
    
    # sort filenames
    g.sort()
    
    # open and convert images to numpy array
    print("Starting pictures to numpy conversion")
    picture_arrays = np.array([np.array(Image.open(image_path)) for image_path in g])
    
#     reshape to tensorflow format
#     picture_arrays = picture_arrays.reshape(*picture_arrays.shape, 1)
    print ("Shape of output: {}".format(picture_arrays.shape))
    
    # return array
    return picture_arrays

In [24]:
data_folder = '/train/binary/forward'
X_train = folder_to_numpy(data_folder)


Moving to directory: /home/nathan/olin/spring2017/line-follower/line-follower/src/old_lane_follower_past_project/data/train/binary/forward
Found 754 pictures
Starting pictures to numpy conversion
Shape of output: (754, 16, 32, 1)

In [5]:
Y_train = np.arange(0,754).reshape(754,1)
# Y_train = np.random.rand(X_train.shape[0], 1)
# Y_train = genfromtxt('my_file.csv', delimiter=',')

In [6]:
Y_train.shape


Out[6]:
(754, 1)

In [7]:
def save_array(fname, arr):
    c=bcolz.carray(arr, rootdir=fname, mode='w')
    c.flush()

def load_array(fname):
    return bcolz.open(fname)[:]

In [8]:
# save_array('test.bc', X_train)
# X_train = load_array('test.bc')

In [9]:
# from keras.preprocessing import image

In [25]:
def flip4DArray(array):
    return array[..., ::-1,:] #[:,:,::-1] also works but is 50% slower

In [26]:
X_train_flip = flip3DArray(X_train)

In [27]:
X_train_flip.shape


Out[27]:
(754, 16, 32, 1)

In [28]:
X_train = X_train.reshape(X_train.shape[:-1])

In [29]:
X_train_flip = X_train_flip.reshape(X_train_flip.shape[:-1])

In [30]:
sns.heatmap(X_train[10], cmap='gray')


Out[30]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe7bdc12cc0>

In [31]:
sns.heatmap(X_train_flip[10], cmap='gray')


Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe7bdaf6390>

In [18]:
gen = image.ImageDataGenerator()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-18-c9f2072a4ad4> in <module>()
----> 1 gen = image.ImageDataGenerator()

NameError: name 'image' is not defined

In [ ]:
train = gen.flow(X_train.reshape(*X_train.shape, 1), Y_train, shuffle=False, batch_size=64)

In [ ]:
x, y = train.next()
print(x.shape, y.shape)
print(y)

In [ ]: