Data preprocessing


In [1]:
# Create references to the directories that are reused throughout the notebook.
import os, sys

# NOTE: this captures the kernel's working directory at the moment the cell
# runs. Later cells call %cd / os.chdir, so re-running this cell without
# restarting the kernel produces different paths.
current_dir = os.getcwd()
SCRIPTS_HOME_DIR = current_dir
# os.path.join instead of manual '+' avoids hand-built separators
# (e.g. a doubled slash when the working directory is the filesystem root).
DATA_HOME_DIR = os.path.join(current_dir, 'data')

In [2]:
# Display the data directory path computed above as a quick sanity check.
DATA_HOME_DIR


Out[2]:
'/home/nathan/olin/spring2017/line-follower/line-follower/src/old_lane_follower_past_project/data'

In [3]:
from glob import glob
import numpy as np
import _pickle as pickle
import PIL
from PIL import Image
from PIL import ImageOps
from tqdm import tqdm
import bcolz

In [3]:
def resize_32(image_path):
    """Crop a 32x32 single-channel image file to its bottom half, in place.

    The file at ``image_path`` is loaded as an array. If that array has shape
    exactly ``(32, 32)``, the top 16 rows are discarded, and the remaining
    16-row by 32-column image is written back to the same path in mode ``'L'``.
    Files whose array has any other shape are left untouched, so an image
    that has already been cropped to ``(16, 32)`` is skipped on a later call.

    Args:
        image_path: Path of the image file to crop; overwritten when cropped.
    """
    # Read inside a context manager so the file handle is released before the
    # same path is overwritten below.
    with Image.open(image_path) as source:
        image_array = np.asarray(source)
    if image_array.shape != (32, 32):
        return
    # Keep rows 16..31 (the bottom half); all 32 columns are preserved.
    bottom_half = image_array[16:, :]
    Image.fromarray(bottom_half, 'L').save(image_path)

In [4]:
%cd $DATA_HOME_DIR/train/binary
folders = ([name for name in os.listdir(".") if os.path.isdir(name)])


/home/nathan/olin/spring2017/lane_follower/src/scripts/data/train/binary

In [7]:
# Crop every PNG under each train/binary sub-directory in place.
for subdir in tqdm(folders):
    # The glob pattern below is relative, so enter the sub-directory first.
    os.chdir(DATA_HOME_DIR + '/train/binary/' + subdir)
    for png_name in glob('*.png'):
        resize_32(png_name)


0it [00:00, ?it/s]

In [8]:
%cd $DATA_HOME_DIR/valid/binary
folders = ([name for name in os.listdir(".") if os.path.isdir(name)])


/home/nathan/olin/spring2017/lane_follower/src/scripts/data/valid/binary

In [9]:
# Crop every PNG under each valid/binary sub-directory in place.
for subdir in tqdm(folders):
    # The glob pattern below is relative, so enter the sub-directory first.
    os.chdir(DATA_HOME_DIR + '/valid/binary/' + subdir)
    for png_name in glob('*.png'):
        resize_32(png_name)


100%|██████████| 3/3 [00:00<00:00, 33.82it/s]

Average Image


In [10]:
%cd $DATA_HOME_DIR/train/binary
folders = ([name for name in os.listdir(".") if os.path.isdir(name)])


/home/nathan/olin/spring2017/lane_follower/src/scripts/data/train/binary

In [15]:
# Build one pixel-wise average image per folder; imgs_all[i] corresponds to
# folders[i].
imgs_all = []
for folder in tqdm(folders):
    # The glob pattern below is relative, so enter the folder first.
    os.chdir(DATA_HOME_DIR + '/train/binary/' + folder)
    g = glob('*.png')
    frames = []
    for image_path in g:
        # Close each file as soon as its pixels are read instead of leaving
        # one open handle per image for the garbage collector.
        with Image.open(image_path) as source:
            frames.append(np.asarray(source))
    imgs = np.array(frames)
    # NOTE(review): assumes every folder holds at least one PNG and that all
    # PNGs in a folder share one shape; an empty folder makes this 0/0 (NaN).
    img_avg = imgs.sum(axis=0) / imgs.shape[0]
    imgs_all.append(img_avg)


100%|██████████| 3/3 [00:00<00:00, 33.42it/s]

In [23]:
from matplotlib.pyplot import imshow
%matplotlib inline

In [27]:
# Show the average image computed for the first entry of `folders`.
imshow(imgs_all[0])


Out[27]:
<matplotlib.image.AxesImage at 0x7f2702719080>

In [28]:
# Show the average image computed for the second entry of `folders`.
imshow(imgs_all[1])


Out[28]:
<matplotlib.image.AxesImage at 0x7f2706ad1ac8>

In [29]:
# Show the average image computed for the third entry of `folders`.
imshow(imgs_all[2])


Out[29]:
<matplotlib.image.AxesImage at 0x7f27029f5748>

Color


In [4]:
%cd $DATA_HOME_DIR/train/color
folders = ([name for name in os.listdir(".") if os.path.isdir(name)])


/home/nathan/olin/spring2017/line-follower/line-follower/src/old_lane_follower_past_project/data/train/color

In [7]:
# Crop every 32x32 RGB PNG under each train/color folder to its bottom half,
# overwriting the file in place.
for folder in tqdm(folders):
    # The glob pattern below is relative, so enter the folder first.
    os.chdir(DATA_HOME_DIR + '/train/color/' + folder)
    for image_path in glob('*.png'):
        # Release the file handle before the same path is overwritten below.
        with Image.open(image_path) as source:
            image_array = np.asarray(source)
        # Only arrays of shape exactly (32, 32, 3) are cropped; anything else
        # (including an already-cropped (16, 32, 3) image) is left as is.
        if image_array.shape != (32, 32, 3):
            continue
        # Keep rows 16..31; all columns and all three channels are preserved.
        Image.fromarray(image_array[16:, :, :], 'RGB').save(image_path)


100%|██████████| 3/3 [00:00<00:00, 13.96it/s]

In [ ]:
# NOTE(review): this redefines the resize_32 already declared near the top of
# the notebook; running this cell shadows that earlier definition.
def resize_32(image_path):
    """Crop a 32x32 single-channel image file to its bottom half, in place.

    The file at ``image_path`` is loaded as an array. If that array has shape
    exactly ``(32, 32)``, the top 16 rows are discarded, and the remaining
    16-row by 32-column image is written back to the same path in mode ``'L'``.
    Files whose array has any other shape are left untouched.

    Args:
        image_path: Path of the image file to crop; overwritten when cropped.
    """
    # Read inside a context manager so the file handle is released before the
    # same path is overwritten below.
    with Image.open(image_path) as source:
        image_array = np.asarray(source)
    if image_array.shape != (32, 32):
        return
    # Keep rows 16..31 (the bottom half); all 32 columns are preserved.
    bottom_half = image_array[16:, :]
    Image.fromarray(bottom_half, 'L').save(image_path)