In [6]:
import glob
import sys
import os
from datetime import datetime
from PIL import Image
from PIL.ImageOps import invert
import numpy as np
from tools.le import le
import time

In [7]:
angles = range(0, 360, 45)    # Rotation angles in degrees


def make_rotations(image, rotation_angles=None):
    """Return rotated copies of `image`, one per rotation angle.

    Parameters
    ----------
    image : PIL image
        Source image. It is passed to `invert`, so it must be in a mode
        that `PIL.ImageOps.invert` accepts.
    rotation_angles : iterable of numbers, optional
        Rotation angles in degrees. Defaults to the module-level `angles`.

    Returns
    -------
    list of (rotated_image, angle) tuples, in the order of the angles.
    """
    if rotation_angles is None:
        rotation_angles = angles
    # The image is inverted before rotating and inverted back afterwards.
    # NOTE(review): presumably this makes the corners uncovered by the
    # rotation come out white instead of black -- confirm against the data.
    # The first inversion does not depend on the angle, so do it only once.
    inverted = invert(image)
    return [(invert(inverted.rotate(ang)), ang) for ang in rotation_angles]

In [11]:
mode = 'test'

if mode == 'train':
    # Training images are globbed as <path_data>/<sub-directory>/<name>.jpg
    path_data = '/data/ndsb/train'
    labels = os.listdir(path_data)
    files_all = np.array(list(map(
        os.path.abspath, glob.glob(os.path.join(path_data, '*', '*.jpg')))))
    # The string label of each file is the name of its parent directory.
    y_str = [os.path.split(os.path.dirname(f))[1] for f in files_all]
    # NOTE(review): `le` comes from tools.le; presumably a fitted label
    # encoder mapping the string labels to integer ids -- confirm.
    y = le.transform(y_str)
elif mode == 'test':
    # Test images are globbed directly as <path_data>/<name>.jpg
    path_data = '/data/ndsb/test'
    files_all = np.array(list(map(
        os.path.abspath, glob.glob(os.path.join(path_data, '*.jpg')))))

In [12]:
# Create the output directory tree for the augmented images.
# os.makedirs is used instead of os.mkdir so that a missing `path_augment`
# parent directory is created as well instead of raising OSError.
if mode == 'train':
    path_augment = '/data/ndsb/augment/train'
    # Make directory for each class label
    for label in set(y_str):
        aug_dir = os.path.join(path_augment, label)
        if not os.path.exists(aug_dir):
            os.makedirs(aug_dir)
elif mode == 'test':
    path_augment = '/data/ndsb/augment/test'
    # Make directory for each image (each image might be its own label)
    for f in files_all:
        # Directory name is the source file name without its extension.
        f_name = os.path.splitext(os.path.basename(f))[0]
        aug_dir = os.path.join(path_augment, f_name)
        if not os.path.exists(aug_dir):
            os.makedirs(aug_dir)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print('Done prepping directories')

In [14]:
# Write every rotation of every source image to its output directory.
tic = time.time()
fails = []  # never appended to below: a failing save raises and stops the run
if mode in ('train', 'test'):
    for ii, f in enumerate(files_all):
        f_name = os.path.splitext(os.path.basename(f))[0]
        # Train output is grouped by class label, test output by source image.
        if mode == 'train':
            dir_save = os.path.join(path_augment, y_str[ii])
        else:
            dir_save = os.path.join(path_augment, f_name)
        img = Image.open(f)
        for rimg, ang in make_rotations(img):
            # File name pattern: <f_name>_rot<angle>.jpg
            path_save = os.path.join(dir_save, '%s_rot%s.jpg' % (f_name, ang))
            rimg.save(path_save)

        # Report progress every 5000 source files.
        n_done = ii + 1
        if n_done % 5000 == 0:
            print('Processed %i files in %is' % (n_done, time.time() - tic))
            sys.stdout.flush()
print('Done saving augmented files')


Processed 5000 files in 23s
Processed 10000 files in 95s
Processed 15000 files in 226s
Processed 20000 files in 326s
Processed 25000 files in 447s
Processed 30000 files in 578s
Processed 35000 files in 702s
Processed 40000 files in 828s
Processed 45000 files in 966s
Processed 50000 files in 1106s
Processed 55000 files in 1248s
Processed 60000 files in 1380s
Processed 65000 files in 1522s
Processed 70000 files in 1656s
Processed 75000 files in 1796s
Processed 80000 files in 1924s
Processed 85000 files in 2061s
Processed 90000 files in 2193s
Processed 95000 files in 2322s
Processed 100000 files in 2469s
Processed 105000 files in 2603s
Processed 110000 files in 2736s
Processed 115000 files in 2864s
Processed 120000 files in 3009s
Processed 125000 files in 3153s
Processed 130000 files in 3274s
Done saving augmented files

In [66]:
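# # TRY AGAIN (disabled retry of failed saves)
# # NOTE: as written, the loop unpacks (im, path_save) from `fails` but then
# # overwrites path_save from the stale dir_save/f_name/ang and saves the
# # stale `rimg`; a working retry would call im.save(path_save) instead.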
# if mode in ['train', 'test']:
#     for ii, (im, path_save) in enumerate(fails):
#         path_save = os.path.join(dir_save, f_name + '_rot' + str(ang) + '.jpg')
#         rimg.save(path_save)

#         if ((ii+1) % 1000) == 0:
#             print 'Processed %i files in %is' % (ii+1, time.time() - tic)


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-66-f854fe75c473> in <module>()
      2 for ii, (im, path_save) in enumerate(fails):
      3     path_save = os.path.join(dir_save, f_name + '_rot' + str(ang) + '.jpg')
----> 4     rimg.save(path_save)
      5 
      6     if ((ii+1) % 1000) == 0:

/usr/lib/python2.7/site-packages/PIL/Image.pyc in save(self, fp, format, **params)
   1677 
   1678         if isPath(fp):
-> 1679             fp = builtins.open(fp, "wb")
   1680             close = 1
   1681         else:

IOError: [Errno 27] File too large: '../../data/ndsb/augment/test/26855_rot315.jpg'

In [24]:
if mode == 'single':
    # Testing with a single file
    import matplotlib.pyplot as plt
    %matplotlib inline
    IMAGE_FILE = '/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/acantharia_protist/100224.jpg'
    im = Image.open(IMAGE_FILE)
    rots = make_rotations(im)
    fig, axes = plt.subplots(1,2)
    axes[0].imshow(im, interpolation='none', cmap='gray')
    axes[1].imshow(rots[0][0], interpolation='none', cmap='gray')
    # plt.axis('off')


Out[24]:
<matplotlib.image.AxesImage at 0x7f4ded866e90>