In [6]:
import glob
import sys
import os
from datetime import datetime
from PIL import Image
from PIL.ImageOps import invert
import numpy as np
from tools.le import le
import time

In [7]:
angles = range(0, 360, 45)    # Rotation angles in degrees

def make_rotations(image, rotation_angles=None):
    """Return rotated copies of `image`, one per rotation angle.

    The image is inverted before rotating and inverted back afterwards, so
    the regions exposed by the rotation (which PIL fills with 0) end up at
    the maximum pixel value after the second inversion -- presumably to match
    a light image background; confirm against the data.

    Parameters
    ----------
    image : PIL.Image.Image
        Source image. It must be in a mode accepted by `PIL.ImageOps.invert`.
    rotation_angles : iterable of numbers, optional
        Angles in degrees passed to `Image.rotate`. Defaults to the
        module-level `angles`.

    Returns
    -------
    list of (PIL.Image.Image, angle) tuples, in the order of the angles.
    """
    if rotation_angles is None:
        rotation_angles = angles
    # The first inversion does not depend on the angle, so do it only once.
    inverted = invert(image)
    imgs_rot = [(invert(inverted.rotate(ang)), ang) for ang in rotation_angles]
    return imgs_rot

In [11]:
# Which dataset to process: 'train' or 'test' ('single' is used by the
# single-image check cell further down).
mode = 'test'

if mode == 'train':
    # Labelled training images: one sub-directory of path_data per class.
    path_data = '/data/ndsb/train'
    labels = os.listdir(path_data)
    files_all = np.array([
        os.path.abspath(p)
        for p in glob.glob(os.path.join(path_data, '*', '*.jpg'))
    ])
    # Each file's class name is the name of the directory that holds it.
    y_str = [os.path.split(os.path.dirname(p))[1] for p in files_all]
    # `le` comes from tools.le -- presumably a fitted label encoder; verify.
    y = le.transform(y_str)
elif mode == 'test':
    # Test images are stored flat in path_data; no labels are derived here.
    path_data = '/data/ndsb/test'
    files_all = np.array([
        os.path.abspath(p)
        for p in glob.glob(os.path.join(path_data, '*.jpg'))
    ])

In [12]:
# Create the output directory tree for the augmented images.
# os.makedirs is used instead of os.mkdir so that a missing `path_augment`
# root is created as well rather than raising OSError on the first sub-directory.
if mode == 'train':
    path_augment = '/data/ndsb/augment/train'
    # Make directory for each class label
    for label in set(y_str):
        aug_dir = os.path.join(path_augment, label)
        if not os.path.isdir(aug_dir):
            os.makedirs(aug_dir)
elif mode == 'test':
    path_augment = '/data/ndsb/augment/test'
    # Make directory for each image (each image might be its own label)
    for f in files_all:
        f_name = os.path.splitext(os.path.basename(f))[0]
        aug_dir = os.path.join(path_augment, f_name)
        if not os.path.isdir(aug_dir):
            os.makedirs(aug_dir)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print('Done prepping directories')

In [13]:
# Write every rotation of every source image into the augment tree.
tic = time.time()
# (rotated image, destination path) pairs whose save raised IOError, so one
# bad write does not abort the whole run.
fails = []

# Pick the output sub-directory for each source file.
if mode == 'train':
    # Training outputs are grouped by class label.
    subdirs = y_str
elif mode == 'test':
    # Test outputs get one directory per source image, named after the file.
    subdirs = [os.path.splitext(os.path.basename(f))[0] for f in files_all]
else:
    # Any other mode (e.g. 'single') saves nothing.
    subdirs = None

if subdirs is not None:
    for ii, (f, subdir) in enumerate(zip(files_all, subdirs)):
        f_name = os.path.splitext(os.path.basename(f))[0]
        img = Image.open(f)
        dir_save = os.path.join(path_augment, subdir)
        img_rots = make_rotations(img)
        for rimg, ang in img_rots:
            path_save = os.path.join(dir_save, f_name + '_rot' + str(ang) + '.jpg')
            try:
                rimg.save(path_save)
            except IOError:
                # Record the failure and keep going; reported after the loop.
                fails.append((rimg, path_save))

        if ((ii+1) % 5000) == 0:
            print('Processed %i files in %is' % (ii+1, time.time() - tic))
            sys.stdout.flush()

if fails:
    print('%i files could not be saved; see `fails`' % len(fails))
print('Done saving augmented files')


Processed 1000 files in 24s
Processed 2000 files in 41s
Processed 3000 files in 55s
Processed 4000 files in 81s
Processed 5000 files in 104s
Processed 6000 files in 123s
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-13-2c89f668ab91> in <module>()
     23             path_save = os.path.join(dir_save, f_name + '_rot' + str(ang) + '.jpg')
     24             try:
---> 25                 rimg.save(path_save)
     26             except IOError:
     27                 fails.append((rimg, path_save))

/usr/lib/python2.7/site-packages/PIL/Image.pyc in save(self, fp, format, **params)
   1658         preinit()
   1659 
-> 1660         ext = os.path.splitext(filename)[1].lower()
   1661 
   1662         if not format:

/usr/lib/python2.7/posixpath.pyc in splitext(p)
    103 
    104 def splitext(p):
--> 105     return genericpath._splitext(p, sep, altsep, extsep)
    106 splitext.__doc__ = genericpath._splitext.__doc__
    107 

/usr/lib/python2.7/genericpath.pyc in _splitext(p, sep, altsep, extsep)
    100         while filenameIndex < dotIndex:
    101             if p[filenameIndex] != extsep:
--> 102                 return p[:dotIndex], p[dotIndex:]
    103             filenameIndex += 1
    104 

KeyboardInterrupt: 

In [66]:
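# # TRY AGAIN
# # NOTE: as written, this retry ignores the (im, path_save) pair unpacked from
# # `fails`: it rebuilds path_save from the stale dir_save/f_name/ang globals and
# # re-saves the stale `rimg`, so every iteration writes the same file.
# # A working retry would call im.save(path_save) on the unpacked pair instead.
# if mode in ['train', 'test']:
#     for ii, (im, path_save) in enumerate(fails):
#         path_save = os.path.join(dir_save, f_name + '_rot' + str(ang) + '.jpg')
#         rimg.save(path_save)

#         if ((ii+1) % 1000) == 0:
#             print 'Processed %i files in %is' % (ii+1, time.time() - tic)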


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-66-f854fe75c473> in <module>()
      2 for ii, (im, path_save) in enumerate(fails):
      3     path_save = os.path.join(dir_save, f_name + '_rot' + str(ang) + '.jpg')
----> 4     rimg.save(path_save)
      5 
      6     if ((ii+1) % 1000) == 0:

/usr/lib/python2.7/site-packages/PIL/Image.pyc in save(self, fp, format, **params)
   1677 
   1678         if isPath(fp):
-> 1679             fp = builtins.open(fp, "wb")
   1680             close = 1
   1681         else:

IOError: [Errno 27] File too large: '../../data/ndsb/augment/test/26855_rot315.jpg'

In [24]:
if mode == 'single':
    # Testing with a single file
    import matplotlib.pyplot as plt
    %matplotlib inline
    IMAGE_FILE = '/afs/ee.cooper.edu/user/t/a/tam8/data/ndsb/train/acantharia_protist/100224.jpg'
    im = Image.open(IMAGE_FILE)
    rots = make_rotations(im)
    fig, axes = plt.subplots(1,2)
    axes[0].imshow(im, interpolation='none', cmap='gray')
    axes[1].imshow(rots[0][0], interpolation='none', cmap='gray')
    # plt.axis('off')


Out[24]:
<matplotlib.image.AxesImage at 0x7f4ded866e90>