Import Data

  • Walk data directories
  • Load images (PIL)
  • Resample, flatten, and tag with metadata
  • Output

In [ ]:
%matplotlib inline
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
import PIL
import os

In [ ]:
# Bounding box, in pixels, that every cropped panel is shrunk to fit inside.
NX = 128  # maximum panel width after resampling
NY = 128  # maximum panel height after resampling

# Glob pattern selecting the source strips to convert.
INPATTERN = '/Users/ajmendez/Downloads/dilbert2012/*.jpg'

# Output path template; the `{basename}` field is filled in once per strip.
OUTFORMAT = '/Users/ajmendez/data/dilbert/2012/{basename}'

In [ ]:
# `product` is used by convertWeekend to pair every row offset with every
# column offset, so this import must run before that function is called.
from itertools import product
# Demo of the iteration order: the last iterable varies fastest, i.e.
# ('a','c'), ('a','d'), ('a','e'), ('b','c'), ('b','d'), ('b','e').
list(product(['a', 'b'], ['c', 'd', 'e']))

In [ ]:
def convertWeekday(image, outpattern):
    """Split a three-panel weekday strip into grayscale thumbnails.

    Parameters
    ----------
    image : PIL.Image.Image
        The full strip (the calling loop sends strips about 199 px tall).
    outpattern : str
        Output path template with a ``{j}`` field; it is filled with the
        zero-based panel index, counted left to right.

    Each panel is cropped out, shrunk in place so it fits inside
    ``(NX, NY)``, converted to mode 'L' (8-bit grayscale) and saved.
    Nothing is returned.
    """
    xmin, xmax = 5, 199  # left/right panel edges, relative to the panel offset
    ymin, ymax = 3, 197  # top/bottom panel edges
    for j, xoffset in enumerate([0, 218, 436]):
        # Image.crop expects (left, upper, right, lower) edge coordinates,
        # not a width/height.  The far edges are therefore xmax/ymax
        # themselves; subtracting xmin/ymin from them chopped pixels off
        # the right and bottom of every panel.
        img = image.crop((xoffset + xmin, ymin, xoffset + xmax, ymax))
        # LANCZOS is the filter ANTIALIAS was an alias for; the alias was
        # removed in Pillow 10.
        img.thumbnail((NX, NY), PIL.Image.LANCZOS)
        img.convert('L').save(outpattern.format(j=j))

def convertWeekend(image, outpattern):
    """Split a Sunday strip (2 rows x 4 columns) into grayscale thumbnails.

    Parameters
    ----------
    image : PIL.Image.Image
        The full strip (the calling loop sends strips about 288 px tall).
    outpattern : str
        Output path template with a ``{j}`` field; it is filled with the
        zero-based panel index in reading order (left to right, then top
        to bottom).

    Each panel is cropped out, shrunk in place so it fits inside
    ``(NX, NY)``, converted to mode 'L' (8-bit grayscale) and saved.
    Nothing is returned.
    """
    xmin, xmax = 9, 145  # left/right panel edges, relative to the column offset
    ymin, ymax = 1, 137  # top/bottom panel edges, relative to the row offset
    xoffsets = [0, 160, 322, 486]
    yoffsets = [0, 149]
    # Remember people read left to right -- top to bottom: product() varies
    # its last argument fastest, so the row offset goes first.
    for j, (yoffset, xoffset) in enumerate(product(yoffsets, xoffsets)):
        # Image.crop expects (left, upper, right, lower) edge coordinates,
        # not a width/height.  The far edges are therefore offset + xmax and
        # offset + ymax; subtracting xmin/ymin from them chopped pixels off
        # the right and bottom of every panel.
        img = image.crop((xoffset + xmin,
                          yoffset + ymin,
                          xoffset + xmax,
                          yoffset + ymax))
        # LANCZOS is the filter ANTIALIAS was an alias for; the alias was
        # removed in Pillow 10.
        img.thumbnail((NX, NY), PIL.Image.LANCZOS)
        img.convert('L').save(outpattern.format(j=j))

In [ ]:
# The output directory is the same for every strip, so derive it once.
dirname = os.path.dirname(OUTFORMAT)

for filename in tqdm(sorted(glob.glob(INPATTERN))):

    # outpattern is used to output the frames:
    # "<name>-colour.jpg" becomes "<name>.{j}.jpg".  Going through splitext
    # guarantees the "{j}" field is present even for a file without the
    # "-colour" suffix; without it every panel of that strip would be
    # written to the same path and overwrite the previous one.
    stem, ext = os.path.splitext(os.path.basename(filename))
    if stem.endswith('-colour'):
        stem = stem[:-len('-colour')]
    basename = stem + '.{j}' + ext
    outpattern = OUTFORMAT.format(basename=basename)

    # Both layouts produce a panel 1, so its presence marks the strip as
    # already converted.
    if os.path.exists(outpattern.format(j=1)):
        continue
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    # Zero-byte inputs cannot be decoded; report them for manual follow-up.
    if os.path.getsize(filename) == 0:
        print('Check on: {}'.format(filename))
        continue

    with PIL.Image.open(filename) as img:
        # The layout is recognised from the strip height (within 1 px).
        if abs(img.height-288) < 2:
            # Sunday... FUNDAY
            convertWeekend(img, outpattern)

        elif abs(img.height-199) < 2:
            # Weekday... workday
            convertWeekday(img, outpattern)

        else:
            # Unknown layout: stop so new crop coordinates can be worked out.
            raise ValueError('Find out where you should be cropping: {}x{}'.format(img.height, img.width))

Figure out Crop Locations


In [ ]:
# Scratch cell for finding the weekday panel crop box by eye: show one panel's
# neighbourhood and overlay the candidate edges in red.
filename = '/Users/ajmendez/Downloads/dilbert2012/dilbert20120102-colour.jpg'
img = plt.imread(filename)
# NOTE: not the last expression of the cell, so this value is not displayed.
img.shape
plt.figure(figsize=(12,12))
# plt.imshow(img[3:196,3:201,:], interpolation='nearest')
# Manual panel selector: the last uncommented assignment wins.  The candidate
# values match the x offsets used in convertWeekday.
xoffset = 0
xoffset = 218
# xoffset = 436
# Candidate panel edges, measured relative to xoffset.
# NOTE(review): convertWeekday uses ymin,ymax = [3,197]; this cell uses
# [2,196] -- confirm which pair is the intended one.
xmin,xmax = [5,199]
ymin,ymax = [2,196]
# Show a 210 px wide window starting at the selected offset, so the overlay
# lines below are drawn in panel-relative coordinates.
plt.imshow(img[:,xoffset:xoffset+210,:], interpolation='nearest', alpha=0.5)
# Width and height of the candidate box.
print(xmax-xmin, ymax-ymin)
plt.axvline(xmin, lw=0.5, color='r')
plt.axvline(xmax, lw=0.5, color='r')
plt.axhline(ymin, lw=0.5, color='r')
plt.axhline(ymax, lw=0.5, color='r')

In [ ]:
# Scratch cell for finding the Sunday panel crop box by eye: show one panel's
# neighbourhood and overlay the candidate edges in red.
filename = '/Users/ajmendez/Downloads/dilbert2012/dilbert20120101-colour.jpg'
img = plt.imread(filename)
# NOTE: not the last expression of the cell, so this value is not displayed.
img.shape
plt.figure(figsize=(12,12))
# plt.imshow(img[3:196,3:201,:], interpolation='nearest')
# Manual column selector: the last assignment wins (486 as written).  The
# candidate values match the x offsets used in convertWeekend.
xoffset = 0
xoffset = 160
xoffset = 322
xoffset = 486
# Manual row selector: 0 is the value in effect; 149 is the commented-out
# alternative, matching the second y offset in convertWeekend.
yoffset = 0
# yoffset = 149
# Candidate panel edges, measured relative to (xoffset, yoffset).
xmin,xmax = [9,145]
ymin,ymax = [1,137]
# Show a window starting at the selected offsets, so the overlay lines below
# are drawn in panel-relative coordinates.
plt.imshow(img[yoffset:yoffset+200,xoffset:xoffset+210,:], interpolation='nearest', alpha=0.5)
# Width and height of the candidate box.
print(xmax-xmin, ymax-ymin)
plt.axvline(xmin, lw=0.5, color='r')
plt.axvline(xmax, lw=0.5, color='r')
plt.axhline(ymin, lw=0.5, color='r')
plt.axhline(ymax, lw=0.5, color='r')

In [ ]: