In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import PIL.Image
import glob
import rarfile
import numpy as np
import pandas as pd
from collections import namedtuple
from itertools import product

# One page layout.  `width`/`height` are the page size the layout applies to,
# `convertMode` is the PIL mode panels are converted to, and each panel spans
# xoffset+xmin .. xoffset+xmax horizontally (one panel per entry in `xoffsets`)
# and yoffset+ymin .. yoffset+ymax vertically (one row per entry in `yoffsets`).
offset = namedtuple('offset', ['width', 'height',
                               'convertMode',
                               'xmin', 'xmax', 'xoffsets',
                               'ymin', 'ymax', 'yoffsets'])

# Known layouts, written as plain rows in field order and lifted to `offset`
# records.  Trailing labels are the author's working notes for each row.
offsets = list(map(offset._make, [
    # ---- 1989 ----
    (600, 406, 'LA', 1, 125, [20, 165, 370, 468], 1, 125, [142, 278]),  # Sunday 1989
    (640, 189, 'LA', 2, 185, [10, 230, 445], 2, 185, [0]),              # Weekday 1989

    # ---- 1990 ----
    (640, 186, 'LA', 2, 185, [10, 230, 445], 2, 185, [0]),              # Weekday 1990
    (640, 196, 'LA', 5, 197, [0, 218, 436], 3, 195, [0]),               # Weekday 1990
    (600, 271, 'L', 9, 125, [0, 160, 322, 466], 4, 120, [8, 140]),      # weekend 1990
    (600, 189, 'L', 5, 182, [7, 208, 406], 5, 182, [0]),
    (600, 191, 'L', 5, 182, [7, 208, 406], 7, 184, [0]),
    (600, 273, 'L', 9, 125, [10, 150, 310, 446], 4, 120, [8, 140]),

    # ---- 1991 ----
    (640, 304, 'L', 9, 145, [0, 170, 332, 489], 1, 137, [10, 162]),
    (640, 284, 'L', 9, 140, [0, 170, 322, 489], 1, 132, [2, 148]),
    (640, 279, 'L', 9, 140, [0, 170, 322, 489], 1, 132, [2, 148]),

    # ---- 1992 ----
    (640, 274, 'L', 9, 140, [0, 170, 322, 489], 1, 132, [2, 148]),
    (600, 196, 'L', 5, 182, [7, 208, 406], 7, 184, [0]),

    # ---- 1993 ----
    # The first two rows share the panel geometry of the "Sunday 1989" row and
    # the third shares that of the "weekend 1990" row; only the height differs.
    (600, 403, 'L', 1, 125, [20, 165, 370, 468], 1, 125, [142, 278]),
    (600, 400, 'L', 1, 125, [20, 165, 370, 468], 1, 125, [142, 278]),
    (600, 264, 'L', 9, 125, [0, 160, 322, 466], 4, 120, [8, 140]),

    # ---- 1994 ----
    (780, 281, 'L', 5, 225, [10, 270, 542], 5, 225, [10]),
    (780, 360, 'L', 10, 160, [10, 205, 400, 590], 10, 160, [5, 180]),   # Weekend
    (780, 292, 'L', 5, 230, [10, 280, 542], 5, 230, [15]),
    (600, 222, 'L', 9, 180, [10, 205, 405], 9, 180, [5]),
    (780, 287, 'L', 5, 225, [10, 270, 532], 10, 225, [10]),

    # ---- 2012 ----
    (640, 199, 'L', 5, 199, [0, 218, 436], 3, 197, [0]),                # Weekday 2012
    (640, 288, 'L', 9, 145, [0, 160, 322, 486], 1, 137, [0, 149]),      # Weekend 2012
]))

In [ ]:
# Scan the first archive that is not skipped and stop at the first image whose
# size matches none of the layouts in `offsets`, so that a new layout can be
# worked out for it.  `img` is left bound to the last image opened and is what
# the cells below operate on.
#
# NOTE(review): the glob pattern is an absolute path on the author's machine;
# point it at your own copy of the archives before running.
pattern = '/Users/ajmendez/Dropbox/tmp/dilbert/raw/*.rar'
img = None
sizes = []
# Bound up front so the summary line below cannot raise NameError when the
# glob matches nothing or every archive is skipped.
files = []
goodfiles = []
# Archives whose filename contains one of these strings are skipped.
years = ['1989', '1990', '1991', '1992', '1993']
for filename in sorted(glob.iglob(pattern)):
    if any(year in filename for year in years):
        continue

    print(filename)
    with rarfile.RarFile(filename) as rf:
        files = sorted(rf.namelist())
        print(len(files))
        # Keep every entry containing '.gif', and any other entry unless the
        # archive also holds its '-colour.jpg' counterpart.
        known = set(files)
        goodfiles = [f for f in files
                     if ('.gif' in f) or (f.replace('.jpg', '-colour.jpg') not in known)]
        for f in goodfiles:
            img = PIL.Image.open(rf.open(f))
            sizes.append([img.width, img.height])

            # A layout matches when both dimensions agree to within 3 pixels.
            hasoffset = any(abs(img.width - off.width) <= 3 and
                            abs(img.height - off.height) <= 3
                            for off in offsets)
            if hasoffset:
                continue

            # First image with no known layout: show it and stop.
            print(img.width, img.height)
            plt.imshow(np.array(img.convert('L')), cmap=plt.cm.gray)
            break
    # Only one archive is examined per run; extend `years` to move on.
    break
print('{} {} {}'.format(len(files), len(goodfiles), len(sizes)))

In [ ]:
def convert(image, off):
    """Preview `image` with the panel edges described by `off` drawn on top.

    The image is converted to ``off.convertMode`` and shown in grayscale on a
    new 12x12 inch figure.  For every entry in ``off.xoffsets`` a pair of red
    vertical lines is drawn at ``xoffset + off.xmin`` and
    ``xoffset + off.xmax``; likewise a pair of red horizontal lines for every
    entry in ``off.yoffsets``.  The panel width (``xmax - xmin``) and height
    (``ymax - ymin``) are printed.

    Parameters
    ----------
    image : object with a PIL-style ``convert(mode)`` method
    off : offset
        Layout record to visualise.
    """
    pixels = np.array(image.convert(off.convertMode))
    if pixels.ndim == 3 and pixels.shape[2] == 2:
        # 'LA' yields (rows, cols, 2); imshow only accepts 2-D, RGB or RGBA
        # data, so preview the luminance channel and ignore alpha.
        pixels = pixels[:, :, 0]

    plt.figure(figsize=(12,12))
    plt.imshow(pixels, cmap=plt.cm.gray)
    print(off.xmax-off.xmin)
    print(off.ymax-off.ymin)
    for xoffset in off.xoffsets:
        plt.axvline(xoffset+off.xmin, color='r', lw=2)
        plt.axvline(xoffset+off.xmax, color='r', lw=2)
    for yoffset in off.yoffsets:
        plt.axhline(yoffset+off.ymin, color='r', lw=2)
        plt.axhline(yoffset+off.ymax, color='r', lw=2)
        

# Layout currently being tuned, spelled out field by field.
off = offset(width=780, height=287, convertMode='L',
             xmin=5, xmax=225, xoffsets=[10, 270, 532],
             ymin=10, ymax=225, yoffsets=[10])

# Overlay the layout's panel edges on the image left bound by the scan cell.
convert(image=img, off=off)

In [ ]:
# Maximum panel size (pixels) after down-scaling.
NX=NY=128

def _carvePanels(image, off, convertMode):
    """Yield each panel of `image`: cropped, shrunk to fit NX x NY, converted."""
    # product() varies the x offset fastest, so panels come out row by row.
    for yoffset, xoffset in product(off.yoffsets, off.xoffsets):
        # PIL crop boxes are (left, upper, right, lower) in absolute pixels,
        # i.e. the same edges that convert() draws as guide lines.
        panel = image.crop((xoffset + off.xmin,
                            yoffset + off.ymin,
                            xoffset + off.xmax,
                            yoffset + off.ymax))
        # thumbnail() resizes in place, keeps the aspect ratio and never
        # enlarges.  LANCZOS is the filter formerly exposed as ANTIALIAS,
        # a name Pillow 10 removed.
        panel.thumbnail((NX,NY), PIL.Image.LANCZOS)
        yield panel.convert(convertMode)

def carveImages(image, off, outpattern=None, convertMode=None):
    """Cut `image` into the panels described by the layout `off`.

    Parameters
    ----------
    image : PIL image
        The full page.
    off : offset
        Layout giving the panel edges (see `offsets`).
    outpattern : str, optional
        Format string containing ``{j}``.  When given (and non-empty), panel
        number ``j`` (0-based, row by row) is saved to
        ``outpattern.format(j=j)``.
    convertMode : str, optional
        PIL mode for the output panels; defaults to ``off.convertMode``.

    Returns
    -------
    iterator
        Without `outpattern`: a lazy iterator over the converted panels.
        With `outpattern`: every panel is written immediately and an empty
        iterator is returned, so both a bare call and code that iterates over
        the result save all the files.
    """
    if convertMode is None:
        convertMode = off.convertMode
    panels = _carvePanels(image, off, convertMode)
    if not outpattern:
        return panels
    for j, panel in enumerate(panels):
        panel.save(outpattern.format(j=j))
    return iter(())

def plotImages(image, off):
    """Show the panels carved from `image` on a grid of subplots.

    The grid has one column per entry in ``off.xoffsets`` and one row per
    entry in ``off.yoffsets``, each cell 3x3 inches.  Panels come from
    ``carveImages(image, off)`` and fill the grid row by row.

    Parameters
    ----------
    image : PIL image
        The full page.
    off : offset
        Layout describing the panels.
    """
    nx,ny = len(off.xoffsets), len(off.yoffsets)
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, where
    # subplots() would otherwise hand back a bare Axes with no .flatten().
    fig, axes = plt.subplots(ny, nx, figsize=(nx*3, ny*3), squeeze=False)

    for (img,ax) in zip(carveImages(image, off), axes.flatten()):
        pixels = np.array(img)
        if pixels.ndim == 3 and pixels.shape[2] == 2:
            # 'LA' panels are (rows, cols, 2), which imshow rejects; show the
            # luminance channel only.
            pixels = pixels[:, :, 0]
        # The colormap only affects 2-D (grayscale) data and matches the
        # gray preview drawn by convert().
        ax.imshow(pixels, interpolation='nearest', cmap=plt.cm.gray)

# Render the carved panels for the layout currently being tuned.
plotImages(image=img, off=off)

In [ ]:
import cv2

In [ ]:
# Grayscale pixel values rescaled from the 0-255 byte range to 0.0-1.0.
np.true_divide(np.array(img.convert('L')), 255.0)

In [ ]:
# Show the current image as grayscale.
plt.imshow(X=np.array(img.convert('L')), cmap=plt.cm.gray)

In [ ]:
# Binarise the page at mid-gray and show the result.
gray = np.array(img.convert('L'))
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
plt.imshow(thresh, cmap=plt.cm.gray, vmin=0, vmax=255)

# findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x; the last two items are the same in all
# of them.  A copy is passed because older versions modify their input.
contours, hierarchy = cv2.findContours(thresh.copy(),
                                       cv2.RETR_TREE,
                                       cv2.CHAIN_APPROX_NONE)[-2:]
if len(contours) == 0:
    print('no contours found')
else:
    cnt = contours[0]
    # minAreaRect gives ((cx, cy), (w, h), angle); boxPoints turns that into
    # the four corner points that drawContours expects.
    rect = cv2.minAreaRect(cnt)
    box = cv2.boxPoints(rect).astype(np.intp)
    # OpenCV draws on ndarrays, not PIL images: use an RGB copy of the page.
    # (0, 0, 255) is blue in the RGB order matplotlib displays.
    canvas = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
    cv2.drawContours(canvas, [box], 0, (0, 0, 255), 2)
    plt.figure()
    plt.imshow(canvas)

# area = cv2.contourArea(cnt)
# print area

# contours, hierarchy = cv2.findContours(thres)

In [ ]:
#     for xoffset in 
#     xmin,xmax = [9,145]
#     ymin,ymax = [1,137]
#     xoffsets = [0, 160, 322, 486]
#     yoffsets = [0, 149]
#     # Remember people read left to right -- top to bottom
#     for j,(yoffset, xoffset) in enumerate(product(yoffsets, xoffsets)):
#         img = image.crop((xoffset + xmin, 
#                           yoffset + ymin, 
#                           xoffset + xmax - xmin, 
#                           yoffset + ymax - ymin))
#         img.thumbnail((NX,NY), PIL.Image.ANTIALIAS)
#         img.convert('L').save(outpattern.format(j=j))