In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import PIL.Image
import glob
import rarfile
import numpy as np
import pandas as pd
from collections import namedtuple
from itertools import product
# Crop geometry for one image layout. Field usage elsewhere in this file:
#   width, height : image size in pixels; the scan cell treats an image as
#                   covered when both dimensions are within 3 px of these.
#   convertMode   : mode string passed to the image's convert() method.
#   xmin, xmax    : left/right edges of a region, added to each xoffsets entry.
#   xoffsets      : x origin of each column of regions.
#   ymin, ymax    : top/bottom edges of a region, added to each yoffsets entry.
#   yoffsets      : y origin of each row of regions.
# Presumably each region is one comic panel -- confirm against the data.
offset = namedtuple('offset', ['width', 'height',
'convertMode',
'xmin', 'xmax', 'xoffsets',
'ymin', 'ymax', 'yoffsets' ])
# Table of known layouts, grouped by the year comments below.
# NOTE(review): some trailing labels look copy-pasted (e.g. "Sunday 1989"
# inside the 1993 group, "2012" inside the 1994 group) -- confirm before
# relying on them.
offsets = [
# 1989
offset(600, 406, 'LA', 1, 125, [20,165, 370, 468], 1, 125, [142,278]), # Sunday 1989
offset(640, 189, 'LA', 2, 185, [10,230,445], 2, 185, [0]), # Weekday 1989
offset(640, 186, 'LA', 2, 185, [10,230,445], 2, 185, [0]), # Weekday 1990
offset(640, 196, 'LA', 5, 197, [0,218,436], 3, 195, [0]), # Weekday 1990
offset(600, 271, 'L', 9, 125, [0, 160, 322, 466], 4, 120, [8, 140]), # weekend 1990
offset(600, 189, 'L', 5, 182, [7,208,406], 5, 182, [0]),
offset(600, 191, 'L', 5, 182, [7,208,406], 7, 184, [0]),
offset(600, 273, 'L', 9, 125, [10, 150, 310, 446], 4, 120, [8, 140]),
# 1991
offset(640, 304, 'L', 9, 145, [0, 170, 332, 489], 1,137, [10, 162]),
offset(640, 284, 'L', 9, 140, [0, 170, 322, 489], 1,132, [2, 148]),
offset(640, 279, 'L', 9, 140, [0, 170, 322, 489], 1,132, [2, 148]),
#1992
offset(640, 274, 'L', 9, 140, [0, 170, 322, 489], 1,132, [2, 148]),
offset(600, 196, 'L', 5, 182, [7,208,406], 7, 184, [0]),
#1993
offset(600, 403, 'L', 1, 125, [20,165, 370, 468], 1, 125, [142,278]), # Sunday 1989
offset(600, 400, 'L', 1, 125, [20,165, 370, 468], 1, 125, [142,278]), # Sunday 1989
offset(600, 264, 'L', 9, 125, [0, 160, 322, 466], 4, 120, [8, 140]), # weekend 1990
#1994
offset(780, 281, 'L', 5, 225, [10, 270, 542],5,225, [10]),
offset(780, 360, 'L', 10, 160, [10, 205, 400, 590], 10, 160, [5, 180]), # Weekend
offset(780, 292, 'L', 5, 230, [10, 280, 542],5,230, [15]),
offset(600, 222, 'L', 9, 180, [10, 205, 405], 9, 180, [5]),
offset(780, 287, 'L', 5, 225, [10, 270, 532], 10,225, [10]),
offset(640, 199, 'L', 5, 199, [0,218,436], 3, 197, [0]), # Weekday 2012
offset(640, 288, 'L', 9, 145, [0, 160, 322, 486], 1,137, [0, 149]), # Weekend 2012
]
In [ ]:
# Scan the downloaded archives for the first image whose dimensions are not
# covered (within 3 px in both directions) by any entry in `offsets`, and
# display it so a new layout can be measured.
pattern = '/Users/ajmendez/Dropbox/tmp/dilbert/raw/*.rar'
img = None
sizes = []
# Initialised up front so the summary print at the bottom cannot raise
# NameError when the glob matches nothing or every archive is skipped.
files = []
goodfiles = []
# Archives whose path contains one of these strings are skipped.
years = ['1989', '1990', '1991', '1992', '1993']
for filename in sorted(glob.iglob(pattern)):
    if any(year in filename for year in years):
        continue
    print(filename)
    with rarfile.RarFile(filename) as rf:
        files = sorted(rf.namelist())
        print(len(files))
        # Set lookup instead of scanning the list once per file.
        known = set(files)
        # Keep every .gif, and keep a .jpg only when the archive holds no
        # "-colour.jpg" sibling for it.
        goodfiles = [f for f in files
                     if ('.gif' in f)
                     or (f.replace('.jpg', '-colour.jpg') not in known)]
        for f in goodfiles:
            img = PIL.Image.open(rf.open(f))
            sizes.append([img.width, img.height])
            hasoffset = any(abs(img.width - off.width) <= 3
                            and abs(img.height - off.height) <= 3
                            for off in offsets)
            if hasoffset:
                continue
            # First image with no matching layout: report it, show it, stop.
            print(img.width, img.height)
            plt.imshow(np.array(img.convert('L')), cmap=plt.cm.gray)
            break
    # NOTE(review): indentation was lost in this export. This break is
    # reconstructed as leaving the archive loop after the first archive that
    # is not skipped -- confirm against the original notebook.
    break
print(len(files), len(goodfiles), len(sizes))
In [ ]:
def convert(image, off):
    """Display `image` with the region boundaries from `off` drawn in red.

    The image is shown in mode `off.convertMode` on a new 12x12 figure. The
    region width and height (xmax - xmin, ymax - ymin) are printed, then a
    pair of vertical guides is drawn per entry of `off.xoffsets` and a pair
    of horizontal guides per entry of `off.yoffsets`.
    """
    plt.figure(figsize=(12, 12))
    pixels = np.array(image.convert(off.convertMode))
    plt.imshow(pixels, cmap=plt.cm.gray)

    print(off.xmax - off.xmin)
    print(off.ymax - off.ymin)

    guide_style = dict(color='r', lw=2)
    for column_origin in off.xoffsets:
        for edge in (off.xmin, off.xmax):
            plt.axvline(column_origin + edge, **guide_style)
    for row_origin in off.yoffsets:
        for edge in (off.ymin, off.ymax):
            plt.axhline(row_origin + edge, **guide_style)


# Layout currently being tuned, previewed on the image left over from the
# archive scan.
off = offset(780, 287, 'L', 5, 225, [10, 270, 532], 10, 225, [10])
convert(img, off)
In [ ]:
# Maximum thumbnail width / height, in pixels, for each carved region.
NX = NY = 128


def carveImages(image, off, outpattern=None, convertMode=None):
    """Cut `image` into the regions described by `off` and shrink each one.

    Regions are visited row by row: the outer loop runs over `off.yoffsets`
    and the inner loop over `off.xoffsets`. Each region is shrunk in place to
    fit within NX x NY and then converted to `convertMode`.

    Parameters
    ----------
    image : object with a PIL-style ``crop`` method
        The full image to carve up.
    off : offset
        Layout giving xmin/xmax/xoffsets and ymin/ymax/yoffsets.
    outpattern : str, optional
        ``str.format`` pattern with a ``{j}`` field, filled with the
        zero-based region index. When given, each region is saved to that
        path instead of being yielded.
    convertMode : str, optional
        Mode passed to ``convert``; defaults to ``off.convertMode``.

    Yields
    ------
    The converted thumbnail of each region, only when `outpattern` is not
    given.

    Notes
    -----
    This is a generator: nothing is cropped or saved until it is iterated,
    even when `outpattern` is supplied.
    """
    if convertMode is None:
        convertMode = off.convertMode
    # The index is named `j` because that is the field name the save pattern
    # formats with; the loop previously bound it as `k`, so saving raised
    # NameError.
    for j, (yoffset, xoffset) in enumerate(product(off.yoffsets, off.xoffsets)):
        # crop() takes (left, upper, right, lower) corner coordinates, not a
        # width and height, so the far edges are origin + xmax / ymax.
        box = (xoffset + off.xmin,
               yoffset + off.ymin,
               xoffset + off.xmax,
               yoffset + off.ymax)
        tile = image.crop(box)
        # LANCZOS is the filter ANTIALIAS was an alias for; the ANTIALIAS
        # name was removed in Pillow 10.
        tile.thumbnail((NX, NY), PIL.Image.LANCZOS)
        tile = tile.convert(convertMode)
        if outpattern:
            tile.save(outpattern.format(j=j))
        else:
            yield tile
def plotImages(image, off):
    """Show every region carved from `image` in a grid of subplots.

    The grid has one row per entry of `off.yoffsets` and one column per entry
    of `off.xoffsets`; regions are placed in the order `carveImages` yields
    them.
    """
    nx, ny = len(off.xoffsets), len(off.yoffsets)
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, where
    # plt.subplots would otherwise return a bare Axes with no .flatten().
    fig, axes = plt.subplots(ny, nx, figsize=(nx * 3, ny * 3), squeeze=False)
    for tile, ax in zip(carveImages(image, off), axes.flatten()):
        plt.sca(ax)
        # Gray colormap for consistency with the other cells; without it,
        # single-channel tiles are drawn with the default false-colour map.
        plt.imshow(np.array(tile), interpolation='nearest', cmap=plt.cm.gray)


plotImages(img, off)
In [ ]:
import cv2
In [ ]:
# Grayscale pixel values of the current image divided by 255.
np.true_divide(np.array(img.convert('L')), 255.0)
In [ ]:
# Display the current image as grayscale.
plt.imshow(np.array(img.convert('L')), cmap='gray')
In [ ]:
# Threshold the current image, find its contours, and outline the
# minimum-area rectangle around the first contour.
gray = np.array(img.convert('L'))
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
plt.imshow(thresh, cmap=plt.cm.gray, vmin=0, vmax=255)
# [-2:] copes with OpenCV 3.x, which returns (image, contours, hierarchy);
# the copy protects `thresh` from older releases that modify their input.
contours, hierarchy = cv2.findContours(thresh.copy(),
                                       cv2.RETR_TREE,
                                       cv2.CHAIN_APPROX_NONE)[-2:]
if contours:
    cnt = contours[0]
    rect = cv2.minAreaRect(cnt)
    # minAreaRect returns a rotated rectangle, not a point set, so it must be
    # expanded to its four corners with boxPoints (cv2.cv.BoxPoints on
    # OpenCV 2.4) rather than passed to boundingRect.
    box_points = cv2.boxPoints if hasattr(cv2, 'boxPoints') else cv2.cv.BoxPoints
    # Integer corner coordinates; np.int0 no longer exists in NumPy 2.
    box = np.array(box_points(rect)).astype(np.int32)
    # drawContours needs a numpy array, not a PIL image, and a 3-channel one
    # so the (0, 0, 255) colour can be represented.
    annotated = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    cv2.drawContours(annotated, [box], 0, (0, 0, 255), 2)
    plt.figure()
    # OpenCV stores channels as BGR; reverse them for matplotlib.
    plt.imshow(annotated[:, :, ::-1])
In [ ]:
# for xoffset in
# xmin,xmax = [9,145]
# ymin,ymax = [1,137]
# xoffsets = [0, 160, 322, 486]
# yoffsets = [0, 149]
# # Remember people read left to right -- top to bottom
# for j,(yoffset, xoffset) in enumerate(product(yoffsets, xoffsets)):
# img = image.crop((xoffset + xmin,
# yoffset + ymin,
# xoffset + xmax - xmin,
# yoffset + ymax - ymin))
# img.thumbnail((NX,NY), PIL.Image.ANTIALIAS)
# img.convert('L').save(outpattern.format(j=j))