In [12]:
import binascii
def load_anpa_file(path, debug=False):
    """Read an ANPA wire file and split it into CRLF-terminated lines.

    The file is read as raw bytes in a single call. The control bytes 0x1e
    and 0x1d (ASCII RS and GS) are replaced with ';' everywhere, including
    the very first byte of the file.

    Args:
        path: Path of the file to read.
        debug: When true, print a "hex char" dump of every character of a
            line, followed by the line itself.

    Returns:
        A list of native ``str`` lines in file order. Each line keeps its
        trailing CR LF. If the file does not end with CR LF, the leftover
        text is returned as a final, unterminated line instead of being
        dropped. An empty file yields an empty list.
    """
    with open(path, 'rb') as fh:
        raw = fh.read()

    # On Python 3 the read yields ``bytes``. latin-1 maps every byte to the
    # code point with the same value, so the content survives unchanged while
    # callers get a native ``str`` on both Python 2 and Python 3.
    if not isinstance(raw, str):
        raw = raw.decode('latin-1')

    text = raw.replace('\x1e', ';').replace('\x1d', ';')

    lines = []
    start = 0
    while start < len(text):
        end = text.find('\r\n', start)
        # No further terminator: the remainder is the final partial line.
        stop = len(text) if end == -1 else end + 2
        line = text[start:stop]
        if debug:
            for char in line:
                print('%02x %s' % (ord(char), char))
            print(line)
        lines.append(line)
        start = stop
    return lines
In [15]:
def cleaner(string, debug=False):
    """Remove unwanted control characters and surrounding whitespace.

    The characters 0x0a, 0x0d, 0x1f, 0x06, 0x08 and 0x19 are removed
    wherever they occur; the remaining text is then stripped of leading and
    trailing whitespace.

    Args:
        string: Native ``str`` to clean.
        debug: When true, print a "hex char" line for every character kept.

    Returns:
        The cleaned, stripped string.
    """
    # Compare characters directly instead of hex-encoding each one:
    # binascii.hexlify rejects text on Python 3, and a set lookup is cheaper.
    banned = frozenset('\x0a\x0d\x1f\x06\x08\x19')
    kept = []
    for char in string:
        if char in banned:
            continue
        if debug:
            print('%02x %s' % (ord(char), char))
        kept.append(char)
    return ''.join(kept).strip()
def region_parser(lines):
    """Parse per-region vote counts out of ANPA lines.

    A small state machine is driven by each line after it has been passed
    through ``cleaner``:

    * '^By The Associated Press='                      -> 'EnterHeader'
    * contains 'returns from Illinois by Geographic Region'
      and ends with '<'                                -> 'EndHeader'
    * any other line starting '^' and ending '<'       -> 'BeginRegions'
    * the line following 'BeginRegions'                -> 'InRegions'
    * a line starting '^RegTotals' while 'InRegions'   -> 'EndRegions'

    While in 'EndHeader', the non-empty ';'-separated fields of a line are
    appended to the column header. While in 'InRegions', the first non-empty
    field of a line is the region name and each following non-empty field is
    stored as an integer (thousands separators removed) under the header name
    at the same position.

    Parsing stops at the first line that is exactly CR LF, or at the end of
    ``lines`` if no such line exists.

    Args:
        lines: Iterable of raw line strings, terminators included.

    Returns:
        Dict mapping region name to a dict of header name -> int.

    Raises:
        IndexError: If a region line has more values than header names.
        ValueError: If a region value is not an integer.
    """
    state = ''
    header = []
    results = {}

    for raw_line in lines:
        # A bare CR LF line marks the end of the data.
        if raw_line == '\r\n':
            return results
        line = cleaner(raw_line)

        # State transitions; order matters because the tests overlap.
        if line == '^By The Associated Press=':
            state = 'EnterHeader'
        elif ('returns from Illinois by Geographic Region' in line
              and line.endswith('<')):
            state = 'EndHeader'
        elif line.startswith('^') and line.endswith('<'):
            state = 'BeginRegions'
        elif state == 'BeginRegions':
            state = 'InRegions'
        elif state == 'InRegions' and line.startswith('^RegTotals'):
            state = 'EndRegions'

        if ';' not in line:
            continue

        # Empty fields are skipped for header and data lines alike, so the
        # positions of the two stay aligned.
        fields = [field
                  for field in (cleaner(part) for part in line.split(';'))
                  if field]

        if state == 'EndHeader':
            header.extend(fields)
        elif state == 'InRegions' and fields:
            region = fields[0]
            results[region] = {}
            for position, value in enumerate(fields[1:]):
                results[region][header[position]] = int(value.replace(',', ''))

    # Reached when the input has no bare CR LF line; return what was parsed
    # rather than falling off the end with None.
    return results
In [25]:
def print_results(results):
    """Print per-region counts plus 'Downstate' and 'Total' sums.

    Each region listed in ``special`` is printed with its counts. Then two
    sums are printed, taken column by column over the columns of the first
    special region ('Chicago'): 'Downstate' adds up every region that is not
    in ``special``, and 'Total' adds up every region.

    Args:
        results: Dict mapping region name to a dict of column name -> int.
            It must contain every region in ``special``, and every region
            must provide all of Chicago's columns.

    Raises:
        KeyError: If a special region is missing, or a region lacks one of
            Chicago's columns.
    """
    special = ['Chicago', 'CookSuburb', 'DuPage', 'Kane', 'Kendall', 'Lake',
               'McHenry', 'Will']

    # A single pre-formatted argument prints identically on Python 2 and 3.
    for region in special:
        print('%s %s' % (region, results[region]))

    # One column list drives both sums so they always cover the same keys.
    columns = list(results[special[0]])
    downstate = dict.fromkeys(columns, 0)
    total = dict.fromkeys(columns, 0)

    for region, counts in results.items():
        is_downstate = region not in special
        for column in columns:
            total[column] += counts[column]
            if is_downstate:
                downstate[column] += counts[column]

    print('Downstate %s' % (downstate,))
    print('Total %s' % (total,))
# Load the AP wire file, parse the per-region counts and print the summary.
print_results(
    region_parser(
        load_anpa_file('IL-Dem-Pres-Reg-Geo-2-Takes.anpa')
    )
)
In [ ]: