In [1]:
import os, sys
sys.path.insert(0, os.path.realpath('..'))
from firecares.firestation.models import FireDepartment
import csv
import re
import django
import pandas as pd
django.setup()
In [ ]:
# See s3://firecares-share/2016-12 Metro Roster.csv
df = pd.read_csv('../2016-12 Metro Roster.csv')
df = df[pd.notnull(df['Address']) & pd.isnull(df['Country'])]
df = df[['Department', 'Address', 'Email', 'First Name', 'Last Name', 'State']]
df['Address'] = df['Address'].apply(lambda x: re.sub('Ste?\s+.*|\#.*', '', x).strip())
df['Department'] = df['Department'].apply(lambda x: re.sub('Dept|\s{2+}', '', x).strip())
rows = df.to_dict(orient='records')
rows
In [ ]:
print map(lambda x: (x.id, x.name, x.headquarters_address.address_line1), FireDepartment.objects.filter(headquarters_address__address_line1__icontains='210 W San Jacinto Ave', state='CA'))
In [ ]:
print map(lambda x: (x.id, x.name, x.headquarters_address.address_line1), FireDepartment.objects.filter(name__icontains='Richmond Fire', state='VA'))
In [ ]:
counts = {'multiple': 0, 'found': 0, 'nomatches': 0}
ids = []
for r in rows:
fd = FireDepartment.objects.filter(headquarters_address__address_line1__icontains=r['Address'], state=r['State'])
if len(fd) > 1:
print 'MISMATCH POTENTIAL: {} - ({})'.format(r['Department'], r['State'])
counts['multiple'] = counts['multiple'] + 1
ids.append(0)
elif len(fd) == 1:
print 'FOUND: {}'.format(fd.first().name)
counts['found'] = counts['found'] + 1
ids.append(fd.first().id)
else:
# Fall back to finding by name
fd = FireDepartment.objects.filter(name__icontains=r['Department'], state=r['State'])
if fd:
print 'FOUND (by name): {}'.format(fd.first().name)
counts['found'] = counts['found'] + 1
ids.append(fd.first().id)
else:
print 'NO MATCHES: {} - ({})'.format(r['Department'], r['State'])
counts['nomatches'] = counts['nomatches'] + 1
ids.append(0)
print counts