In [1]:
import os, sys
sys.path.insert(0, os.path.realpath('..'))
from firecares.firestation.models import FireDepartment
import csv
import re
import django
import pandas as pd
django.setup()

In [ ]:
# See s3://firecares-share/2016-12 Metro Roster.csv

df = pd.read_csv('../2016-12 Metro Roster.csv')
df = df[pd.notnull(df['Address']) & pd.isnull(df['Country'])]
df = df[['Department', 'Address', 'Email', 'First Name', 'Last Name', 'State']]
df['Address'] = df['Address'].apply(lambda x: re.sub('Ste?\s+.*|\#.*', '', x).strip())
df['Department'] = df['Department'].apply(lambda x: re.sub('Dept|\s{2+}', '', x).strip())
rows = df.to_dict(orient='records')
rows

In [ ]:
print map(lambda x: (x.id, x.name, x.headquarters_address.address_line1), FireDepartment.objects.filter(headquarters_address__address_line1__icontains='210 W San Jacinto Ave', state='CA'))

In [ ]:
print map(lambda x: (x.id, x.name, x.headquarters_address.address_line1), FireDepartment.objects.filter(name__icontains='Richmond Fire', state='VA'))

In [ ]:
counts = {'multiple': 0, 'found': 0, 'nomatches': 0}
ids = []
for r in rows:
    fd = FireDepartment.objects.filter(headquarters_address__address_line1__icontains=r['Address'], state=r['State'])
    if len(fd) > 1:
        print 'MISMATCH POTENTIAL: {} - ({})'.format(r['Department'], r['State'])
        counts['multiple'] = counts['multiple'] + 1
        ids.append(0)
    elif len(fd) == 1:
        print 'FOUND: {}'.format(fd.first().name)
        counts['found'] = counts['found'] + 1
        ids.append(fd.first().id)
    else:
        # Fall back to finding by name
        fd = FireDepartment.objects.filter(name__icontains=r['Department'], state=r['State'])
        if fd:
            print 'FOUND (by name): {}'.format(fd.first().name)
            counts['found'] = counts['found'] + 1
            ids.append(fd.first().id)
        else:
            print 'NO MATCHES: {} - ({})'.format(r['Department'], r['State'])
            counts['nomatches'] = counts['nomatches'] + 1
            ids.append(0)
        
print counts