In [1]:
# This notebook assumes to be running from your FireCARES VM (eg. python manage.py shell_plus --notebook --no-browser)

import sys
import os
import time
import pandas as pd
import numpy as np
sys.path.insert(0, os.path.realpath('..'))  # make the repo checkout importable from the notebook dir
import folium
import django
import sqlite3
django.setup()  # initialize Django apps (presumably a no-op under shell_plus — harmless if already configured)
from django.db import connections
from pretty import pprint
from firecares.firestation.models import (FireDepartment, FireStation, NFIRSStatistic, FireDepartmentRiskModels,
                                          PopulationClassQuartile)
from fire_risk.models import DIST, DISTMediumHazard, DISTHighHazard
from fire_risk.models.DIST.providers.ahs import ahs_building_areas
from fire_risk.models.DIST.providers.iaff import response_time_distributions
from django.db.models import Avg, Max, Min, Q
from django.contrib.gis.geos import GEOSGeometry
from IPython.display import display
from firecares.utils import lenient_summation, dictfetchall
from firecares.tasks.update import (calculate_department_census_geom, calculate_story_distribution,
                                    calculate_structure_counts, update_performance_score, update_nfirs_counts,
                                    dist_model_for_hazard_level)
# Widen pandas display limits so full result sets render inline
pd.set_option("display.max_rows", 2000)
pd.set_option("display.max_columns", 100)

def display_geom(geom):
    """Render a GEOS geometry on a folium map zoomed to the geometry's bounding box.

    Returns the folium.Map so the notebook cell displays it inline.
    """
    center = [geom.centroid.y, geom.centroid.x]
    fmap = folium.Map(location=center, tiles='Stamen Toner')
    fmap.choropleth(geo_str=geom.geojson, line_weight=0, fill_opacity=0.2, fill_color='green')
    # geom.extent is (xmin, ymin, xmax, ymax); folium wants (lat, lon) corner pairs,
    # so each corner is reversed when sliced out of the extent tuple.
    lower_left = geom.extent[1::-1]
    upper_right = geom.extent[3:1:-1]
    fmap.fit_bounds([lower_left, upper_right])
    return fmap

# Philadelphia-specific: all of the single-department cells below operate on this department
fd = FireDepartment.objects.get(id=91907)

In [2]:
# Pivot NFIRS building fires for this department into a hazard-level x fire-spread table.
# crosstab() takes the inner query as a string literal, hence the doubled single quotes;
# %(state)s / %(fdid)s are psycopg2 named parameters filled in by read_sql_query below.
# prop_use codes are residential property-use classes; fire_sprd buckets map to the
# object/room/floor/building/beyond columns declared in the ct(...) clause.
q = """SELECT *
    FROM crosstab(
      'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)
        FROM buildingfires a left join (
          SELECT state,
            fdid,
            inc_date,
            inc_no,
            exp_no,
            geom,
            x.parcel_id,
            x.risk_category
          FROM (select * from incidentaddress a
             left join parcel_risk_category_local b using (parcel_id)
             ) AS x
        ) AS y using (state, inc_date, exp_no, fdid, inc_no)
    where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')
        and fire_sprd is not null and fire_sprd != ''''
    group by risk_category, fire_sprd
    order by risk_category, fire_sprd ASC')
    AS ct(risk_category text, "object_of_origin" bigint, "room_of_origin" bigint, "floor_of_origin" bigint, "building_of_origin" bigint, "beyond" bigint);"""

# One row per hazard level; see Out[2] below
pd.read_sql_query(q, connections['nfirs'], params={'state': fd.state, 'fdid': fd.fdid})


Out[2]:
risk_category object_of_origin room_of_origin floor_of_origin building_of_origin beyond
0 Low 1 NaN NaN NaN NaN
1 Medium 10 5 NaN NaN NaN
2 N/A 7266 2348 673 621 361

In [3]:
# Recompute the department's DIST/performance scores synchronously (no .delay) so
# the per-hazard-level errors are visible inline — see the NotEnoughRecords output below
update_performance_score(fd.id)


Error updating DIST score: Traceback (most recent call last):
  File "/firecares/firecares/tasks/update.py", line 126, in update_performance_score
    dist = dist_model(floor_extent=False, **counts)
  File "/webapps/firecares/src/fire-risk/fire_risk/models/DIST/DIST.py", line 89, in __init__
    raise NotEnoughRecords
NotEnoughRecords
.
Error updating DIST score: Traceback (most recent call last):
  File "/firecares/firecares/tasks/update.py", line 126, in update_performance_score
    dist = dist_model(floor_extent=False, **counts)
  File "/webapps/firecares/src/fire-risk/fire_risk/models/DIST/DIST.py", line 383, in __init__
    beyond, **kwargs)
  File "/webapps/firecares/src/fire-risk/fire_risk/models/DIST/DIST.py", line 89, in __init__
    raise NotEnoughRecords
NotEnoughRecords
.
/webapps/firecares/local/lib/python2.7/site-packages/numpy/core/numeric.py:294: FutureWarning: in the future, full((10000, 4), -1000) will return an array of dtype('int64')
  format(shape, fill_value, array(fill_value).dtype), FutureWarning)

updating fdid: 91907 - Unknown risk level from: 7.0 to 7.0.
clearing High level from 91907 due to missing categories in aggregation
updating fdid: 91907 - All risk level from: 7.0 to 7.0.

Calculate quartiles based on similar departments


In [4]:
# Quartile computation for population-class-9 departments; this mirrors the
# population_quartiles materialized view.  Each ntile(4) window partitions by
# (metric-is-present, population_class, hazard level) so that departments missing a
# metric don't skew the quartile boundaries of those that have it.
q = """SELECT
            fd."id",
            fd."fdid",
            fd."name",
            fd."state",
            fd."region",
            (SELECT COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0)) AS "risk_model_size1_percent_size2_percent_sum",
            (SELECT COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0)) AS "risk_model_deaths_injuries_sum",
            rm."dist_model_score",
            rm."risk_model_deaths",
            rm."risk_model_injuries",
            rm."risk_model_fires",
            rm."risk_model_fires_size0",
            rm."risk_model_fires_size0_percentage",
            rm."risk_model_fires_size1",
            rm."risk_model_fires_size1_percentage",
            rm."risk_model_fires_size2",
            rm."risk_model_fires_size2_percentage",
            fd."population",
            fd."population_class",
            fd."featured",
            nfirs.avg_fires as "residential_fires_avg_3_years",
            rm."level",
            CASE WHEN (rm."risk_model_fires_size1_percentage" IS NOT NULL OR rm."risk_model_fires_size2_percentage" IS NOT NULL) THEN ntile(4) over (partition by COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0) != 0, fd.population_class, rm.level order by COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0)) ELSE NULL END AS "risk_model_size1_percent_size2_percent_sum_quartile",
            CASE WHEN (rm."risk_model_deaths" IS NOT NULL OR rm."risk_model_injuries" IS NOT NULL) THEN ntile(4) over (partition by COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0) != 0, fd.population_class, rm.level order by COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0)) ELSE NULL END AS "risk_model_deaths_injuries_sum_quartile",
            CASE WHEN rm."dist_model_score" IS NOT NULL THEN ntile(4) over (partition by rm.dist_model_score is not null, fd.population_class, rm.level order by rm.dist_model_score) ELSE NULL END AS "dist_model_score_quartile",
            CASE WHEN rm."risk_model_deaths" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_deaths is not null, fd.population_class, rm.level order by rm.risk_model_deaths) ELSE NULL END AS "risk_model_deaths_quartile",
            CASE WHEN rm."risk_model_injuries" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_injuries is not null, fd.population_class, rm.level order by rm.risk_model_injuries) ELSE NULL END AS "risk_model_injuries_quartile",
            CASE WHEN rm."risk_model_fires_size0" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size0 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size0) ELSE NULL END AS "risk_model_fires_size0_quartile",
            CASE WHEN rm."risk_model_fires_size1" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size1 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size1) ELSE NULL END AS "risk_model_fires_size1_quartile",
            CASE WHEN rm."risk_model_fires_size2" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size2 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size2) ELSE NULL END AS "risk_model_fires_size2_quartile",
            CASE WHEN rm."risk_model_fires" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires is not null, fd.population_class, rm.level order by rm.risk_model_fires) ELSE NULL END AS "risk_model_fires_quartile",
            CASE WHEN "nfirs"."avg_fires" IS NOT NULL THEN ntile(4) over (partition by avg_fires is not null, fd.population_class, rm.level order by avg_fires) ELSE NULL END AS "residential_fires_avg_3_years_quartile"

        FROM "firestation_firedepartment" fd
        INNER JOIN "firestation_firedepartmentriskmodels" rm ON
            rm.department_id = fd.id
        LEFT JOIN (
            SELECT fire_department_id, AVG(count) as avg_fires, level
            from firestation_nfirsstatistic
            WHERE year >= 2010 and metric='residential_structure_fires'
            GROUP BY fire_department_id, level) nfirs
        ON (fd.id=nfirs.fire_department_id and nfirs.level = rm.level)
        WHERE fd.archived=False and fd.population_class = 9
        ORDER BY fd.id"""

df = pd.read_sql_query(q, connections['default'])

# Numeric hazard level -> label; note the encoding skips 3 (4 = High, 5 = Unknown)
levels = {'0': 'All hazard levels', '1': 'Low hazard', '2': 'Medium hazard', '4': 'High hazard', '5': 'Unknown hazard'}
# Quartile -> label; quartiles 2 and 3 are both presented as "Medium risk"
quartiles = {'1': 'Low risk', '2': 'Medium risk', '3': 'Medium risk', '4': 'High risk'}

# Transformations to human-readable values
df['level'] = df['level'].apply(lambda x: levels[str(x)])
for c in ['risk_model_size1_percent_size2_percent_sum_quartile', 'risk_model_deaths_injuries_sum_quartile', 'dist_model_score_quartile', 'risk_model_deaths_quartile', 'risk_model_injuries_quartile', 'risk_model_fires_size0_quartile', 'risk_model_fires_size1_quartile', 'risk_model_fires_size2_quartile', 'risk_model_fires_quartile', 'residential_fires_avg_3_years_quartile']:
    # Quartile columns come back as floats with NaN for departments missing the metric
    df[c] = df[c].apply(lambda x: quartiles[str(int(x))] if not np.isnan(x) else 'N/A')

# Show just this department's DIST scores per hazard level
df[df['id'] == fd.id][['id', 'name', 'dist_model_score', 'level']]


Out[4]:
id name dist_model_score level
60 91907 Philadelphia Fire Department NaN Medium hazard
61 91907 Philadelphia Fire Department NaN Low hazard
62 91907 Philadelphia Fire Department 7 All hazard levels
63 91907 Philadelphia Fire Department NaN High hazard
64 91907 Philadelphia Fire Department 7 Unknown hazard

Full-cycle statistics import/calculations

  • Clear out FireDepartmentRiskModels, NFIRSStatistic
  • Refresh population_quartiles materialized view (to ensure that we're starting w/ a clean slate)
  • Run update_nfirs django command to pull NFIRS stats into FireCARES per department
    • Creates/updated NFIRSStatistic records
    • Depends on NFIRS.buildingfires, FC FireDepartment
  • Run calc-department-owned-tracts django command to calculate the census-tract based department jurisdiction
  • Updates FireDepartment.owned_tracts_geom on FC FireDepartment
    • Depends on NFIRS.census_block_groups_2010, NIST.tract_years, FC FireDepartment
  • Run calc-structure-counts django command to determine the structure counts over a department's census-tract-based jurisdiction
    • Updates FireDepartmentRiskModels.structure_count on FC FireDepartmentRiskModels
    • Depends on NFIRS.parcel_risk_category_local, FC FireDepartment.owned_tracts_geom
  • Run calc-story-distribution (not yet completed) to calculate the story distribution for a department's census-tract-based jurisdiction
    • Updates FireDepartmentRiskModels.floor_count_coefficients
    • Depends on NFIRS.LUSE_swg - copied from parcels.LUSE_swg, NFIRS.parcel_stories - subset of parcels.parcels, FC FireDepartment.owned_tracts_geom
  • Run import-predictions to import predictions from Stanley's prediction model
    • Depends on predictions.csv file, FC FireDepartment
    • Recalculates the DIST score for EACH hazard level for FC FireDepartment
  • Re-compute population_quartiles materialized view based on the imported information
    • Depends on FC Firedepartment, FC FireDepartmentRiskModels, FC NFIRSStatistic
  • Run load-dist-scores to perform the DIST score calculation per department
    • Depends on NFIRS.buildingfires, NFIRS.incidentaddress, NFIRS.parcel_risk_category_local - copy of parcels.parcels w/ subset of columns + parcel risk level

In [ ]:
# Clear out FireDepartmentRiskModels since all of this information is derived
# WARNING: destructive — deletes risk model and NFIRS statistic rows for EVERY department
FireDepartmentRiskModels.objects.all().delete()
NFIRSStatistic.objects.all().delete()

with connections['default'].cursor() as c:
    # The materialized view is derived from the tables just cleared, so refreshing
    # it here leaves it empty — a clean slate for the full-cycle import below
    c.execute("refresh materialized view population_quartiles;")

# Sanity check: the view (exposed via PopulationClassQuartile) must now be empty
assert PopulationClassQuartile.objects.count() == 0

In [ ]:
# The working set: every department FireCARES considers similar, plus the target department itself
similar_ids = fd.similar_departments.values_list('id', flat=True)
similar_departments = list(FireDepartment.objects.filter(id__in=similar_ids)) + [fd]

In [ ]:
# Queue an async NFIRS count refresh (celery task) for every department in the cohort
for dept in similar_departments:
    update_nfirs_counts.delay(dept.id)

Similar departments to Philadelphia...


In [ ]:
# Overview of the similar-department cohort, largest population first.
df = pd.DataFrame([(d.id, d.name, d.population, d.region) for d in similar_departments],
                  columns=['id', 'name', 'population', 'region'])
# DataFrame.sort() was deprecated in pandas 0.17 and removed in 0.20;
# prefer sort_values when available, falling back for older installs.
sort_desc = df.sort_values if hasattr(df, 'sort_values') else df.sort
sort_desc('population', ascending=False)

In [ ]:
# Verify the NFIRS import produced a complete grid of statistic rows
df = pd.read_sql_query("select * from firestation_nfirsstatistic;", connections['default'])
n_departments = len(df['fire_department_id'].unique())
n_years = len(df['year'].unique())
n_metrics = len(df['metric'].unique())
# Should have 5 risk levels (low, medium, high, unknown, all) for each metric,
# for every year, for each department
assert len(df) == n_departments * 5 * n_metrics * n_years

Now that we have NFIRS statistics (which is a dependency of the quartile calculations), we can continue


In [ ]:
# NOTE: this takes some time...
for f in similar_departments:
    calculate_department_census_geom.delay(f.id)
# NOTE(review): .delay() only queues the celery tasks — the refresh/display below runs
# immediately and will show stale (or missing) geometry until the worker finishes.
# Re-run these two lines once the tasks have completed.
fd.refresh_from_db()
display_geom(fd.owned_tracts_geom)

In [ ]:
# NOTE: this takes some time...
from django.core.management import call_command

# The management command expects string ids as positional args after --ids
dept_ids = [str(dept.id) for dept in similar_departments]
call_command('import-predictions', '../predictions.2015.csv', '--ids', *dept_ids)  # Also updates the performance score

In [ ]:
# Rebuild the quartile materialized view now that risk models/NFIRS stats exist again
with connections['default'].cursor() as cursor:
    cursor.execute('refresh materialized view population_quartiles;')

# Display the (now non-zero) row count as the cell output
PopulationClassQuartile.objects.count()

In [ ]:
# NOTE: this takes a LONG time... (like hours)
# Queue async structure-count calculation over each department's owned census tracts
for dept in similar_departments:
    calculate_structure_counts.delay(dept.id)

In [ ]:
# Queue async DIST/performance score recomputation for the whole cohort
for dept in similar_departments:
    update_performance_score.delay(dept.id)

In [ ]:
# Same hazard-level x fire-spread crosstab as In[2], but consumed row-by-row so the
# DIST score can be recomputed manually per hazard level (mirrors what
# update_performance_score does internally).
q = """SELECT *
    FROM crosstab(
      'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)
        FROM buildingfires a left join (
          SELECT state,
            fdid,
            inc_date,
            inc_no,
            exp_no,
            geom,
            x.parcel_id,
            x.risk_category
          FROM (select * from incidentaddress a
             left join parcel_risk_category_local b using (parcel_id)
             ) AS x
        ) AS y using (state, inc_date, exp_no, fdid, inc_no)
    where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')
        and fire_sprd is not null and fire_sprd != ''''
    group by risk_category, fire_sprd
    order by risk_category, fire_sprd ASC')
    AS ct(risk_category text, "object_of_origin" bigint, "room_of_origin" bigint, "floor_of_origin" bigint, "building_of_origin" bigint, "beyond" bigint);"""

with connections['nfirs'].cursor() as c:
    c.execute(q, {'state': fd.state, 'fdid': fd.fdid})
    results = dictfetchall(c)

# Running totals across all hazard levels, used for the 'All' level score at the end
all_counts = dict(object_of_origin=0,
                  room_of_origin=0,
                  floor_of_origin=0,
                  building_of_origin=0,
                  beyond=0)

# NOTE(review): defined but never used in this cell
risk_mapping = {'Low': 1, 'Medium': 2, 'High': 4, 'N/A': 5}

print 'ALL RESULTS'
df = pd.DataFrame(results)
display(df)

for result in results:
    print result.get('risk_category')
    # Pick the DIST model class (DIST / DISTMediumHazard / DISTHighHazard) for this level
    dist_model = dist_model_for_hazard_level(result.get('risk_category'))
    
    # NOTE(review): the crosstab emits NULL (not a missing key) for an empty spread
    # bucket, and dict.get's default only applies when the key is absent — a None
    # here would break the += aggregation below.  Confirm against real rows.
    counts = dict(object_of_origin=result.get('object_of_origin', 0),
                  room_of_origin=result.get('room_of_origin', 0),
                  floor_of_origin=result.get('floor_of_origin', 0),
                  building_of_origin=result.get('building_of_origin', 0),
                  beyond=result.get('beyond', 0))
    
    display(counts)

    # add current risk category to the all risk category
    for key, value in counts.items():
        all_counts[key] += value
        
    # Building-size distribution from AHS; depends only on the department, not the level
    ahs_building_size = ahs_building_areas(fd.fdid, fd.state)

    if ahs_building_size is not None:
        counts['building_area_draw'] = ahs_building_size

    response_times = response_time_distributions.get('{0}-{1}'.format(fd.fdid, fd.state))

    if response_times:
        # NOTE(review): LogNormalDraw is never imported in this notebook — import it
        # from fire_risk before running this cell, or this line raises NameError.
        counts['arrival_time_draw'] = LogNormalDraw(*response_times, multiplier=60)
        
    dist = dist_model(floor_extent=False, **counts)
    print 'SCORE: {}'.format(dist.gibbs_sample())

    
# 'All' level uses the aggregated counts; ahs_building_size/response_times deliberately
# carry over from the last loop iteration since they don't vary by hazard level
dist_model = dist_model_for_hazard_level('All')

if ahs_building_size is not None:
    all_counts['building_area_draw'] = ahs_building_size
    
if response_times:
    all_counts['arrival_time_draw'] = LogNormalDraw(*response_times, multiplier=60)
    
dist = dist_model(floor_extent=False, **all_counts)
    
display(all_counts)
print 'SCORE: {}'.format(dist.gibbs_sample())

In [ ]:
# Queue an async recompute of the department's performance score (celery task)
update_performance_score.delay(fd.id)

Score sanity checks


In [ ]:
! rm scores.db

In [ ]:
# Using sqlite3 since this might be a long-running process and would hate to have to restart
conn = sqlite3.connect('scores.db')
# IF NOT EXISTS makes this cell safe to re-run mid-session (the original CREATE TABLE
# raised OperationalError if the table was already present).
# Columns: per-hazard-level incident counts and percentages, the percentage-weighted
# "target" score, per-level DIST scores, the combined score, and its deviation.
create = """
CREATE TABLE IF NOT EXISTS scores(
    fc_dept_id INT NOT NULL,
    name TEXT NOT NULL,
    low_count REAL NOT NULL,
    med_count REAL NOT NULL,
    high_count REAL NOT NULL,
    unk_count REAL NOT NULL,
    low_percent REAL NOT NULL,
    med_percent REAL NOT NULL,
    high_percent REAL NOT NULL,
    unk_percent REAL NOT NULL,
    target REAL NOT NULL,
    low_score REAL NOT NULL,
    med_score REAL NOT NULL,
    high_score REAL NOT NULL,
    unk_score REAL NOT NULL,
    all_score REAL NOT NULL,
    all_score_diff REAL NOT NULL,
    diff_percentage REAL NOT NULL);
"""
conn.cursor().execute(create)
conn.commit()  # make the schema durable before the (long) insert loop starts

In [ ]:
# Reconnect so this cell is runnable even if the schema cell ran in a previous session
conn = sqlite3.connect('scores.db')
# Same hazard-level x fire-spread crosstab used earlier in this notebook
qcounts = """SELECT *
    FROM crosstab(
      'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)
        FROM buildingfires a left join (
          SELECT state,
            fdid,
            inc_date,
            inc_no,
            exp_no,
            geom,
            x.parcel_id,
            x.risk_category
          FROM (select * from incidentaddress a
             left join parcel_risk_category_local b using (parcel_id)
             ) AS x
        ) AS y using (state, inc_date, exp_no, fdid, inc_no)
    where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')
        and fire_sprd is not null and fire_sprd != ''''
    group by risk_category, fire_sprd
    order by risk_category, fire_sprd ASC')
    AS ct(risk_category text, "object_of_origin" bigint, "room_of_origin" bigint, "floor_of_origin" bigint, "building_of_origin" bigint, "beyond" bigint);"""

ins = """INSERT INTO scores VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);"""

def already_in(fd_id):
    # True when this department already has a scores row — lets the long-running
    # loop below resume where it left off after an interruption
    return conn.cursor().execute('SELECT fc_dept_id FROM scores WHERE fc_dept_id = ?', (fd_id,)).fetchone() is not None

# NOTE(review): this loop rebinds the module-level `fd` (Philadelphia, set in In[1]);
# after this cell runs, `fd` refers to whichever priority department was processed last.
for fd in FireDepartment.priority_departments.all():
    if not already_in(fd.id):
        counts = pd.read_sql_query(qcounts, connections['nfirs'], params={'state': fd.state, 'fdid': fd.fdid})
        # Collapse the spread buckets: total incident count per hazard level
        df2 = counts.set_index('risk_category')
        counts = df2.sum(axis=1).to_dict()
        chigh, clow, cmed, cna = counts.get('High', 0), counts.get('Low', 0), counts.get('Medium', 0), counts.get('N/A', 0)
        tot = float(chigh + clow + cmed + cna)
        if tot:
            # Share of the department's incidents falling in each hazard level
            hper = chigh / tot
            lper = clow / tot
            mper = cmed / tot
            nper = cna / tot
            low = fd.metrics.dist_model_score.low or 0
            medium = fd.metrics.dist_model_score.medium or 0
            high = fd.metrics.dist_model_score.high or 0
            unk = fd.metrics.dist_model_score.unknown or 0
            _all = fd.metrics.dist_model_score.all or 0
            # Expected 'All' score: per-level scores weighted by incident share
            target = hper * high + lper * low + mper * medium + nper * unk
            off = _all - target
            # 9e999 overflows to float('inf') — sentinel for "no all-level score"
            diff_per = off / _all if _all else 9e999

            args = (fd.id, fd.name, clow, cmed, chigh, cna, lper, mper, hper, nper, target, low, medium, high, unk, _all, off, diff_per)
            print 'Adding counts for {}'.format(fd.name)
            conn.cursor().execute(ins, args)
            conn.commit()
        else:
            print 'No counts for {}'.format(fd.name)
    else:
        print 'Skipping {}, already in...'.format(fd.name)

In [ ]:
# Review the sanity-check results, worst (largest relative deviation) first,
# and export a CSV for sharing.
conn = sqlite3.connect('scores.db')
df = pd.read_sql_query("select * from scores;", conn)
# DataFrame.sort() was deprecated in pandas 0.17 and removed in 0.20;
# prefer sort_values when available, falling back for older installs.
sort_desc = df.sort_values if hasattr(df, 'sort_values') else df.sort
df = sort_desc('diff_percentage', ascending=False)
display(df)
df.to_csv('/tmp/scores.csv')