In [1]:
# This notebook assumes to be running from your FireCARES VM (eg. python manage.py shell_plus --notebook --no-browser)
import sys
import os
import time
import pandas as pd
import numpy as np
sys.path.insert(0, os.path.realpath('..'))
import folium
import django
import sqlite3
django.setup()
from django.db import connections
from pretty import pprint
from firecares.firestation.models import (FireDepartment, FireStation, NFIRSStatistic, FireDepartmentRiskModels,
PopulationClassQuartile)
from fire_risk.models import DIST, DISTMediumHazard, DISTHighHazard
from fire_risk.models.DIST.providers.ahs import ahs_building_areas
from fire_risk.models.DIST.providers.iaff import response_time_distributions
from django.db.models import Avg, Max, Min, Q
from django.contrib.gis.geos import GEOSGeometry
from IPython.display import display
from firecares.utils import lenient_summation, dictfetchall
from firecares.tasks.update import (calculate_department_census_geom, calculate_story_distribution,
calculate_structure_counts, update_performance_score, update_nfirs_counts,
dist_model_for_hazard_level)
pd.set_option("display.max_rows", 2000)
pd.set_option("display.max_columns", 100)
def display_geom(geom):
    """Render a GEOS geometry on a folium map, zoomed to the geometry's bounding box."""
    center = [geom.centroid.y, geom.centroid.x]
    fmap = folium.Map(location=center, tiles='Stamen Toner')
    fmap.choropleth(geo_str=geom.geojson, line_weight=0, fill_opacity=0.2, fill_color='green')
    # GEOS extent is (xmin, ymin, xmax, ymax); folium wants (lat, lng) corner pairs
    xmin, ymin, xmax, ymax = geom.extent
    fmap.fit_bounds([(ymin, xmin), (ymax, xmax)])
    return fmap
# Philadelphia-specific: 91907 is the FireCARES department id used throughout
# this notebook (TODO confirm against the FireCARES admin)
fd = FireDepartment.objects.get(id=91907)
In [2]:
# Crosstab of building-fire counts for this department: rows are parcel risk
# categories (Low/Medium/High, 'N/A' when the incident address has no parcel
# match), columns are NFIRS fire spread extents (object -> beyond building).
# Doubled single quotes are escapes inside crosstab()'s quoted inner query.
# prop_use codes restrict the incidents to residential property uses.
# NOTE(review): '%(state)s' sits inside the crosstab string literal; this only
# works because the driver substitutes a quoted value, producing the doubled
# quotes the inner query needs -- confirm before reusing elsewhere.
q = """SELECT *
FROM crosstab(
'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)
FROM buildingfires a left join (
SELECT state,
fdid,
inc_date,
inc_no,
exp_no,
geom,
x.parcel_id,
x.risk_category
FROM (select * from incidentaddress a
left join parcel_risk_category_local b using (parcel_id)
) AS x
) AS y using (state, inc_date, exp_no, fdid, inc_no)
where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')
and fire_sprd is not null and fire_sprd != ''''
group by risk_category, fire_sprd
order by risk_category, fire_sprd ASC')
AS ct(risk_category text, "object_of_origin" bigint, "room_of_origin" bigint, "floor_of_origin" bigint, "building_of_origin" bigint, "beyond" bigint);"""
# Cell output: the crosstab as a DataFrame; params are bound by the DB driver
pd.read_sql_query(q, connections['nfirs'], params={'state': fd.state, 'fdid': fd.fdid})
Out[2]:
In [3]:
update_performance_score(fd.id)
In [4]:
# Department risk/quartile summary for population class 9 departments.
# Quartiles (ntile(4)) are computed per (population_class, hazard level)
# partition; the CASE wrappers plus the boolean partition keys keep NULL
# metrics out of the quartile buckets.
q = """SELECT
fd."id",
fd."fdid",
fd."name",
fd."state",
fd."region",
(SELECT COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0)) AS "risk_model_size1_percent_size2_percent_sum",
(SELECT COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0)) AS "risk_model_deaths_injuries_sum",
rm."dist_model_score",
rm."risk_model_deaths",
rm."risk_model_injuries",
rm."risk_model_fires",
rm."risk_model_fires_size0",
rm."risk_model_fires_size0_percentage",
rm."risk_model_fires_size1",
rm."risk_model_fires_size1_percentage",
rm."risk_model_fires_size2",
rm."risk_model_fires_size2_percentage",
fd."population",
fd."population_class",
fd."featured",
nfirs.avg_fires as "residential_fires_avg_3_years",
rm."level",
CASE WHEN (rm."risk_model_fires_size1_percentage" IS NOT NULL OR rm."risk_model_fires_size2_percentage" IS NOT NULL) THEN ntile(4) over (partition by COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0) != 0, fd.population_class, rm.level order by COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0)) ELSE NULL END AS "risk_model_size1_percent_size2_percent_sum_quartile",
CASE WHEN (rm."risk_model_deaths" IS NOT NULL OR rm."risk_model_injuries" IS NOT NULL) THEN ntile(4) over (partition by COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0) != 0, fd.population_class, rm.level order by COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0)) ELSE NULL END AS "risk_model_deaths_injuries_sum_quartile",
CASE WHEN rm."dist_model_score" IS NOT NULL THEN ntile(4) over (partition by rm.dist_model_score is not null, fd.population_class, rm.level order by rm.dist_model_score) ELSE NULL END AS "dist_model_score_quartile",
CASE WHEN rm."risk_model_deaths" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_deaths is not null, fd.population_class, rm.level order by rm.risk_model_deaths) ELSE NULL END AS "risk_model_deaths_quartile",
CASE WHEN rm."risk_model_injuries" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_injuries is not null, fd.population_class, rm.level order by rm.risk_model_injuries) ELSE NULL END AS "risk_model_injuries_quartile",
CASE WHEN rm."risk_model_fires_size0" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size0 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size0) ELSE NULL END AS "risk_model_fires_size0_quartile",
CASE WHEN rm."risk_model_fires_size1" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size1 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size1) ELSE NULL END AS "risk_model_fires_size1_quartile",
CASE WHEN rm."risk_model_fires_size2" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size2 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size2) ELSE NULL END AS "risk_model_fires_size2_quartile",
CASE WHEN rm."risk_model_fires" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires is not null, fd.population_class, rm.level order by rm.risk_model_fires) ELSE NULL END AS "risk_model_fires_quartile",
CASE WHEN "nfirs"."avg_fires" IS NOT NULL THEN ntile(4) over (partition by avg_fires is not null, fd.population_class, rm.level order by avg_fires) ELSE NULL END AS "residential_fires_avg_3_years_quartile"
FROM "firestation_firedepartment" fd
INNER JOIN "firestation_firedepartmentriskmodels" rm ON
rm.department_id = fd.id
LEFT JOIN (
SELECT fire_department_id, AVG(count) as avg_fires, level
from firestation_nfirsstatistic
WHERE year >= 2010 and metric='residential_structure_fires'
GROUP BY fire_department_id, level) nfirs
ON (fd.id=nfirs.fire_department_id and nfirs.level = rm.level)
WHERE fd.archived=False and fd.population_class = 9
ORDER BY fd.id"""
df = pd.read_sql_query(q, connections['default'])
# Hazard-level ids used by this notebook (see risk_mapping below).
# NOTE(review): there is no key '3' -- a level value of 3 would raise KeyError.
levels = {'0': 'All hazard levels', '1': 'Low hazard', '2': 'Medium hazard', '4': 'High hazard', '5': 'Unknown hazard'}
# Quartile -> label; quartiles 2 and 3 both map to 'Medium risk', presumably
# an intentional collapse to a 3-bucket display -- confirm with the team.
quartiles = {'1': 'Low risk', '2': 'Medium risk', '3': 'Medium risk', '4': 'High risk'}
# Transformations to human-readable values
df['level'] = df['level'].apply(lambda x: levels[str(x)])
for c in ['risk_model_size1_percent_size2_percent_sum_quartile', 'risk_model_deaths_injuries_sum_quartile', 'dist_model_score_quartile', 'risk_model_deaths_quartile', 'risk_model_injuries_quartile', 'risk_model_fires_size0_quartile', 'risk_model_fires_size1_quartile', 'risk_model_fires_size2_quartile', 'risk_model_fires_quartile', 'residential_fires_avg_3_years_quartile']:
    df[c] = df[c].apply(lambda x: quartiles[str(int(x))] if not np.isnan(x) else 'N/A')
# Cell output: this department's DIST score per hazard level
df[df['id'] == fd.id][['id', 'name', 'dist_model_score', 'level']]
Out[4]:
Processing pipeline (per department):

- Refresh the `population_quartiles` materialized view (to ensure that we're starting w/ a clean slate)
- `update_nfirs` — django command to pull NFIRS stats into FireCARES per department
- `calc-department-owned-tracts` — django command to calculate the census-tract-based department jurisdiction (`FireDepartment.owned_tracts_geom` on FC `FireDepartment`)
- `calc-structure-counts` — django command to determine the structure counts over a department's census-tract-based jurisdiction (`FireDepartmentRiskModels.structure_count` on FC `FireDepartmentRiskModels`)
- `calc-story-distribution` — (not yet completed) django command to calculate the story distribution for a department's census-tract-based jurisdiction (`FireDepartmentRiskModels.floor_count_coefficients`)
- `import-predictions` — django command to import predictions from Stanley's prediction model
- Refresh the `population_quartiles` materialized view based on the imported information
- `load-dist-scores` — django command to perform the DIST score calculation per department
In [ ]:
# Clear out FireDepartmentRiskModels since all of this information is derived
# WARNING: destructive -- deletes risk models and NFIRS statistics for ALL
# departments, then refreshes population_quartiles so the view matches the
# now-empty tables.
FireDepartmentRiskModels.objects.all().delete()
NFIRSStatistic.objects.all().delete()
with connections['default'].cursor() as c:
    c.execute("refresh materialized view population_quartiles;")
# With the risk models gone, the refreshed view should have no rows
assert PopulationClassQuartile.objects.count() == 0
In [ ]:
similar_departments = list(FireDepartment.objects.filter(id__in=fd.similar_departments.values_list('id', flat=True))) + [fd]
In [ ]:
# Queue NFIRS count updates for every peer department (async via celery .delay)
for f in similar_departments:
    update_nfirs_counts.delay(f.id)
In [ ]:
# Summary table of the peer group, largest population first.
# DataFrame.sort was deprecated in pandas 0.17 and later removed; sort_values
# is the supported, equivalent API.
df = pd.DataFrame([(d.id, d.name, d.population, d.region) for d in similar_departments],
                  columns=['id', 'name', 'population', 'region'])
df.sort_values('population', ascending=False)
In [ ]:
# Sanity check of NFIRS stat coverage: expect one row per department x metric
# x year x hazard level, with exactly 5 hazard levels (low/medium/high/unknown/all).
df = pd.read_sql_query("select * from firestation_nfirsstatistic;", connections['default'])
n_departments = len(df['fire_department_id'].unique())
n_years = len(df['year'].unique())
n_metrics = len(df['metric'].unique())
assert len(df) == n_departments * 5 * n_metrics * n_years
In [ ]:
# NOTE: this takes some time...
# Queue census-tract jurisdiction calculation for each peer department (celery)
for f in similar_departments:
    calculate_department_census_geom.delay(f.id)
# NOTE(review): the tasks above are asynchronous -- the refresh/display below
# only shows the new geometry once the worker has processed this department.
fd.refresh_from_db()
display_geom(fd.owned_tracts_geom)
In [ ]:
# NOTE: this takes some time...
from django.core.management import call_command
# Import Stanley's prediction CSV for just the peer departments
dept_ids = map(lambda x: str(x.id), similar_departments)
call_command('import-predictions', '../predictions.2015.csv', '--ids', *dept_ids) # Also updates the performance score
In [ ]:
# Rebuild population_quartiles now that predictions have been imported
with connections['default'].cursor() as c:
    c.execute('refresh materialized view population_quartiles;')
# Cell output: row count of the refreshed view (should be non-zero now)
PopulationClassQuartile.objects.count()
In [ ]:
# NOTE: this takes a LONG time... (like hours)
# Queue structure-count calculation for each peer department (async via celery)
for f in similar_departments:
    calculate_structure_counts.delay(f.id)
In [ ]:
# Recompute DIST performance scores for the whole peer group (async via celery)
for f in similar_departments:
    update_performance_score.delay(f.id)
In [ ]:
# Re-run the DIST score computation by hand for this department so the
# per-hazard-level scores can be compared against the aggregate ('All') score.
# Same crosstab as above: rows are parcel risk categories, columns are fire
# spread extents.
q = """SELECT *
FROM crosstab(
'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)
FROM buildingfires a left join (
SELECT state,
fdid,
inc_date,
inc_no,
exp_no,
geom,
x.parcel_id,
x.risk_category
FROM (select * from incidentaddress a
left join parcel_risk_category_local b using (parcel_id)
) AS x
) AS y using (state, inc_date, exp_no, fdid, inc_no)
where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')
and fire_sprd is not null and fire_sprd != ''''
group by risk_category, fire_sprd
order by risk_category, fire_sprd ASC')
AS ct(risk_category text, "object_of_origin" bigint, "room_of_origin" bigint, "floor_of_origin" bigint, "building_of_origin" bigint, "beyond" bigint);"""

# FIX: LogNormalDraw is used below but was never imported anywhere in this
# notebook, so the response_times branch raised NameError.
# NOTE(review): confirm this import path against the fire_risk package.
from fire_risk.utils import LogNormalDraw

with connections['nfirs'].cursor() as c:
    c.execute(q, {'state': fd.state, 'fdid': fd.fdid})
    results = dictfetchall(c)

# Running totals across every risk category, fed to the 'All' hazard model
all_counts = dict(object_of_origin=0,
                  room_of_origin=0,
                  floor_of_origin=0,
                  building_of_origin=0,
                  beyond=0)
# Category -> FireDepartmentRiskModels.level id (kept for reference; unused below)
risk_mapping = {'Low': 1, 'Medium': 2, 'High': 4, 'N/A': 5}

# Department-level DIST inputs are loop-invariant; compute them once up front
# (also avoids a NameError after the loop when `results` is empty).
ahs_building_size = ahs_building_areas(fd.fdid, fd.state)
response_times = response_time_distributions.get('{0}-{1}'.format(fd.fdid, fd.state))

print('ALL RESULTS')
df = pd.DataFrame(results)
display(df)
for result in results:
    print(result.get('risk_category'))
    dist_model = dist_model_for_hazard_level(result.get('risk_category'))
    # FIX: crosstab() emits NULL (-> None) for spread categories with no
    # incidents; `get(key, 0)` passes that None through and the += below
    # raised TypeError. Coerce falsy values to 0 instead.
    counts = dict(object_of_origin=result.get('object_of_origin') or 0,
                  room_of_origin=result.get('room_of_origin') or 0,
                  floor_of_origin=result.get('floor_of_origin') or 0,
                  building_of_origin=result.get('building_of_origin') or 0,
                  beyond=result.get('beyond') or 0)
    display(counts)
    # add current risk category to the all risk category
    for key, value in counts.items():
        all_counts[key] += value
    if ahs_building_size is not None:
        counts['building_area_draw'] = ahs_building_size
    if response_times:
        counts['arrival_time_draw'] = LogNormalDraw(*response_times, multiplier=60)
    dist = dist_model(floor_extent=False, **counts)
    print('SCORE: {}'.format(dist.gibbs_sample()))

# Aggregate score across all hazard levels, using the summed counts
dist_model = dist_model_for_hazard_level('All')
if ahs_building_size is not None:
    all_counts['building_area_draw'] = ahs_building_size
if response_times:
    all_counts['arrival_time_draw'] = LogNormalDraw(*response_times, multiplier=60)
dist = dist_model(floor_extent=False, **all_counts)
display(all_counts)
print('SCORE: {}'.format(dist.gibbs_sample()))
In [ ]:
update_performance_score.delay(fd.id)
In [ ]:
! rm scores.db
In [ ]:
# Using sqlite3 since this might be a long-running process and would hate to have to restart
conn = sqlite3.connect('scores.db')
# IF NOT EXISTS makes this cell re-runnable without deleting scores.db first
create = """
CREATE TABLE IF NOT EXISTS scores(
    fc_dept_id INT NOT NULL,
    name TEXT NOT NULL,
    low_count REAL NOT NULL,
    med_count REAL NOT NULL,
    high_count REAL NOT NULL,
    unk_count REAL NOT NULL,
    low_percent REAL NOT NULL,
    med_percent REAL NOT NULL,
    high_percent REAL NOT NULL,
    unk_percent REAL NOT NULL,
    target REAL NOT NULL,
    low_score REAL NOT NULL,
    med_score REAL NOT NULL,
    high_score REAL NOT NULL,
    unk_score REAL NOT NULL,
    all_score REAL NOT NULL,
    all_score_diff REAL NOT NULL,
    diff_percentage REAL NOT NULL);
"""
conn.cursor().execute(create)
# FIX: DDL participates in the implicit transaction under Python's sqlite3
# driver; commit so the table is visible to the fresh connections that later
# cells open against the same file.
conn.commit()
In [ ]:
conn = sqlite3.connect('scores.db')
# Same risk-category x fire-spread crosstab used earlier in the notebook
qcounts = """SELECT *
FROM crosstab(
'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)
FROM buildingfires a left join (
SELECT state,
fdid,
inc_date,
inc_no,
exp_no,
geom,
x.parcel_id,
x.risk_category
FROM (select * from incidentaddress a
left join parcel_risk_category_local b using (parcel_id)
) AS x
) AS y using (state, inc_date, exp_no, fdid, inc_no)
where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')
and fire_sprd is not null and fire_sprd != ''''
group by risk_category, fire_sprd
order by risk_category, fire_sprd ASC')
AS ct(risk_category text, "object_of_origin" bigint, "room_of_origin" bigint, "floor_of_origin" bigint, "building_of_origin" bigint, "beyond" bigint);"""
ins = """INSERT INTO scores VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);"""

def already_in(fd_id):
    """Return True if a scores row already exists for this department (lets the loop resume after interruption)."""
    return conn.cursor().execute('SELECT fc_dept_id FROM scores WHERE fc_dept_id = ?', (fd_id,)).fetchone() is not None

for fd in FireDepartment.priority_departments.all():
    if already_in(fd.id):
        print('Skipping {}, already in...'.format(fd.name))
        continue
    counts = pd.read_sql_query(qcounts, connections['nfirs'], params={'state': fd.state, 'fdid': fd.fdid})
    by_category = counts.set_index('risk_category')
    # Collapse the spread columns into a total incident count per risk category
    counts = by_category.sum(axis=1).to_dict()
    chigh, clow, cmed, cna = counts.get('High', 0), counts.get('Low', 0), counts.get('Medium', 0), counts.get('N/A', 0)
    tot = float(chigh + clow + cmed + cna)
    if tot:
        # Share of incidents falling in each risk category
        hper = chigh / tot
        lper = clow / tot
        mper = cmed / tot
        nper = cna / tot
        # Stored DIST scores per hazard level (0 when missing)
        low = fd.metrics.dist_model_score.low or 0
        medium = fd.metrics.dist_model_score.medium or 0
        high = fd.metrics.dist_model_score.high or 0
        unk = fd.metrics.dist_model_score.unknown or 0
        _all = fd.metrics.dist_model_score.all or 0
        # Expected aggregate: per-level scores weighted by incident share
        target = hper * high + lper * low + mper * medium + nper * unk
        off = _all - target
        # float('inf') replaces the opaque 9e999 sentinel (same value, clearer intent)
        diff_per = off / _all if _all else float('inf')
        args = (fd.id, fd.name, clow, cmed, chigh, cna, lper, mper, hper, nper, target, low, medium, high, unk, _all, off, diff_per)
        print('Adding counts for {}'.format(fd.name))
        conn.cursor().execute(ins, args)
        conn.commit()
    else:
        print('No counts for {}'.format(fd.name))
In [ ]:
# Display the collected scores, worst aggregate/target disagreement first,
# and export to CSV for sharing.
conn = sqlite3.connect('scores.db')
df = pd.read_sql_query("select * from scores;", conn)
# DataFrame.sort was removed from pandas; sort_values is the supported API
df = df.sort_values('diff_percentage', ascending=False)
display(df)
df.to_csv('/tmp/scores.csv')