In [1]:
# This notebook assumes to be running from your FireCARES VM (eg. python manage.py shell_plus --notebook --no-browser)
import sys
import os
import time
import pandas as pd
import numpy as np
sys.path.insert(0, os.path.realpath('..'))
import folium
import django
import sqlite3
django.setup()
from django.db import connections
from pretty import pprint
from firecares.firestation.models import (FireDepartment, FireStation, NFIRSStatistic, FireDepartmentRiskModels,
PopulationClassQuartile)
from fire_risk.models import DIST, DISTMediumHazard, DISTHighHazard
from fire_risk.models.DIST.providers.ahs import ahs_building_areas
from fire_risk.models.DIST.providers.iaff import response_time_distributions
from django.db.models import Avg, Max, Min, Q
from django.contrib.gis.geos import GEOSGeometry
from IPython.display import display
from firecares.utils import lenient_summation, dictfetchall
from firecares.tasks.update import (calculate_department_census_geom, calculate_story_distribution,
calculate_structure_counts, update_performance_score, update_nfirs_counts,
dist_model_for_hazard_level)
pd.set_option("display.max_rows", 2000)
pd.set_option("display.max_columns", 100)
def display_geom(geom):
    """Render a GEOS geometry on a folium map, zoomed to the geometry's bounding box."""
    center = [geom.centroid.y, geom.centroid.x]
    fmap = folium.Map(location=center, tiles='Stamen Toner')
    fmap.choropleth(geo_str=geom.geojson, line_weight=0, fill_opacity=0.2, fill_color='green')
    # GEOS extent is (xmin, ymin, xmax, ymax); folium wants (lat, lng) corner pairs
    xmin, ymin, xmax, ymax = geom.extent
    fmap.fit_bounds([(ymin, xmin), (ymax, xmax)])
    return fmap
# Philadelphia-specific: 91907 is the FireCARES department id used throughout
# this notebook (TODO confirm against the FireCARES admin)
fd = FireDepartment.objects.get(id=91907)
In [2]:
# Crosstab of building-fire counts for this department: rows are parcel risk
# categories (Low/Medium/High, 'N/A' when the incident address has no parcel
# match), columns are NFIRS fire spread extents (object -> beyond building).
# Doubled single quotes are escapes inside crosstab()'s quoted inner query.
# prop_use codes restrict the incidents to residential property uses.
# NOTE(review): '%(state)s' sits inside the crosstab string literal; this only
# works because the driver substitutes a quoted value, producing the doubled
# quotes the inner query needs -- confirm before reusing elsewhere.
q = """SELECT *
FROM crosstab(
'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)
FROM buildingfires a left join (
SELECT state,
fdid,
inc_date,
inc_no,
exp_no,
geom,
x.parcel_id,
x.risk_category
FROM (select * from incidentaddress a
left join parcel_risk_category_local b using (parcel_id)
) AS x
) AS y using (state, inc_date, exp_no, fdid, inc_no)
where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')
and fire_sprd is not null and fire_sprd != ''''
group by risk_category, fire_sprd
order by risk_category, fire_sprd ASC')
AS ct(risk_category text, "object_of_origin" bigint, "room_of_origin" bigint, "floor_of_origin" bigint, "building_of_origin" bigint, "beyond" bigint);"""
# Cell output: the crosstab as a DataFrame; params are bound by the DB driver
pd.read_sql_query(q, connections['nfirs'], params={'state': fd.state, 'fdid': fd.fdid})
Out[2]:
In [3]:
update_performance_score(fd.id)
In [4]:
# Department risk/quartile summary for population class 9 departments.
# Quartiles (ntile(4)) are computed per (population_class, hazard level)
# partition; the CASE wrappers plus the boolean partition keys keep NULL
# metrics out of the quartile buckets.
q = """SELECT
fd."id",
fd."fdid",
fd."name",
fd."state",
fd."region",
(SELECT COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0)) AS "risk_model_size1_percent_size2_percent_sum",
(SELECT COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0)) AS "risk_model_deaths_injuries_sum",
rm."dist_model_score",
rm."risk_model_deaths",
rm."risk_model_injuries",
rm."risk_model_fires",
rm."risk_model_fires_size0",
rm."risk_model_fires_size0_percentage",
rm."risk_model_fires_size1",
rm."risk_model_fires_size1_percentage",
rm."risk_model_fires_size2",
rm."risk_model_fires_size2_percentage",
fd."population",
fd."population_class",
fd."featured",
nfirs.avg_fires as "residential_fires_avg_3_years",
rm."level",
CASE WHEN (rm."risk_model_fires_size1_percentage" IS NOT NULL OR rm."risk_model_fires_size2_percentage" IS NOT NULL) THEN ntile(4) over (partition by COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0) != 0, fd.population_class, rm.level order by COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0)) ELSE NULL END AS "risk_model_size1_percent_size2_percent_sum_quartile",
CASE WHEN (rm."risk_model_deaths" IS NOT NULL OR rm."risk_model_injuries" IS NOT NULL) THEN ntile(4) over (partition by COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0) != 0, fd.population_class, rm.level order by COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0)) ELSE NULL END AS "risk_model_deaths_injuries_sum_quartile",
CASE WHEN rm."dist_model_score" IS NOT NULL THEN ntile(4) over (partition by rm.dist_model_score is not null, fd.population_class, rm.level order by rm.dist_model_score) ELSE NULL END AS "dist_model_score_quartile",
CASE WHEN rm."risk_model_deaths" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_deaths is not null, fd.population_class, rm.level order by rm.risk_model_deaths) ELSE NULL END AS "risk_model_deaths_quartile",
CASE WHEN rm."risk_model_injuries" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_injuries is not null, fd.population_class, rm.level order by rm.risk_model_injuries) ELSE NULL END AS "risk_model_injuries_quartile",
CASE WHEN rm."risk_model_fires_size0" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size0 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size0) ELSE NULL END AS "risk_model_fires_size0_quartile",
CASE WHEN rm."risk_model_fires_size1" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size1 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size1) ELSE NULL END AS "risk_model_fires_size1_quartile",
CASE WHEN rm."risk_model_fires_size2" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires_size2 is not null, fd.population_class, rm.level order by rm.risk_model_fires_size2) ELSE NULL END AS "risk_model_fires_size2_quartile",
CASE WHEN rm."risk_model_fires" IS NOT NULL THEN ntile(4) over (partition by rm.risk_model_fires is not null, fd.population_class, rm.level order by rm.risk_model_fires) ELSE NULL END AS "risk_model_fires_quartile",
CASE WHEN "nfirs"."avg_fires" IS NOT NULL THEN ntile(4) over (partition by avg_fires is not null, fd.population_class, rm.level order by avg_fires) ELSE NULL END AS "residential_fires_avg_3_years_quartile"
FROM "firestation_firedepartment" fd
INNER JOIN "firestation_firedepartmentriskmodels" rm ON
rm.department_id = fd.id
LEFT JOIN (
SELECT fire_department_id, AVG(count) as avg_fires, level
from firestation_nfirsstatistic
WHERE year >= 2010 and metric='residential_structure_fires'
GROUP BY fire_department_id, level) nfirs
ON (fd.id=nfirs.fire_department_id and nfirs.level = rm.level)
WHERE fd.archived=False and fd.population_class = 9
ORDER BY fd.id"""
df = pd.read_sql_query(q, connections['default'])
# Hazard-level ids used by this notebook (see risk_mapping below).
# NOTE(review): there is no key '3' -- a level value of 3 would raise KeyError.
levels = {'0': 'All hazard levels', '1': 'Low hazard', '2': 'Medium hazard', '4': 'High hazard', '5': 'Unknown hazard'}
# Quartile -> label; quartiles 2 and 3 both map to 'Medium risk', presumably
# an intentional collapse to a 3-bucket display -- confirm with the team.
quartiles = {'1': 'Low risk', '2': 'Medium risk', '3': 'Medium risk', '4': 'High risk'}
# Transformations to human-readable values
df['level'] = df['level'].apply(lambda x: levels[str(x)])
for c in ['risk_model_size1_percent_size2_percent_sum_quartile', 'risk_model_deaths_injuries_sum_quartile', 'dist_model_score_quartile', 'risk_model_deaths_quartile', 'risk_model_injuries_quartile', 'risk_model_fires_size0_quartile', 'risk_model_fires_size1_quartile', 'risk_model_fires_size2_quartile', 'risk_model_fires_quartile', 'residential_fires_avg_3_years_quartile']:
    df[c] = df[c].apply(lambda x: quartiles[str(int(x))] if not np.isnan(x) else 'N/A')
# Cell output: this department's DIST score per hazard level
df[df['id'] == fd.id][['id', 'name', 'dist_model_score', 'level']]
Out[4]:
Processing pipeline (per department):

- Refresh the `population_quartiles` materialized view (to ensure that we're starting w/ a clean slate)
- `update_nfirs` — django command to pull NFIRS stats into FireCARES per department
- `calc-department-owned-tracts` — django command to calculate the census-tract-based department jurisdiction (`FireDepartment.owned_tracts_geom` on FC `FireDepartment`)
- `calc-structure-counts` — django command to determine the structure counts over a department's census-tract-based jurisdiction (`FireDepartmentRiskModels.structure_count` on FC `FireDepartmentRiskModels`)
- `calc-story-distribution` — (not yet completed) django command to calculate the story distribution for a department's census-tract-based jurisdiction (`FireDepartmentRiskModels.floor_count_coefficients`)
- `import-predictions` — django command to import predictions from Stanley's prediction model
- Refresh the `population_quartiles` materialized view based on the imported information
- `load-dist-scores` — django command to perform the DIST score calculation per department
In [ ]:
# Clear out FireDepartmentRiskModels since all of this information is derived
# WARNING: destructive -- deletes risk models and NFIRS statistics for ALL
# departments, then refreshes population_quartiles so the view matches the
# now-empty tables.
FireDepartmentRiskModels.objects.all().delete()
NFIRSStatistic.objects.all().delete()
with connections['default'].cursor() as c:
    c.execute("refresh materialized view population_quartiles;")
# With the risk models gone, the refreshed view should have no rows
assert PopulationClassQuartile.objects.count() == 0
In [ ]:
similar_departments = list(FireDepartment.objects.filter(id__in=fd.similar_departments.values_list('id', flat=True))) + [fd]
In [ ]:
# Queue NFIRS count updates for every peer department (async via celery .delay)
for f in similar_departments:
    update_nfirs_counts.delay(f.id)
In [ ]:
# Summary table of the peer group, largest population first.
# DataFrame.sort was deprecated in pandas 0.17 and later removed; sort_values
# is the supported, equivalent API.
df = pd.DataFrame([(d.id, d.name, d.population, d.region) for d in similar_departments],
                  columns=['id', 'name', 'population', 'region'])
df.sort_values('population', ascending=False)
In [ ]:
# Sanity check of NFIRS stat coverage: expect one row per department x metric
# x year x hazard level, with exactly 5 hazard levels (low/medium/high/unknown/all).
df = pd.read_sql_query("select * from firestation_nfirsstatistic;", connections['default'])
n_departments = len(df['fire_department_id'].unique())
n_years = len(df['year'].unique())
n_metrics = len(df['metric'].unique())
assert len(df) == n_departments * 5 * n_metrics * n_years
In [ ]:
# NOTE: this takes some time...
# Queue census-tract jurisdiction calculation for each peer department (celery)
for f in similar_departments:
    calculate_department_census_geom.delay(f.id)
# NOTE(review): the tasks above are asynchronous -- the refresh/display below
# only shows the new geometry once the worker has processed this department.
fd.refresh_from_db()
display_geom(fd.owned_tracts_geom)
In [ ]:
# NOTE: this takes some time...
from django.core.management import call_command
# Import Stanley's prediction CSV for just the peer departments
dept_ids = map(lambda x: str(x.id), similar_departments)
call_command('import-predictions', '../predictions.2015.csv', '--ids', *dept_ids) # Also updates the performance score
In [ ]:
# Rebuild population_quartiles now that predictions have been imported
with connections['default'].cursor() as c:
    c.execute('refresh materialized view population_quartiles;')
# Cell output: row count of the refreshed view (should be non-zero now)
PopulationClassQuartile.objects.count()
In [ ]:
# NOTE: this takes a LONG time... (like hours)
# Queue structure-count calculation for each peer department (async via celery)
for f in similar_departments:
    calculate_structure_counts.delay(f.id)
In [ ]:
# Recompute DIST performance scores for the whole peer group (async via celery)
for f in similar_departments:
    update_performance_score.delay(f.id)
In [ ]:
# Re-run the DIST score computation by hand for this department so the
# per-hazard-level scores can be compared against the aggregate ('All') score.
# Same crosstab as above: rows are parcel risk categories, columns are fire
# spread extents.
q = """SELECT *
FROM crosstab(
'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)
FROM buildingfires a left join (
SELECT state,
fdid,
inc_date,
inc_no,
exp_no,
geom,
x.parcel_id,
x.risk_category
FROM (select * from incidentaddress a
left join parcel_risk_category_local b using (parcel_id)
) AS x
) AS y using (state, inc_date, exp_no, fdid, inc_no)
where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')
and fire_sprd is not null and fire_sprd != ''''
group by risk_category, fire_sprd
order by risk_category, fire_sprd ASC')
AS ct(risk_category text, "object_of_origin" bigint, "room_of_origin" bigint, "floor_of_origin" bigint, "building_of_origin" bigint, "beyond" bigint);"""

# FIX: LogNormalDraw is used below but was never imported anywhere in this
# notebook, so the response_times branch raised NameError.
# NOTE(review): confirm this import path against the fire_risk package.
from fire_risk.utils import LogNormalDraw

with connections['nfirs'].cursor() as c:
    c.execute(q, {'state': fd.state, 'fdid': fd.fdid})
    results = dictfetchall(c)

# Running totals across every risk category, fed to the 'All' hazard model
all_counts = dict(object_of_origin=0,
                  room_of_origin=0,
                  floor_of_origin=0,
                  building_of_origin=0,
                  beyond=0)
# Category -> FireDepartmentRiskModels.level id (kept for reference; unused below)
risk_mapping = {'Low': 1, 'Medium': 2, 'High': 4, 'N/A': 5}

# Department-level DIST inputs are loop-invariant; compute them once up front
# (also avoids a NameError after the loop when `results` is empty).
ahs_building_size = ahs_building_areas(fd.fdid, fd.state)
response_times = response_time_distributions.get('{0}-{1}'.format(fd.fdid, fd.state))

print('ALL RESULTS')
df = pd.DataFrame(results)
display(df)
for result in results:
    print(result.get('risk_category'))
    dist_model = dist_model_for_hazard_level(result.get('risk_category'))
    # FIX: crosstab() emits NULL (-> None) for spread categories with no
    # incidents; `get(key, 0)` passes that None through and the += below
    # raised TypeError. Coerce falsy values to 0 instead.
    counts = dict(object_of_origin=result.get('object_of_origin') or 0,
                  room_of_origin=result.get('room_of_origin') or 0,
                  floor_of_origin=result.get('floor_of_origin') or 0,
                  building_of_origin=result.get('building_of_origin') or 0,
                  beyond=result.get('beyond') or 0)
    display(counts)
    # add current risk category to the all risk category
    for key, value in counts.items():
        all_counts[key] += value
    if ahs_building_size is not None:
        counts['building_area_draw'] = ahs_building_size
    if response_times:
        counts['arrival_time_draw'] = LogNormalDraw(*response_times, multiplier=60)
    dist = dist_model(floor_extent=False, **counts)
    print('SCORE: {}'.format(dist.gibbs_sample()))

# Aggregate score across all hazard levels, using the summed counts
dist_model = dist_model_for_hazard_level('All')
if ahs_building_size is not None:
    all_counts['building_area_draw'] = ahs_building_size
if response_times:
    all_counts['arrival_time_draw'] = LogNormalDraw(*response_times, multiplier=60)
dist = dist_model(floor_extent=False, **all_counts)
display(all_counts)
print('SCORE: {}'.format(dist.gibbs_sample()))
In [ ]:
update_performance_score.delay(fd.id)
In [ ]:
! rm scores.db
In [ ]:
# Using sqlite3 since this might be a long-running process and would hate to have to restart
conn = sqlite3.connect('scores.db')
# IF NOT EXISTS makes this cell re-runnable without deleting scores.db first
create = """
CREATE TABLE IF NOT EXISTS scores(
    fc_dept_id INT NOT NULL,
    name TEXT NOT NULL,
    low_count REAL NOT NULL,
    med_count REAL NOT NULL,
    high_count REAL NOT NULL,
    unk_count REAL NOT NULL,
    low_percent REAL NOT NULL,
    med_percent REAL NOT NULL,
    high_percent REAL NOT NULL,
    unk_percent REAL NOT NULL,
    target REAL NOT NULL,
    low_score REAL NOT NULL,
    med_score REAL NOT NULL,
    high_score REAL NOT NULL,
    unk_score REAL NOT NULL,
    all_score REAL NOT NULL,
    all_score_diff REAL NOT NULL,
    diff_percentage REAL NOT NULL);
"""
conn.cursor().execute(create)
# FIX: DDL participates in the implicit transaction under Python's sqlite3
# driver; commit so the table is visible to the fresh connections that later
# cells open against the same file.
conn.commit()
In [ ]:
conn = sqlite3.connect('scores.db')
# Same risk-category x fire-spread crosstab used earlier in the notebook
qcounts = """SELECT *
FROM crosstab(
'select COALESCE(y.risk_category, ''N/A'') as risk_category, fire_sprd, count(*)
FROM buildingfires a left join (
SELECT state,
fdid,
inc_date,
inc_no,
exp_no,
geom,
x.parcel_id,
x.risk_category
FROM (select * from incidentaddress a
left join parcel_risk_category_local b using (parcel_id)
) AS x
) AS y using (state, inc_date, exp_no, fdid, inc_no)
where a.state='%(state)s' and a.fdid='%(fdid)s' and prop_use in (''419'',''429'',''439'',''449'',''459'',''460'',''462'',''464'',''400'')
and fire_sprd is not null and fire_sprd != ''''
group by risk_category, fire_sprd
order by risk_category, fire_sprd ASC')
AS ct(risk_category text, "object_of_origin" bigint, "room_of_origin" bigint, "floor_of_origin" bigint, "building_of_origin" bigint, "beyond" bigint);"""
ins = """INSERT INTO scores VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);"""

def already_in(fd_id):
    """Return True if a scores row already exists for this department (lets the loop resume after interruption)."""
    return conn.cursor().execute('SELECT fc_dept_id FROM scores WHERE fc_dept_id = ?', (fd_id,)).fetchone() is not None

for fd in FireDepartment.priority_departments.all():
    if already_in(fd.id):
        print('Skipping {}, already in...'.format(fd.name))
        continue
    counts = pd.read_sql_query(qcounts, connections['nfirs'], params={'state': fd.state, 'fdid': fd.fdid})
    by_category = counts.set_index('risk_category')
    # Collapse the spread columns into a total incident count per risk category
    counts = by_category.sum(axis=1).to_dict()
    chigh, clow, cmed, cna = counts.get('High', 0), counts.get('Low', 0), counts.get('Medium', 0), counts.get('N/A', 0)
    tot = float(chigh + clow + cmed + cna)
    if tot:
        # Share of incidents falling in each risk category
        hper = chigh / tot
        lper = clow / tot
        mper = cmed / tot
        nper = cna / tot
        # Stored DIST scores per hazard level (0 when missing)
        low = fd.metrics.dist_model_score.low or 0
        medium = fd.metrics.dist_model_score.medium or 0
        high = fd.metrics.dist_model_score.high or 0
        unk = fd.metrics.dist_model_score.unknown or 0
        _all = fd.metrics.dist_model_score.all or 0
        # Expected aggregate: per-level scores weighted by incident share
        target = hper * high + lper * low + mper * medium + nper * unk
        off = _all - target
        # float('inf') replaces the opaque 9e999 sentinel (same value, clearer intent)
        diff_per = off / _all if _all else float('inf')
        args = (fd.id, fd.name, clow, cmed, chigh, cna, lper, mper, hper, nper, target, low, medium, high, unk, _all, off, diff_per)
        print('Adding counts for {}'.format(fd.name))
        conn.cursor().execute(ins, args)
        conn.commit()
    else:
        print('No counts for {}'.format(fd.name))
In [ ]:
# Display the collected scores, worst aggregate/target disagreement first,
# and export to CSV for sharing.
conn = sqlite3.connect('scores.db')
df = pd.read_sql_query("select * from scores;", conn)
# DataFrame.sort was removed from pandas; sort_values is the supported API
df = df.sort_values('diff_percentage', ascending=False)
display(df)
df.to_csv('/tmp/scores.csv')