In [ ]:
# This notebook assumes to be running from your FireCARES VM (eg. python manage.py shell_plus --notebook --no-browser)
import sys
import os
import time
import pandas as pd
import numpy as np
sys.path.insert(0, os.path.realpath('..'))
import folium
import django
django.setup()
from django.db import connections
from pretty import pprint
from firecares.firestation.models import FireDepartment, FireStation, NFIRSStatistic
from django.db.models import Avg, Max, Min, Q
from django.contrib.gis.geos import GEOSGeometry
from IPython.display import display
from firecares.utils import lenient_summation, dictfetchall
pd.set_option("display.max_rows",100)
def display_geom(geom):
_map = folium.Map(location=[geom.centroid.y, geom.centroid.x],
tiles='Stamen Toner')
_map.choropleth(geo_str=geom.geojson, line_weight=0, fill_opacity=0.2, fill_color='green')
ll = geom.extent[1::-1]
ur = geom.extent[3:1:-1]
_map.fit_bounds([ll, ur])
return _map
fd = FireDepartment.objects.get(id=95982)
In [ ]:
df = pd.read_csv('/firecares/predictions.2015.csv')
cols = ['lr_fire', 'mr_fire', 'h.fire', 'lr_inj', 'mr_inj', 'h.inj', 'lr_death', 'mr_death', 'h.death', 'lr_size_2', 'mr_size_2', 'h.size2', 'lr_size_3', 'mr_size_3', 'h.size3']
# Find stats for Richmond, VA
richmond = df[df['fd_id'] == 93345]
In [ ]:
# Sum all Richmond rows
df2 = richmond.groupby(['fd_id', 'state'])[cols].sum()
df.groupby(['state'])[cols].mean()
In [ ]:
# High standard deviation for high-risk-level fire values
display(richmond.std())
display(richmond.mean())
display(richmond.sum())
display(richmond.max())
In [ ]:
# Actuals from NFIRS, average of residential structure fires over years (for high structure risk level)
pprint(list(fd.nfirsstatistic_set.filter(metric='residential_structure_fires', year__gte=2010, level=4).values('count', 'year')))
high_avg = fd.nfirsstatistic_set.filter(metric='residential_structure_fires', year__gte=2010, level=4).aggregate(Avg('count')).get('count__avg')
print 'Actual average over high-risk structure types per year: {}'.format(high_avg)
In [ ]:
# The current predicted # of fires for high risk structures
print 'Predicted # fires for high-risk structures: {}'.format(sum([df2['h.fire'][0], df2['h.size2'][0], df2['h.size3'][0]]))
In [ ]:
# Verify "Number of fires -> Average since 2010" displayed values
low = fd.nfirsstatistic_set.filter(metric='residential_structure_fires', year__gte=2010, level=1).aggregate(Avg('count')).get('count__avg')
metrics = fd.metrics.residential_fires_3_year_avg
assert low == metrics.low
display(low)
display(metrics)
In [ ]:
# Verify predicted deaths and injuries for "Low" structure hazard levels displayed values
low = df2['lr_death'][0] + df2['lr_inj'][0]
assert abs(low - fd.metrics.deaths_and_injuries_sum.low) < 0.0001
display(fd.metrics.deaths_and_injuries_sum)
In [ ]:
# Verify sum of death and injuries over all risk levels
v = sum(filter(lambda x: x >= 0, [df2['lr_death'][0], df2['lr_inj'][0], df2['mr_death'][0], df2['mr_inj'][0], df2['h.death'][0], df2['h.inj'][0]]))
assert abs(v - fd.metrics.deaths_and_injuries_sum.all) < 0.0001
In [ ]:
# Building fires counts/locations
fd = FireDepartment.objects.get(id=93345)
df = pd.read_csv('/firecares/93345-building-fires.csv')
display(df.count())
display(df)
display(df.replace(np.nan, 'Unknown').groupby('risk_category').agg('count')[['alarm']].rename(columns={'alarm': 'Count'}))
In [ ]:
from django.db import connections
from django.contrib.gis.geos import GEOSGeometry
with connections['nfirs'].cursor() as c:
q = """SELECT ST_Multi(ST_Union(bg.geom))
FROM nist.tract_years ty
INNER JOIN census_block_groups_2010 bg
ON ty.tr10_fid = ('14000US'::text || "substring"((bg.geoid10)::text, 0, 12))
WHERE ty.fc_dept_id = %(id)s
GROUP BY ty.fc_dept_id"""
c.execute(q, {'id': 93345})
geom = GEOSGeometry(c.fetchone()[0])
assert geom == fd.owned_tracts_geom
In [ ]:
# Coverage for "owned" census tracts
_map = folium.Map(zoom_start=12,
location=[geom.centroid.y, geom.centroid.x],
tiles='Stamen Toner')
# Green background is the "owned" censuses for the FD based on response to incidents
_map.choropleth(geo_str=fd.owned_tracts_geom.geojson, line_weight=0, fill_opacity=0.2, fill_color='green')
# Red outline is jurisdiction
_map.choropleth(geo_str=fd.geom.geojson, fill_opacity=0, line_weight=10, line_color='red')
folium.LayerControl().add_to(_map)
colors = {'Low': 'green', 'Medium': 'yellow', 'High': 'red', 'nan': 'gray'}
for r in df[['x', 'y', 'risk_category']].values:
if r[0] and r[1]:
folium.CircleMarker(location=[float(r[1]), float(r[0])],
fill_color=colors[str(r[2])], radius=200, fill_opacity=0.4, popup='{}, {}'.format(r[1], r[0])).add_to(_map)
display(_map)
In [ ]:
# Structure/parcel counts over coverage area by hazard level
q = """SELECT sum(case when l.risk_category = 'Low' THEN 1 ELSE 0 END) as low,
sum(CASE WHEN l.risk_category = 'Medium' THEN 1 ELSE 0 END) as medium,
sum(CASE WHEN l.risk_category = 'High' THEN 1 ELSE 0 END) high,
sum(CASE WHEN l.risk_category is null THEN 1 ELSE 0 END) as unknown
FROM parcel_risk_category_local l
JOIN (SELECT ST_SetSRID(%(owned_geom)s::geometry, 4326) as owned_geom) x
ON owned_geom && l.wkb_geometry
WHERE ST_Intersects(owned_geom, l.wkb_geometry)"""
with connections['nfirs'].cursor() as c:
c.execute(q, {'owned_geom': fd.owned_tracts_geom.hex})
res = dictfetchall(c)
display(res)
In [ ]:
# Find multiple incidents at each location
df2 = df.groupby(['x', 'y']).agg('count').sort('alarm', ascending=False)
dup_df = df2[df2['alarm'] > 1][['alarm']]
display(dup_df)
print 'Location count w/ multiple incidents: {}'.format(len(dup_df))
In [ ]:
q = """
select alarm, a.inc_type, alarms,ff_death, oth_death, ST_X(geom) as x, st_y(geom) as y, COALESCE(b.risk_category, 'Unknown') as risk_category, b.parcel_id
from buildingfires a
left join (SELECT * FROM
(SELECT state, fdid, inc_date, inc_no, exp_no, geom, b.parcel_id, b.risk_category, ROW_NUMBER() OVER (PARTITION BY state, fdid, inc_date, inc_no, exp_no, geom ORDER BY st_distance(st_centroid(b.wkb_geometry), a.geom)) AS r
FROM (select * from incidentaddress where state=%(state)s and fdid=%(fdid)s) a
left join parcel_risk_category_local b on a.geom && b.wkb_geometry) x
WHERE x.r = 1) b
using (state, inc_date, exp_no, fdid, inc_no) where state=%(state)s and fdid=%(fdid)s
order by a.alarm"""
odf = pd.read_sql_query(q, connections['nfirs'], params={'fdid': fd.fdid, 'state': fd.state})
In [ ]:
# Map parcel misses
q = """
select alarm, bf.inc_type, bf.alarms, ff_death, oth_death, ST_X(ia.geom) as x, ST_Y(ia.geom) as y, COALESCE(rc.risk_category, 'Unknown') as risk_category, rc.parcel_id
from buildingfires bf
left join incidentaddress ia
using (state, inc_date, exp_no, fdid, inc_no)
left join parcel_risk_category_local rc
on rc.parcel_id = ia.parcel_id
where bf.state=%(state)s and bf.fdid=%(fdid)s
order by bf.alarm"""
df = pd.read_sql_query(q, connections['nfirs'], params={'fdid': fd.fdid, 'state': fd.state})
# Display building fires that don't have a parcel (parcel miss)
misses = df[df['parcel_id'].isnull()]
print 'Misses: {}'.format(misses.shape[0])
import folium
_map = folium.Map(zoom_start=12,
location=[fd.geom.centroid.y, fd.geom.centroid.x],
tiles='Stamen Toner')
_map.choropleth(geo_str=fd.geom.geojson, line_weight=0, fill_opacity=0.1, fill_color='green')
folium.LayerControl().add_to(_map)
for r in misses[['x', 'y']].values:
if r[0] and r[1]:
folium.CircleMarker(location=[float(r[1]), float(r[0])],
fill_color='gray', radius=20, fill_opacity=0.4, popup='{}, {}'.format(r[1], r[0])).add_to(_map)
display(_map)
In [ ]:
# Parcel coverage for Richmond w/ parcel miss incidents
q = """
select ST_Union(p.wkb_geometry)
from parcel_risk_category_local p
where ST_Intersects(p.wkb_geometry, ST_SetSRID(%(owned_geom)s::geometry, 4326))
"""
parcels = pd.read_sql_query(q, connections['nfirs'], params={'owned_geom': fd.owned_tracts_geom.hex})
g = GEOSGeometry(parcels.values[0][0])
_map = display_geom(g)
pts = 0
for r in misses[['x', 'y']].values:
if r[0] and r[1]:
pts += 1
folium.Marker(location=[float(r[1]), float(r[0])], popup='{}, {}'.format(r[1], r[0])).add_to(_map)
display(_map)
display(pts)
In [ ]:
# Count missing geocodes on all priority departments
from firecares.firestation.models import FireDepartment
priorities = list(FireDepartment.priority_departments.all().values('fdid', 'state', 'name'))
q = """
select fdid, state, sum(CASE WHEN geom is null THEN 1 ELSE 0 END) as null_count, count(1), sum(CASE WHEN geom is null THEN 1 ELSE 0 END) / count(1)::decimal * 100.0 as percent_null
from incidentaddress
where fdid = %(fdid)s and state = %(state)s
group by fdid, state
"""
df = pd.DataFrame(columns=['fdid', 'state', 'null_count', 'count', 'percent_null'])
for i in priorities:
fdid, state, name = i.values()
df = df.append(pd.read_sql_query(q, connections['nfirs'], params={'fdid': fdid, 'state': state}))
df = df.sort('percent_null', ascending=False)
display(df)
In [ ]:
fd = FireDepartment.objects.get(id=95982)
fd
In [ ]:
q = """SELECT ST_Multi(ST_Union(bg.geom))
FROM nist.tract_years ty
INNER JOIN census_block_groups_2010 bg
ON ty.tr10_fid = ('14000US'::text || "substring"((bg.geoid10)::text, 0, 12))
WHERE ty.fc_dept_id = %(id)s
GROUP BY ty.fc_dept_id"""
with connections['nfirs'].cursor() as c:
c.execute(q, {'id': fd.id})
geom = c.fetchone()[0]
fd.owned_tracts_geom = geom
fd.save()
In [ ]:
display(display_geom(fd.owned_tracts_geom))
display(fd.owned_tracts_geom.area)
In [ ]:
q = """SELECT sum(case when l.risk_category = 'Low' THEN 1 ELSE 0 END) as low,
sum(CASE WHEN l.risk_category = 'Medium' THEN 1 ELSE 0 END) as medium,
sum(CASE WHEN l.risk_category = 'High' THEN 1 ELSE 0 END) high,
sum(CASE WHEN l.risk_category is null THEN 1 ELSE 0 END) as unknown
FROM parcel_risk_category_local l
JOIN (SELECT ST_SetSRID(%(owned_geom)s::geometry, 4326) as owned_geom) x
ON owned_geom && l.wkb_geometry
WHERE ST_Intersects(owned_geom, l.wkb_geometry)"""
with connections['nfirs'].cursor() as c:
c.execute(q, {'owned_geom': fd.owned_tracts_geom.hex})
res = dictfetchall(c)
display(res)
In [ ]:
q = """
select ST_Union(p.wkb_geometry)
from parcel_risk_category_local p
where ST_Intersects(p.wkb_geometry, ST_SetSRID(%(owned_geom)s::geometry, 4326))
"""
parcels = pd.read_sql_query(q, connections['nfirs'], params={'owned_geom': fd.owned_tracts_geom.hex})
g = GEOSGeometry(parcels.values[0][0])
display_geom(g)
In [ ]:
parcel_coverage_area = g.area
owned_tracts = fd.owned_tracts_geom.area
print '% coverage of owned census tracts by parcels: {}'.format(parcel_coverage_area / owned_tracts)