In [ ]:
# This notebook assumes to be running from your FireCARES VM (eg. python manage.py shell_plus --notebook --no-browser)

import sys
import os
import time
import pandas as pd
import numpy as np
sys.path.insert(0, os.path.realpath('..'))
import folium
import django
django.setup()
from django.db import connections
from pretty import pprint
from firecares.firestation.models import FireDepartment, FireStation, NFIRSStatistic
from django.db.models import Avg, Max, Min, Q
from django.contrib.gis.geos import GEOSGeometry
from IPython.display import display
from firecares.utils import lenient_summation, dictfetchall
pd.set_option("display.max_rows",100)

def display_geom(geom):
    _map = folium.Map(location=[geom.centroid.y, geom.centroid.x],
                      tiles='Stamen Toner')
    _map.choropleth(geo_str=geom.geojson, line_weight=0, fill_opacity=0.2, fill_color='green')
    ll = geom.extent[1::-1]
    ur = geom.extent[3:1:-1]
    _map.fit_bounds([ll, ur])

    return _map

fd = FireDepartment.objects.get(id=95982)

Predictions 2015 csv processing


In [ ]:
df = pd.read_csv('/firecares/predictions.2015.csv')
cols = ['lr_fire', 'mr_fire', 'h.fire', 'lr_inj', 'mr_inj', 'h.inj', 'lr_death', 'mr_death', 'h.death', 'lr_size_2', 'mr_size_2', 'h.size2', 'lr_size_3', 'mr_size_3', 'h.size3']
# Find stats for Richmond, VA
richmond = df[df['fd_id'] == 93345]

In [ ]:
# Sum all Richmond rows
df2 = richmond.groupby(['fd_id', 'state'])[cols].sum()
df.groupby(['state'])[cols].mean()

In [ ]:
# High standard deviation for high-risk-level fire values
display(richmond.std())
display(richmond.mean())
display(richmond.sum())
display(richmond.max())

In [ ]:
# Actuals from NFIRS, average of residential structure fires over years (for high structure risk level)

pprint(list(fd.nfirsstatistic_set.filter(metric='residential_structure_fires', year__gte=2010, level=4).values('count', 'year')))

high_avg = fd.nfirsstatistic_set.filter(metric='residential_structure_fires', year__gte=2010, level=4).aggregate(Avg('count')).get('count__avg')
print 'Actual average over high-risk structure types per year: {}'.format(high_avg)

In [ ]:
# The current predicted # of fires for high risk structures
print 'Predicted # fires for high-risk structures: {}'.format(sum([df2['h.fire'][0], df2['h.size2'][0], df2['h.size3'][0]]))

Displayed value verification on FireCARES


In [ ]:
# Verify "Number of fires -> Average since 2010" displayed values

low = fd.nfirsstatistic_set.filter(metric='residential_structure_fires', year__gte=2010, level=1).aggregate(Avg('count')).get('count__avg')
metrics = fd.metrics.residential_fires_3_year_avg

assert low == metrics.low

display(low)
display(metrics)

In [ ]:
# Verify predicted deaths and injuries for "Low" structure hazard levels displayed values

low = df2['lr_death'][0] + df2['lr_inj'][0]
assert abs(low - fd.metrics.deaths_and_injuries_sum.low) < 0.0001
display(fd.metrics.deaths_and_injuries_sum)

In [ ]:
# Verify sum of death and injuries over all risk levels
v = sum(filter(lambda x: x >= 0, [df2['lr_death'][0], df2['lr_inj'][0], df2['mr_death'][0], df2['mr_inj'][0], df2['h.death'][0], df2['h.inj'][0]]))
assert abs(v - fd.metrics.deaths_and_injuries_sum.all) < 0.0001

Structure count graph vs heatmap N/As


In [ ]:
# Building fires counts/locations
fd = FireDepartment.objects.get(id=93345)
df = pd.read_csv('/firecares/93345-building-fires.csv')
display(df.count())
display(df)
display(df.replace(np.nan, 'Unknown').groupby('risk_category').agg('count')[['alarm']].rename(columns={'alarm': 'Count'}))

In [ ]:
from django.db import connections
from django.contrib.gis.geos import GEOSGeometry

with connections['nfirs'].cursor() as c:
    q = """SELECT ST_Multi(ST_Union(bg.geom))
        FROM nist.tract_years ty
        INNER JOIN census_block_groups_2010 bg
        ON ty.tr10_fid = ('14000US'::text || "substring"((bg.geoid10)::text, 0, 12))
        WHERE ty.fc_dept_id = %(id)s
        GROUP BY ty.fc_dept_id"""

    c.execute(q, {'id': 93345})
    geom = GEOSGeometry(c.fetchone()[0])

assert geom == fd.owned_tracts_geom

In [ ]:
# Coverage for "owned" census tracts
_map = folium.Map(zoom_start=12,
                  location=[geom.centroid.y, geom.centroid.x],
                  tiles='Stamen Toner')
# Green background is the "owned" censuses for the FD based on response to incidents
_map.choropleth(geo_str=fd.owned_tracts_geom.geojson, line_weight=0, fill_opacity=0.2, fill_color='green')
# Red outline is jurisdiction 
_map.choropleth(geo_str=fd.geom.geojson, fill_opacity=0, line_weight=10, line_color='red')
folium.LayerControl().add_to(_map)

colors = {'Low': 'green', 'Medium': 'yellow', 'High': 'red', 'nan': 'gray'}
for r in df[['x', 'y', 'risk_category']].values:
    if r[0] and r[1]:
        folium.CircleMarker(location=[float(r[1]), float(r[0])],
                       fill_color=colors[str(r[2])], radius=200, fill_opacity=0.4, popup='{}, {}'.format(r[1], r[0])).add_to(_map)

display(_map)

In [ ]:
# Structure/parcel counts over coverage area by hazard level

q = """SELECT sum(case when l.risk_category = 'Low' THEN 1 ELSE 0 END) as low,
        sum(CASE WHEN l.risk_category = 'Medium' THEN 1 ELSE 0 END) as medium,
        sum(CASE WHEN l.risk_category = 'High' THEN 1 ELSE 0 END) high,
        sum(CASE WHEN l.risk_category is null THEN 1 ELSE 0 END) as unknown
    FROM parcel_risk_category_local l
    JOIN (SELECT ST_SetSRID(%(owned_geom)s::geometry, 4326) as owned_geom) x
    ON owned_geom && l.wkb_geometry
    WHERE ST_Intersects(owned_geom, l.wkb_geometry)"""

with connections['nfirs'].cursor() as c:
    c.execute(q, {'owned_geom': fd.owned_tracts_geom.hex})
    res = dictfetchall(c)
    
display(res)

In [ ]:
# Find multiple incidents at each location
df2 = df.groupby(['x', 'y']).agg('count').sort('alarm', ascending=False)
dup_df = df2[df2['alarm'] > 1][['alarm']]
display(dup_df)
print 'Location count w/ multiple incidents: {}'.format(len(dup_df))

In [ ]:
q = """
select alarm, a.inc_type, alarms,ff_death, oth_death, ST_X(geom) as x, st_y(geom) as y, COALESCE(b.risk_category, 'Unknown') as risk_category, b.parcel_id
from buildingfires a
left join (SELECT * FROM
            (SELECT state, fdid, inc_date, inc_no, exp_no, geom, b.parcel_id, b.risk_category, ROW_NUMBER() OVER (PARTITION BY state, fdid, inc_date, inc_no, exp_no, geom ORDER BY st_distance(st_centroid(b.wkb_geometry), a.geom)) AS r
            FROM (select * from incidentaddress where state=%(state)s and fdid=%(fdid)s) a
            left join parcel_risk_category_local b on a.geom && b.wkb_geometry) x
            WHERE x.r = 1) b
           using (state, inc_date, exp_no, fdid, inc_no) where state=%(state)s and fdid=%(fdid)s
           order by a.alarm"""

odf = pd.read_sql_query(q, connections['nfirs'], params={'fdid': fd.fdid, 'state': fd.state})

In [ ]:
# Map parcel misses

q = """
select alarm, bf.inc_type, bf.alarms, ff_death, oth_death, ST_X(ia.geom) as x, ST_Y(ia.geom) as y, COALESCE(rc.risk_category, 'Unknown') as risk_category, rc.parcel_id
from buildingfires bf
left join incidentaddress ia
using (state, inc_date, exp_no, fdid, inc_no)
left join parcel_risk_category_local rc
on rc.parcel_id = ia.parcel_id
where bf.state=%(state)s and bf.fdid=%(fdid)s
order by bf.alarm"""

df = pd.read_sql_query(q, connections['nfirs'], params={'fdid': fd.fdid, 'state': fd.state})

# Display building fires that don't have a parcel (parcel miss)
misses = df[df['parcel_id'].isnull()]
print 'Misses: {}'.format(misses.shape[0])

import folium
_map = folium.Map(zoom_start=12,
                  location=[fd.geom.centroid.y, fd.geom.centroid.x],
                  tiles='Stamen Toner')

_map.choropleth(geo_str=fd.geom.geojson, line_weight=0, fill_opacity=0.1, fill_color='green')
folium.LayerControl().add_to(_map)

for r in misses[['x', 'y']].values:
    if r[0] and r[1]:
        folium.CircleMarker(location=[float(r[1]), float(r[0])],
                       fill_color='gray', radius=20, fill_opacity=0.4, popup='{}, {}'.format(r[1], r[0])).add_to(_map)

display(_map)

In [ ]:
# Parcel coverage for Richmond w/ parcel miss incidents
q = """
select ST_Union(p.wkb_geometry)
from parcel_risk_category_local p
where ST_Intersects(p.wkb_geometry, ST_SetSRID(%(owned_geom)s::geometry, 4326))
"""

parcels = pd.read_sql_query(q, connections['nfirs'], params={'owned_geom': fd.owned_tracts_geom.hex})
g = GEOSGeometry(parcels.values[0][0])
_map = display_geom(g)

pts = 0
for r in misses[['x', 'y']].values:
    if r[0] and r[1]:
        pts += 1
        folium.Marker(location=[float(r[1]), float(r[0])], popup='{}, {}'.format(r[1], r[0])).add_to(_map)
display(_map)
display(pts)

In [ ]:
# Count missing geocodes on all priority departments
from firecares.firestation.models import FireDepartment
priorities = list(FireDepartment.priority_departments.all().values('fdid', 'state', 'name'))

q = """
select fdid, state, sum(CASE WHEN geom is null THEN 1 ELSE 0 END) as null_count, count(1), sum(CASE WHEN geom is null THEN 1 ELSE 0 END) / count(1)::decimal * 100.0 as percent_null
from incidentaddress
where fdid = %(fdid)s and state = %(state)s
group by fdid, state
"""

df = pd.DataFrame(columns=['fdid', 'state', 'null_count', 'count', 'percent_null'])

for i in priorities:
    fdid, state, name = i.values()
    df = df.append(pd.read_sql_query(q, connections['nfirs'], params={'fdid': fdid, 'state': state}))
    
df = df.sort('percent_null', ascending=False)
    
display(df)

Get parcel counts for departments based on owned census tracts


In [ ]:
fd = FireDepartment.objects.get(id=95982)
fd

In [ ]:
q = """SELECT ST_Multi(ST_Union(bg.geom))
        FROM nist.tract_years ty
        INNER JOIN census_block_groups_2010 bg
        ON ty.tr10_fid = ('14000US'::text || "substring"((bg.geoid10)::text, 0, 12))
        WHERE ty.fc_dept_id = %(id)s
        GROUP BY ty.fc_dept_id"""

with connections['nfirs'].cursor() as c:
    c.execute(q, {'id': fd.id})
    geom = c.fetchone()[0]
    fd.owned_tracts_geom = geom
    fd.save()

In [ ]:
display(display_geom(fd.owned_tracts_geom))
display(fd.owned_tracts_geom.area)

In [ ]:
q = """SELECT sum(case when l.risk_category = 'Low' THEN 1 ELSE 0 END) as low,
        sum(CASE WHEN l.risk_category = 'Medium' THEN 1 ELSE 0 END) as medium,
        sum(CASE WHEN l.risk_category = 'High' THEN 1 ELSE 0 END) high,
        sum(CASE WHEN l.risk_category is null THEN 1 ELSE 0 END) as unknown
    FROM parcel_risk_category_local l
    JOIN (SELECT ST_SetSRID(%(owned_geom)s::geometry, 4326) as owned_geom) x
    ON owned_geom && l.wkb_geometry
    WHERE ST_Intersects(owned_geom, l.wkb_geometry)"""

with connections['nfirs'].cursor() as c:
    c.execute(q, {'owned_geom': fd.owned_tracts_geom.hex})
    res = dictfetchall(c)
    
display(res)

In [ ]:
q = """
select ST_Union(p.wkb_geometry)
from parcel_risk_category_local p
where ST_Intersects(p.wkb_geometry, ST_SetSRID(%(owned_geom)s::geometry, 4326))
"""

parcels = pd.read_sql_query(q, connections['nfirs'], params={'owned_geom': fd.owned_tracts_geom.hex})
g = GEOSGeometry(parcels.values[0][0])
display_geom(g)

In [ ]:
parcel_coverage_area = g.area
owned_tracts = fd.owned_tracts_geom.area
print '% coverage of owned census tracts by parcels: {}'.format(parcel_coverage_area / owned_tracts)