In [ ]:
# This notebook assumes to be running from your FireCARES VM (eg. python manage.py shell_plus --notebook --no-browser)

import sys
import os
import time
import pandas as pd
import numpy as np
sys.path.insert(0, os.path.realpath('..'))
import folium
import django
import sqlite3
django.setup()
from django.db import connections
from pretty import pprint
from firecares.firestation.models import (FireDepartment, FireStation, NFIRSStatistic, FireDepartmentRiskModels,
                                          PopulationClassQuartile)
from fire_risk.models import DIST, DISTMediumHazard, DISTHighHazard
from fire_risk.models.DIST.providers.ahs import ahs_building_areas
from fire_risk.models.DIST.providers.iaff import response_time_distributions
from django.db.models import Avg, Max, Min, Q
from django.contrib.gis.geos import GEOSGeometry
from IPython.display import display
from firecares.utils import lenient_summation, dictfetchall
from firecares.tasks.update import (calculate_department_census_geom, calculate_story_distribution,
                                    calculate_structure_counts, update_performance_score, update_nfirs_counts,
                                    dist_model_for_hazard_level)
pd.set_option("display.max_rows", 2000)
pd.set_option("display.max_columns", 100)

def display_geom(geom):
    _map = folium.Map(location=[geom.centroid.y, geom.centroid.x],
                      tiles='Stamen Toner')
    _map.choropleth(geo_str=geom.geojson, line_weight=0, fill_opacity=0.2, fill_color='green')
    ll = geom.extent[1::-1]
    ur = geom.extent[3:1:-1]
    _map.fit_bounds([ll, ur])

    return _map

# Philadephia-specific
fd = FireDepartment.objects.get(id=91907)

In [ ]:
display(pd.read_sql_query('select region, count(1) from firestation_firedepartment group by region', connections['default']))

# FDs w/ NO region
display(pd.read_sql_query('select id, name, state from firestation_firedepartment where region IS NULL', connections['default']))
  • Fire Risk = risk_model_fires_quartile
  • Fire Spread Risk = risk_model_size1_percent_size2_percent_sum_quartile
  • Death and Injury Risk = risk_model_deaths_injuries_sum_quartile

All of the following are pulled from the predictions model per department, by structure hazard level:

  • rm.risk_model_deaths,
  • rm.risk_model_injuries,
  • rm.risk_model_fires,
  • rm.risk_model_fires_size0,
  • rm.risk_model_fires_size0_percentage,
  • rm.risk_model_fires_size1,
  • rm.risk_model_fires_size1_percentage,
  • rm.risk_model_fires_size2,
  • rm.risk_model_fires_size2_percentage,

In [ ]:
quartiles = """
SELECT
    (SELECT COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0)) AS "risk_model_size1_percent_size2_percent_sum",

    (SELECT COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0)) AS "risk_model_deaths_injuries_sum",
       fd."id",
       fd."created",
       fd."modified",
       fd."fdid",
       fd."name",
       fd."headquarters_address_id",
       fd."mail_address_id",
       fd."headquarters_phone",
       fd."headquarters_fax",
       fd."department_type",
       fd."organization_type",
       fd."website",
       fd."state",
       fd."region",
       rm."dist_model_score",
       rm."risk_model_deaths",
       rm."risk_model_injuries",
       rm."risk_model_fires",
       rm."risk_model_fires_size0",
       rm."risk_model_fires_size0_percentage",
       rm."risk_model_fires_size1",
       rm."risk_model_fires_size1_percentage",
       rm."risk_model_fires_size2",
       rm."risk_model_fires_size2_percentage",
       fd."population",
       fd."population_class",
       fd."featured",
       nfirs.avg_fires AS "residential_fires_avg_3_years",
       rm."level",
       CASE
           WHEN (rm."risk_model_fires_size1_percentage" IS NOT NULL
                 OR rm."risk_model_fires_size2_percentage" IS NOT NULL) THEN ntile(4) over (partition BY COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0) != 0, fd.population_class, rm.level
                                                                                            ORDER BY COALESCE(rm.risk_model_fires_size1_percentage,0)+COALESCE(rm.risk_model_fires_size2_percentage,0))
           ELSE NULL
       END AS "risk_model_size1_percent_size2_percent_sum_quartile",
       CASE
           WHEN (rm."risk_model_deaths" IS NOT NULL
                 OR rm."risk_model_injuries" IS NOT NULL) THEN ntile(4) over (partition BY COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0) != 0, fd.population_class, rm.level
                                                                              ORDER BY COALESCE(rm.risk_model_deaths,0)+COALESCE(rm.risk_model_injuries,0))
           ELSE NULL
       END AS "risk_model_deaths_injuries_sum_quartile",
       CASE
           WHEN rm."dist_model_score" IS NOT NULL THEN ntile(4) over (partition BY rm.dist_model_score IS NOT NULL, fd.population_class, rm.level
                                                                      ORDER BY rm.dist_model_score)
           ELSE NULL
       END AS "dist_model_score_quartile",
       CASE
           WHEN rm."risk_model_deaths" IS NOT NULL THEN ntile(4) over (partition BY rm.risk_model_deaths IS NOT NULL, fd.population_class, rm.level
                                                                       ORDER BY rm.risk_model_deaths)
           ELSE NULL
       END AS "risk_model_deaths_quartile",
       CASE
           WHEN rm."risk_model_injuries" IS NOT NULL THEN ntile(4) over (partition BY rm.risk_model_injuries IS NOT NULL, fd.population_class, rm.level
                                                                         ORDER BY rm.risk_model_injuries)
           ELSE NULL
       END AS "risk_model_injuries_quartile",
       CASE
           WHEN rm."risk_model_fires_size0" IS NOT NULL THEN ntile(4) over (partition BY rm.risk_model_fires_size0 IS NOT NULL, fd.population_class, rm.level
                                                                            ORDER BY rm.risk_model_fires_size0)
           ELSE NULL
       END AS "risk_model_fires_size0_quartile",
       CASE
           WHEN rm."risk_model_fires_size1" IS NOT NULL THEN ntile(4) over (partition BY rm.risk_model_fires_size1 IS NOT NULL, fd.population_class, rm.level
                                                                            ORDER BY rm.risk_model_fires_size1)
           ELSE NULL
       END AS "risk_model_fires_size1_quartile",
       CASE
           WHEN rm."risk_model_fires_size2" IS NOT NULL THEN ntile(4) over (partition BY rm.risk_model_fires_size2 IS NOT NULL, fd.population_class, rm.level
                                                                            ORDER BY rm.risk_model_fires_size2)
           ELSE NULL
       END AS "risk_model_fires_size2_quartile",
       CASE
           WHEN rm."risk_model_fires" IS NOT NULL THEN ntile(4) over (partition BY rm.risk_model_fires IS NOT NULL, fd.population_class, rm.level
                                                                      ORDER BY rm.risk_model_fires)
           ELSE NULL
       END AS "risk_model_fires_quartile",
       CASE
           WHEN "nfirs"."avg_fires" IS NOT NULL THEN ntile(4) over (partition BY avg_fires IS NOT NULL, fd.population_class, rm.level
                                                                    ORDER BY avg_fires)
           ELSE NULL
       END AS "residential_fires_avg_3_years_quartile"
FROM "firestation_firedepartment" fd
INNER JOIN "firestation_firedepartmentriskmodels" rm ON rm.department_id = fd.id
LEFT JOIN
    ( SELECT fire_department_id,
             AVG(COUNT) AS avg_fires,
                           LEVEL
     FROM firestation_nfirsstatistic
     WHERE YEAR >= 2010
         AND metric='residential_structure_fires'
     GROUP BY fire_department_id,
              LEVEL) nfirs ON (fd.id=nfirs.fire_department_id
                               AND nfirs.LEVEL = rm.LEVEL)
WHERE archived=FALSE
ORDER BY id
"""

df = pd.read_sql_query(quartiles, connections['default'])

levels = {'0': 'All hazard levels', '1': 'Low hazard', '2': 'Medium hazard', '4': 'High hazard', '5': 'Unknown hazard'}
quartiles = {'1': 'Low risk', '2': 'Medium risk', '3': 'Medium risk', '4': 'High risk'}

# Transformations to human-readable values
df['level'] = df['level'].apply(lambda x: levels[str(x)])
for c in ['risk_model_size1_percent_size2_percent_sum_quartile', 'risk_model_deaths_injuries_sum_quartile', 'dist_model_score_quartile', 'risk_model_deaths_quartile', 'risk_model_injuries_quartile', 'risk_model_fires_size0_quartile', 'risk_model_fires_size1_quartile', 'risk_model_fires_size2_quartile', 'risk_model_fires_quartile', 'residential_fires_avg_3_years_quartile']:
    df[c] = df[c].apply(lambda x: quartiles[str(int(x))] if not np.isnan(x) else 'N/A')

In [ ]:
df

In [ ]:
quartiles = """
SELECT
    fd.id,
    fd.name,
        CASE WHEN rm.dist_model_score IS NOT NULL
        THEN ntile(4) over (partition by rm.dist_model_score is not null, fd.population_class, rm.level order by rm.dist_model_score)
        ELSE NULL
        END
    AS dist_model_score_quartile, 
    rm.dist_model_score,
    rm.level,
    fd.population_class,
    nfirs.avg_fires as residential_fires_avg_3_years

FROM firestation_firedepartment fd
INNER JOIN firestation_firedepartmentriskmodels rm ON
    rm.department_id = fd.id
LEFT JOIN (
    SELECT fire_department_id, AVG(count) as avg_fires, level
    FROM firestation_nfirsstatistic
    WHERE year >= 2010 AND metric = 'residential_structure_fires'
    GROUP BY fire_department_id, level) AS nfirs
ON (fd.id = nfirs.fire_department_id and nfirs.level = rm.level)
WHERE archived = False and dist_model_score is not null
AND population_class = 9
ORDER BY id
LIMIT 2000
"""

df = pd.read_sql_query(quartiles, connections['default'])
df

In [ ]:
quartiles = """
SELECT
    fd.id,
    fd.name,
        CASE WHEN nfirs.avg_fires IS NOT NULL
        THEN ntile(4) over (partition by avg_fires is not null, fd.population_class, rm.level order by avg_fires)
        ELSE NULL
        END
    AS residential_fires_avg_3_years_quartile,
    rm.dist_model_score,
    rm.level,
    fd.population_class,
    nfirs.avg_fires as residential_fires_avg_3_years
    mfirs.

FROM firestation_firedepartment fd
INNER JOIN firestation_firedepartmentriskmodels rm ON
    rm.department_id = fd.id
LEFT JOIN (
    SELECT fire_department_id, AVG(count) as avg_fires, level
    FROM firestation_nfirsstatistic
    WHERE year >= 2010 AND metric = 'residential_structure_fires'
    GROUP BY fire_department_id, level) AS nfirs
ON (fd.id = nfirs.fire_department_id and nfirs.level = rm.level)
WHERE archived = False and dist_model_score is not null
AND population_class = 9
ORDER BY id
LIMIT 2000
"""

df = pd.read_sql_query(quartiles, connections['default'])
df

In [ ]:
quartiles = """
SELECT
    fd.id,
    fd.name,
        CASE WHEN nfirs.avg_fires IS NOT NULL
        THEN ntile(4) over (partition by avg_fires is not null, fd.population_class, rm.level order by avg_fires)
        ELSE NULL
        END
    AS residential_fires_avg_3_years_quartile,
        CASE WHEN rm.dist_model_score IS NOT NULL
        THEN ntile(4) over (partition by rm.dist_model_score is not null, fd.population_class, rm.level order by rm.dist_model_score)
        ELSE NULL
        END
    AS dist_model_score_quartile,
    rm.dist_model_score,
    rm.level,
    fd.population_class,
    nfirs.avg_fires as residential_fires_avg_3_years

FROM firestation_firedepartment fd
INNER JOIN firestation_firedepartmentriskmodels rm ON
    rm.department_id = fd.id
LEFT JOIN (
    SELECT fire_department_id, AVG(count) as avg_fires, level
    FROM firestation_nfirsstatistic
    WHERE year >= 2010 AND metric = 'residential_structure_fires'
    GROUP BY fire_department_id, level) AS nfirs
ON (fd.id = nfirs.fire_department_id and nfirs.level = rm.level)
WHERE archived = False and dist_model_score is not null
AND population_class = 9
ORDER BY id
LIMIT 2000
"""

df = pd.read_sql_query(quartiles, connections['default'])
df

In [ ]:
q = """SELECT fire_department_id, AVG(count) as avg_fires, SUM(count) as total_fires, level, SUM(count) > 75 as will_have_dist_score
    FROM firestation_nfirsstatistic
    WHERE year >= 2010 AND metric = 'residential_structure_fires' and fire_department_id = %(fd_id)s
    GROUP BY fire_department_id, level"""

pd.read_sql_query(q, connections['default'], params={'fd_id': 73343})

In [ ]:
q = """SELECT fd.id, nfirs.avg_fires, rm.level, rm.dist_model_score
FROM firestation_firedepartment fd
INNER JOIN firestation_firedepartmentriskmodels rm ON
    rm.department_id = fd.id
LEFT JOIN (
    SELECT fire_department_id, AVG(count) as avg_fires, level
    FROM firestation_nfirsstatistic
    WHERE year >= 2010 AND metric = 'residential_structure_fires'
    GROUP BY fire_department_id, level) AS nfirs
ON (fd.id = nfirs.fire_department_id and nfirs.level = rm.level)
WHERE fd.id = %(fd_id)s
"""

pd.read_sql_query(q, connections['default'], params={'fd_id': 73343})

Community Assessment

Risk Model

All community assessment values are based on predicted metrics based on a NIST-developed risk model, current department coverage with some data from the risk model is subset of the total number of fire departments. The risk model does not necessarily include values that would be required to accurately calculate ALL 3 assessments on each department.

Fire Risk

The community assessment in regards to fire risk for a specific department calculates the relative risk to similar departments in the same population class for the given structure hazard level. This relative risk uses the predicted # of fires per year as the benchmark for risk, which is then ordered from least to greatest number of fires for the department and evenly split into 4 groups. Departments that fall into the 1st group (lowest # of fires) are considered "Low" risk when compared to their peers in the same structure hazard level, those that fall into the 2nd and 3rd groups are considered "Medium" risk and those that fall into the last group (those with the greatest number of fires) are considered "High" risk.

Fire Spread Risk

Similar to the fire risk assessment, the community assessment for fire spread risk ONLY takes into account the predicted percentage of fires that extend beyond room of origin (the sum of size2 and size3 fire percentages relative to all fires for a department). The predicted percentage of fires that spread beyond room of origin is then ordered in the same manner as in the fire risk assessment and divided into 4 each groups ranked from least to greatest percentage of fires that spread beyond room of origin, using the same risk designation, "Low", "Medium" and "High", assigned to the first, second/third and fourth groups, respectively.

Death and Injury Risk

Death and injury risk takes into account the predicted # deaths and injuries caused during or response to fires per year [I BELIEVE THIS IS ALL DEATHS/INJURIES, INCLUDING FIREFIGHTERS AND CIVILIANS] for a department. Similarly to the other 2 community assement metrics, the death and injury risk is a relative risk to other departments in the same population class and in response to structures of the same hazard level.


In [ ]:
## TODO: Create mechanism to validate programatically