In [46]:
"""
author: mikezawitkowski
I had a conversation with someone who is working with the
LA Fire Department to figure out how important ambient
temperature is to predicting the outbreak of fire.
I wanted to figure out if this was also important
in predicting fire for San Francisco.
We'll try a simple seaborn and pandas
correlation plot to find out.
"""
from __future__ import division, print_function
import pandas as pd
%matplotlib inline
import seaborn as sns
In [2]:
# Load fire incidents from the SF open-data endpoint.  The query string asks
# for rows ordered by close_dttm descending; the two `{}` placeholders are
# filled with the paging offset and the row limit, in that order.
query_url = (
    'https://data.sfgov.org/resource/wbb6-uh78.json'
    '?$order=close_dttm%20DESC&$offset={}&$limit={}'
)
offset, limit = 0, 100000
df = pd.read_json(query_url.format(offset, limit))
In [3]:
# Columns removed from the incident frame before the analysis.
# NOTE(review): spellings such as "sytem" and "spead" are matched verbatim
# against the frame's column labels -- presumably they mirror the dataset's
# own field names, so verify before "correcting" them.
cols_to_drop = [
    "automatic_extinguishing_sytem_failure_reason",
    "automatic_extinguishing_sytem_type",
    "battalion",
    "box",
    "call_number",
    "detector_effectiveness",
    "detector_failure_reason",
    "ems_personnel",
    "ems_units",
    "exposure_number",
    "first_unit_on_scene",
    "ignition_factor_secondary",
    "mutual_aid",
    "no_flame_spead",
    "other_personnel",
    "other_units",
    "station_area",
    "supervisor_district",
]
# errors="ignore" keeps this cell idempotent: because the result is assigned
# back to `df`, running the cell a second time (or loading a response that
# lacks one of these fields) would otherwise raise KeyError.
df = df.drop(cols_to_drop, axis=1, errors="ignore")
In [5]:
# Parse every column whose label contains 'dttm' into pandas datetimes.
dttm_columns = [label for label in df.columns if 'dttm' in label]
for col in dttm_columns:
    df[col] = pd.to_datetime(df[col])
In [8]:
# Earliest alarm timestamp in the loaded rows.
df['alarm_dttm'].min()
Out[8]:
In [9]:
# Latest alarm timestamp in the loaded rows.
df['alarm_dttm'].max()
Out[9]:
In [13]:
# Keep the earliest alarm timestamp around under the name `d`.
d = df['alarm_dttm'].min()
In [17]:
import json
In [24]:
# Read the 'weatherunderground' entry from the JSON config file that lives
# two directories up, so the key itself never appears in the notebook.
with open('../../config.json', 'r') as fh:
    weather_api_key = json.loads(fh.read())['weatherunderground']
In [23]:
Out[23]:
In [25]:
# Rate limits on a weather_underground developer key: at most 500 calls per
# day and 10 calls per minute.
# URL template with two positional `{}` slots -- presumably the API key and
# the history date, in that order (TODO confirm against the API docs).
url = (
    "http://api.wunderground.com/api/{}"
    "/history_{}/q/CA/San_Francisco.json"
)
In [26]:
import requests
In [27]:
# Preview the first five rows of the trimmed frame.
df.head(5)
Out[27]:
In [28]:
# Frequency of each estimated_property_loss value, with missing values
# counted as their own bucket.
df['estimated_property_loss'].value_counts(dropna=False)
Out[28]:
In [31]:
# Fraction of rows with no estimated_property_loss value.
# Computed from the column itself rather than from a hand-copied count, so it
# stays correct when the query returns different rows (when this was first
# run, 96,335 of the 100,000 rows were null).
df.estimated_property_loss.isnull().mean()
Out[31]:
Switching back to ambient temperature, I found this resource for downloading data from SF-based weather stations going back to 2013: http://www.ncdc.noaa.gov/cgi-bin/cdo/cdoprod.pl
I'll add this to the external data folder; for future reference, here's the link to download the requested data from 2013 through 2016: http://www.ncdc.noaa.gov/orders/isd/CDO5991787088242.html
Still waiting for the data file to be available at the above link.
UPDATE 7/22: The data was made available and has been downloaded to the directory /data/external/noaa/
So next let's take a look at the data and how it correlates with the number of fires.
We'll start this in a separate notebook, using the data file that was shared on 7/20
In [ ]: