In [1]:
from __future__ import division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
%matplotlib inline
In [2]:
# Load the 311 call counts. Every column after the first is treated as one
# community area; the first column is used as the row key.
flood_comm_df = pd.read_csv('311_data/wib_calls_311_comm.csv')

# Reshape wide -> long with one row per (row key, community area) pair.
# The frame is indexed by its first column *before* stacking so that the
# 'Date' column carries that column's values. Stacking on the default
# RangeIndex would label plain row positions (0, 1, 2, ...) as 'Date'.
# NOTE(review): assumes the first CSV column holds the call date -- confirm.
date_col = flood_comm_df.columns[0]
flood_comm_stack_df = flood_comm_df.set_index(date_col).stack().reset_index()
flood_comm_stack_df.columns = ['Date', 'Community Area', 'Count Calls']

# Total calls per community area across all rows of the source file.
flood_comm_totals = (
    flood_comm_stack_df
    .groupby('Community Area')['Count Calls']
    .sum()
    .reset_index()
)
flood_comm_totals.head()
Out[2]:
In [3]:
# Load the combined IHS data and normalise the community-area names so they
# can be used as a join key: upper-case them, then drop every character that
# is not A-Z or a space.
ihs_df = pd.read_csv('ihs_data/combined_ihs_data.csv')

# Use the vectorised .str accessor rather than apply(re.sub): it passes
# missing names through as NaN instead of raising TypeError on a float.
# regex=True is explicit because the default differs between pandas versions.
ihs_df['Community Area'] = (
    ihs_df['Community Area']
    .str.upper()
    .str.replace(r'[^A-Z ]+', '', regex=True)
)
ihs_df.head()
Out[3]:
In [4]:
# Attach the IHS columns to the per-area call totals. merge() defaults to an
# inner join, so areas whose names do not match in both frames are dropped.
flood_ihs_df = flood_comm_totals.merge(ihs_df, on='Community Area')

# info() writes its report to stdout itself and returns None; wrapping it in
# print() only appended a stray "None" line to the cell output.
flood_ihs_df.info(verbose=True)
flood_ihs_df.head()
Out[4]:
In [5]:
# Call totals are raw counts rather than rates, so they scale with the size of
# a community area. ParcelCount is brought in here so that the parcel rates
# can be multiplied by it and compared with the call counts on the same footing.
parcel_columns = ['Community Area', 'MeanBldgAge', 'ParcelCount', 'MeanBldgValue']
parcel_comm_df = (
    pd.read_csv('parcel_data/res_parcel_stats_by_comm.csv')
    .rename(columns={'CommunityArea': 'Community Area'})
    [parcel_columns]
)

# Join the selected parcel statistics onto the flood/IHS frame by area name.
flood_parcel_df = flood_ihs_df.merge(parcel_comm_df, on='Community Area')
flood_parcel_df.head()
Out[5]:
In [6]:
# Convert each parcel rate into an estimated parcel count by multiplying it by
# the area's ParcelCount. A list of pairs (not a dict) keeps the new columns
# in a fixed order.
# NOTE(review): the products are used as-is; if the source columns are
# expressed on a 0-100 scale the estimates are 100x too large -- confirm units.
estimate_specs = [
    ('Est_Foreclosed_Parcels', 'Percentage_Foreclosed_Parcels'),
    ('Est_Low_Value_Parcels', 'Share_Low_Value_Mean_05_15'),
    ('Est_Vacant_Parcels', 'Vacant_Percent_Mean_10_15'),
]
for estimate_col, rate_col in estimate_specs:
    flood_parcel_df[estimate_col] = flood_parcel_df[rate_col] * flood_parcel_df['ParcelCount']
flood_parcel_df.head()
Out[6]:
In [7]:
# Pairwise correlation between the numeric columns. The frame also holds the
# string column 'Community Area'; pandas >= 2.0 no longer drops non-numeric
# columns silently in corr() and raises instead, so select the numeric columns
# explicitly. Older versions dropped them, so their result is unchanged.
flood_parcel_df.select_dtypes(include='number').corr()
Out[7]:
In [9]:
# Widen the default figure size. This is a global matplotlib setting, so it
# also applies to any figure drawn after this cell.
plt.rcParams.update({"figure.figsize": [15, 5]})

# Scatter of call totals against foreclosure filings, one point per
# community area.
flood_parcel_df.plot.scatter(
    x='Foreclosure_Filings_05_15',
    y='Count Calls',
    title='Basement Calls and Foreclosures by Community Area',
)
Out[9]:
In [ ]: