In [1]:
from __future__ import division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
Aggregated to the community-area level, parcel data from Cook County can be compared with community-area counts of basement flooding 311 calls. Overall, the number of basement flooding calls appears to be moderately correlated with the parcel statistics, most notably with an area's mean parcel value.
In [2]:
# Load the basement-flooding 311 call counts; the reshaping below treats each
# CSV column as one community area.
flood_comm_df = pd.read_csv('311_data/wib_calls_311_comm.csv')

# Reshape wide -> long: one row per (row label, column label) pair.
# NOTE(review): read_csv is called without index_col, so the column labelled
# 'Date' will normally hold the positional row number rather than a date --
# confirm against the CSV layout.
flood_comm_stack_df = (
    flood_comm_df
    .stack()
    .reset_index()
    .rename(columns={'level_0': 'Date', 'level_1': 'Community Area', 0: 'Count Calls'})
)

# Sum the calls for each community area across all rows of the source table.
flood_comm_totals = (
    flood_comm_stack_df
    .groupby(['Community Area'])['Count Calls']
    .sum()
    .reset_index()
)
flood_comm_totals.head()
Out[2]:
In [3]:
# Load the residential parcel statistics, rename the key column so it matches
# the flooding table ('Community Area'), and keep only the columns used below.
parcel_comm_df = (
    pd.read_csv('parcel_data/res_parcel_stats_by_comm.csv')
    .rename(columns={'CommunityArea': 'Community Area'})
    [['Community Area', 'MeanBldgAge', 'ParcelCount', 'MeanBldgValue']]
)
parcel_comm_df.head()
Out[3]:
In [4]:
# Join the flooding totals to the parcel statistics on the shared key. This is
# an inner join, so only community areas present in both tables are kept.
flood_parcel_df = pd.merge(
    flood_comm_totals,
    parcel_comm_df,
    how='inner',
    on='Community Area',
)
# Store the call totals as integers.
flood_parcel_df = flood_parcel_df.astype({'Count Calls': int})
flood_parcel_df.head()
Out[4]:
In [5]:
# The Loop, Near North Side, and Lincoln Park are treated as outliers and are
# excluded before plotting.
outlier_areas = ['LOOP', 'NEAR NORTH SIDE', 'LINCOLN PARK']
is_outlier = flood_parcel_df['Community Area'].isin(outlier_areas)
flood_parcel_sub = flood_parcel_df[~is_outlier].copy()

# Scatter of total flooding calls against mean building value per community area.
flood_parcel_sub.plot.scatter(
    x='MeanBldgValue',
    y='Count Calls',
    title='Flooding Calls v Mean Parcel Value',
)
Out[5]:
In [6]:
# Pairwise correlation between call counts and the parcel statistics.
# Restrict to numeric columns first: since pandas 2.0, DataFrame.corr() no longer
# drops non-numeric columns by default and would raise on the string-valued
# 'Community Area' column. Selecting numeric dtypes explicitly works on both
# older and newer pandas versions.
flood_parcel_sub.select_dtypes(include=[np.number]).corr()
Out[6]:
In [ ]: