Using the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, calculate the correlation between the recycling rate and the median income. Discuss your findings in your PR.
In [3]:
import pandas as pd
%matplotlib inline
In [4]:
# Load the income / recycling workbook (first sheet) into a DataFrame.
df = pd.read_excel(io="2013_NYC_CD_MedianIncome_Recycle.xlsx")
In [5]:
# Preview the first five rows to check that the sheet loaded as expected.
df.head(n=5)
Out[5]:
In [6]:
# Display the whole DataFrame as this cell's output.
df
Out[6]:
In [7]:
# Arithmetic mean of the median-income column (`MdHHIncE`).
income = df['MdHHIncE']
income.mean()
Out[7]:
In [8]:
# Median of the `MdHHIncE` column.
income = df['MdHHIncE']
income.median()
Out[8]:
In [9]:
# Most frequent value(s) of `MdHHIncE`; pandas returns a Series because
# several values can tie for the mode.
income = df['MdHHIncE']
income.mode()
Out[9]:
In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for `MdHHIncE`.
income = df['MdHHIncE']
income.describe()
Out[10]:
In [17]:
# Range of `MdHHIncE`: largest value minus smallest value.
income = df['MdHHIncE']
income.max() - income.min()
Out[17]:
In [11]:
# First quartile (25th percentile) of `MdHHIncE`.
df['MdHHIncE'].quantile(0.25)
Out[11]:
In [12]:
# Second quartile (50th percentile, i.e. the median) of `MdHHIncE`.
df['MdHHIncE'].quantile(0.5)
Out[12]:
In [13]:
# Third quartile (75th percentile) of `MdHHIncE`.
df['MdHHIncE'].quantile(0.75)
Out[13]:
In [35]:
# Interquartile range of `MdHHIncE`: Q3 - Q1.
income_q1 = df['MdHHIncE'].quantile(q=0.25)
income_q3 = df['MdHHIncE'].quantile(q=0.75)
m_iqr = income_q3 - income_q1
m_iqr
Out[35]:
In [34]:
# Upper outlier fence for `MdHHIncE`: Q3 + 1.5 * IQR.
# Uses `m_iqr` (the income IQR computed in the cell above); the previous
# reference to a bare `iqr` is not defined anywhere in this notebook and
# raises NameError on a fresh kernel.
df['MdHHIncE'].quantile(q=0.75) + (m_iqr * 1.5)
Out[34]:
In [19]:
# Lower outlier fence for `MdHHIncE`: Q1 - 1.5 * IQR.
# Uses `m_iqr` (the income IQR computed earlier); the previous reference to
# a bare `iqr` is not defined anywhere in this notebook and raises NameError
# on a fresh kernel.
df['MdHHIncE'].quantile(q=0.25) - (m_iqr * 1.5)
Out[19]:
In [20]:
# Sample standard deviation of `MdHHIncE` (pandas default, ddof=1).
income = df['MdHHIncE']
income.std()
Out[20]:
In [22]:
# Summary statistics (count, mean, std, min, quartiles, max) for `RecycleRate`.
recycle_rate = df['RecycleRate']
recycle_rate.describe()
Out[22]:
In [23]:
# Median of the `RecycleRate` column.
recycle_rate = df['RecycleRate']
recycle_rate.median()
Out[23]:
In [24]:
# Range of `RecycleRate`: largest value minus smallest value.
recycle_rate = df['RecycleRate']
recycle_rate.max() - recycle_rate.min()
Out[24]:
In [25]:
# First quartile (25th percentile) of `RecycleRate`.
df['RecycleRate'].quantile(0.25)
Out[25]:
In [26]:
# Second quartile (50th percentile, i.e. the median) of `RecycleRate`.
df['RecycleRate'].quantile(0.5)
Out[26]:
In [27]:
# Third quartile (75th percentile) of `RecycleRate`.
df['RecycleRate'].quantile(0.75)
Out[27]:
In [36]:
# Interquartile range of `RecycleRate`: Q3 - Q1.
recycle_q1 = df['RecycleRate'].quantile(q=0.25)
recycle_q3 = df['RecycleRate'].quantile(q=0.75)
r_iqr = recycle_q3 - recycle_q1
r_iqr
Out[36]:
In [29]:
# Upper outlier fence for `RecycleRate`: Q3 + 1.5 * IQR.
# Two fixes relative to the previous version of this cell:
#   * the fence is anchored at the 75th percentile (it used q=0.25, which is
#     the anchor for the *lower* fence);
#   * it uses `r_iqr`, the recycle-rate IQR computed in the cell above, rather
#     than a bare `iqr` that is not defined anywhere in this notebook.
df['RecycleRate'].quantile(q=0.75) + (r_iqr * 1.5)
Out[29]:
In [30]:
# Lower outlier fence for `RecycleRate`: Q1 - 1.5 * IQR.
# Uses `r_iqr` (the recycle-rate IQR computed earlier); the previous reference
# to a bare `iqr` is not defined anywhere in this notebook and raises
# NameError on a fresh kernel.
df['RecycleRate'].quantile(q=0.25) - (r_iqr * 1.5)
Out[30]:
In [31]:
# Sample standard deviation of `RecycleRate` (pandas default, ddof=1).
recycle_rate = df['RecycleRate']
recycle_rate.std()
Out[31]:
In [32]:
# Scatter plot of recycling rate (y) against median income (x) to eyeball
# the relationship before quantifying it.
df.plot.scatter(x='MdHHIncE', y='RecycleRate')
Out[32]:
In [33]:
# Pearson correlation (pandas' default method) between median income and
# recycling rate -- the quantity the exercise asks for.
# Restricting to these two columns keeps the output focused and avoids the
# ValueError that pandas >= 2.0 raises when `DataFrame.corr()` meets a
# non-numeric column (NOTE(review): the sheet presumably has text/label
# columns such as district names -- confirm against df.head()).
df[['MdHHIncE', 'RecycleRate']].corr()
Out[33]:
In [ ]: