Using the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, calculate the correlation between the recycling rate and the median income. Discuss your findings in your PR.
In [3]:
    
import pandas as pd
%matplotlib inline
    
In [4]:
    
# Read the workbook named in the assignment into a DataFrame.
df = pd.read_excel(io="2013_NYC_CD_MedianIncome_Recycle.xlsx")
    
In [5]:
    
# Preview the first five rows to check that the columns loaded as expected.
df.head(n=5)
    
    Out[5]:
In [6]:
    
# Report (rows, columns) instead of rendering the whole table; the head()
# preview cell already shows what the data looks like.
df.shape
    
    Out[6]:
In [7]:
    
# Arithmetic mean of the MdHHIncE (median income) column.
df.loc[:, 'MdHHIncE'].mean()
    
    Out[7]:
In [8]:
    
# Middle value of the MdHHIncE column.
df.MdHHIncE.median()
    
    Out[8]:
In [9]:
    
# Most frequent value(s) of MdHHIncE; mode() returns a Series because ties
# are possible.
df.MdHHIncE.mode()
    
    Out[9]:
In [10]:
    
# Summary statistics (count, mean, std, min, quartiles, max) for MdHHIncE.
df.loc[:, 'MdHHIncE'].describe()
    
    Out[10]:
In [17]:
    
# Range of MdHHIncE: largest value minus smallest value.
df['MdHHIncE'].pipe(lambda col: col.max() - col.min())
    
    Out[17]:
In [11]:
    
# First quartile (25th percentile) of MdHHIncE.
df.MdHHIncE.quantile(0.25)
    
    Out[11]:
In [12]:
    
# Second quartile (50th percentile) of MdHHIncE.
df.MdHHIncE.quantile(0.5)
    
    Out[12]:
In [13]:
    
# Third quartile (75th percentile) of MdHHIncE.
df.MdHHIncE.quantile(0.75)
    
    Out[13]:
In [35]:
    
# Interquartile range of MdHHIncE: 75th percentile minus 25th percentile.
m_iqr = df['MdHHIncE'].pipe(lambda col: col.quantile(0.75) - col.quantile(0.25))
m_iqr
    
    Out[35]:
In [34]:
    
# Upper outlier fence for MdHHIncE: Q3 + 1.5 * IQR.
# Uses m_iqr from the preceding cell; the bare name `iqr` is not defined
# anywhere in the visible notebook, so it fails on a fresh kernel.
df['MdHHIncE'].quantile(q=0.75) + (m_iqr * 1.5)
    
    Out[34]:
In [19]:
    
# Lower outlier fence for MdHHIncE: Q1 - 1.5 * IQR.
# Uses m_iqr (the income IQR computed earlier); the bare name `iqr` is not
# defined anywhere in the visible notebook, so it fails on a fresh kernel.
df['MdHHIncE'].quantile(q=0.25) - (m_iqr * 1.5)
    
    Out[19]:
In [20]:
    
# Sample standard deviation of MdHHIncE (pandas default, ddof=1).
df.MdHHIncE.std()
    
    Out[20]:
In [22]:
    
# Summary statistics (count, mean, std, min, quartiles, max) for RecycleRate.
df.RecycleRate.describe()
    
    Out[22]:
In [23]:
    
# Middle value of the RecycleRate column.
df.loc[:, 'RecycleRate'].median()
    
    Out[23]:
In [24]:
    
# Range of RecycleRate: largest value minus smallest value.
df['RecycleRate'].pipe(lambda col: col.max() - col.min())
    
    Out[24]:
In [25]:
    
# First quartile (25th percentile) of RecycleRate.
df.RecycleRate.quantile(0.25)
    
    Out[25]:
In [26]:
    
# Second quartile (50th percentile) of RecycleRate.
df.RecycleRate.quantile(0.5)
    
    Out[26]:
In [27]:
    
# Third quartile (75th percentile) of RecycleRate.
df.RecycleRate.quantile(0.75)
    
    Out[27]:
In [36]:
    
# Interquartile range of RecycleRate: 75th percentile minus 25th percentile.
r_iqr = df['RecycleRate'].pipe(lambda col: col.quantile(0.75) - col.quantile(0.25))
r_iqr
    
    Out[36]:
In [29]:
    
# Upper outlier fence for RecycleRate: Q3 + 1.5 * IQR.
# Two corrections to the original expression: the fence starts from the 75th
# percentile (it used the 25th), and it uses r_iqr, the recycling-rate IQR
# computed in the preceding cell, instead of the undefined name `iqr`.
df['RecycleRate'].quantile(q=0.75) + (r_iqr * 1.5)
    
    Out[29]:
In [30]:
    
# Lower outlier fence for RecycleRate: Q1 - 1.5 * IQR.
# Uses r_iqr (the recycling-rate IQR computed earlier); the bare name `iqr` is
# not defined anywhere in the visible notebook, so it fails on a fresh kernel.
df['RecycleRate'].quantile(q=0.25) - (r_iqr * 1.5)
    
    Out[30]:
In [31]:
    
# Sample standard deviation of RecycleRate (pandas default, ddof=1).
df.RecycleRate.std()
    
    Out[31]:
In [32]:
    
# Scatter of RecycleRate (y) against MdHHIncE (x), with a title so the figure
# can be read on its own when the notebook is skimmed.
df.plot.scatter(
    x='MdHHIncE',
    y='RecycleRate',
    title='Recycling rate vs. median income',
)
    
    Out[32]:
    
In [33]:
    
# Pearson correlation restricted to the two columns the assignment asks about.
# Selecting them explicitly also keeps any non-numeric columns out of corr(),
# which raises on such columns in pandas >= 2.0 (numeric_only defaults to False).
df[['MdHHIncE', 'RecycleRate']].corr()
    
    Out[33]:
In [ ]: