Using the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, calculate the correlation between the recycling rate and the median income. Discuss your findings in your PR.
In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
# Read the assignment workbook into a DataFrame.
# NOTE(review): this is an absolute path on one machine; the notebook will not
# run elsewhere until it is pointed at a local copy of the file.
workbook_path = '/home/sean/git/algorithms/class4/homework/data/2013_NYC_CD_MedianIncome_Recycle.xlsx'
df = pd.read_excel(workbook_path)
In [6]:
# Show the first five rows as a quick check that the sheet loaded as expected.
df.head(n=5)
Out[6]:
In [9]:
# Scatter plot of the RecycleRate column against the MdHHIncE column.
df.plot.scatter(x='MdHHIncE', y='RecycleRate')
Out[9]:
In [14]:
# Pearson correlation between median income and recycling rate only.
# Selecting the two columns keeps the result focused on the question being
# asked and avoids the error pandas >= 2.0 raises when corr() meets a
# non-numeric column (NOTE(review): whether this sheet has one is not visible
# here — confirm against df.head()).
df[['MdHHIncE', 'RecycleRate']].corr()
Out[14]:
In [16]:
# Scatter of recycling rate against median income with a least-squares line.
# Linear-fit pattern adapted from http://matthiaseisen.com/pp/patterns/p0170/
# Rows missing either value are dropped first, because np.polyfit cannot
# produce a usable fit when its inputs contain NaN.
points = df[['MdHHIncE', 'RecycleRate']].dropna()
income, rate = points['MdHHIncE'], points['RecycleRate']

fig, ax = plt.subplots()
# deg=1 returns two coefficients, highest power first: slope, then intercept.
slope, intercept = np.polyfit(income, rate, deg=1)
ax.plot(income, slope * income + intercept, color='red')
ax.set_xlabel('Median income (MdHHIncE)')
ax.set_ylabel('Recycling rate (RecycleRate)')
ax.set_title('Recycling rate vs. median income, with linear fit')
# Keep the scatter call last so the cell's displayed value is unchanged.
ax.scatter(income, rate)
Out[16]:
In [ ]: