In [47]:
import os

import matplotlib.pyplot as plt
import pandas as pd
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'ggplot' style sheet to figures.
plt.style.use('ggplot')

In [24]:
# Location of the source workbook. The default is the original author's local
# copy; set the NYC_RECYCLE_XLSX environment variable to point at the file on
# another machine instead of editing this cell.
DATA_XLSX = os.environ.get(
    'NYC_RECYCLE_XLSX',
    '//Users/mercyemelike/Desktop/Lede-Program/algorithms/class4/homework/data/2013_NYC_CD_MedianIncome_Recycle.xlsx',
)

# Load the workbook; the preview below shows the columns CD_Name, MdHHIncE
# and RecycleRate.
df = pd.read_excel(DATA_XLSX)

In [16]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)


Out[16]:
CD_Name MdHHIncE RecycleRate
0 Battery Park City, Greenwich Village & Soho 119596 0.286771
1 Battery Park City, Greenwich Village & Soho 119596 0.264074
2 Chinatown & Lower East Side 40919 0.156485
3 Chelsea, Clinton & Midtown Business Distric 92583 0.235125
4 Chelsea, Clinton & Midtown Business Distric 92583 0.246725

In [49]:
# Scatter plot of recycle rate against median household income, one point per
# row of df.
ax = df.plot(x='MdHHIncE', y='RecycleRate', kind='scatter', figsize=(10, 10))

# Set labels and title in a single call; the trailing semicolon stops the
# notebook from echoing the return value of the last expression.
ax.set(
    xlabel="Median Household Income",
    ylabel="Recycle Rate",
    title="Strong Positive Correlation between Median Household Income and Recycle Rate",
);


Out[49]:
<matplotlib.text.Text at 0x10aa145f8>

In [50]:
# Correlation matrix of the two numeric columns. CD_Name holds strings, and
# pandas >= 2.0 raises if corr() is called on a frame with non-numeric
# columns, so select the numeric columns explicitly.
df[['MdHHIncE', 'RecycleRate']].corr()


Out[50]:
MdHHIncE RecycleRate
MdHHIncE 1.000000 0.884783
RecycleRate 0.884783 1.000000

In [ ]: