In [5]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

In [5]:
# Step up one directory. Written as an explicit %cd line magic so the cell
# does not depend on automagic being enabled.
%cd ..


C:\Users\Harsha Devulapalli\Desktop\algorithms

In [6]:
# Show the current working directory (explicit %pwd line magic instead of
# relying on automagic).
# NOTE(review): the recorded output of this cell does not match the directory
# printed by the preceding `cd ..` cell, which suggests the cells were run out
# of order - confirm with Restart Kernel & Run All.
%pwd


Out[6]:
'C:\\Users\\Harsha Devulapalli\\Desktop\\algorithms\\class5\\homework'

In [7]:
# Switch to the folder that holds the homework spreadsheet.
# NOTE(review): this is an absolute, user-specific Windows path, so the
# notebook will not run on another machine as written - consider a path
# relative to the notebook location instead.
%cd "C:\Users\Harsha Devulapalli\Desktop\algorithms\class4\homework\data"


C:\Users\Harsha Devulapalli\Desktop\algorithms\class4\homework\data

In [8]:
# Load the spreadsheet from the current working directory into a DataFrame.
# Later cells read its "MdHHIncE" and "RecycleRate" columns.
df = pd.read_excel("2013_NYC_CD_MedianIncome_Recycle.xlsx")

In [9]:
# Scatter of the "RecycleRate" column against the "MdHHIncE" column.
df.plot.scatter(x="MdHHIncE", y="RecycleRate")


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x2bf312b1a90>

In [30]:
# Ordinary least squares: RecycleRate regressed on MdHHIncE.
# Build the model specification first, then fit it.
ols_spec = smf.ols(formula="RecycleRate~MdHHIncE", data=df)
lm = ols_spec.fit()

In [31]:
# Display the fitted coefficients (intercept and the MdHHIncE slope).
lm.params


Out[31]:
Intercept    0.074804
MdHHIncE     0.000002
dtype: float64

In [32]:
# Look the coefficients up by label instead of unpacking by position, so the
# two values cannot be silently swapped if the order of the terms in
# lm.params ever differs from (Intercept, MdHHIncE).
intercept = lm.params["Intercept"]
slope = lm.params["MdHHIncE"]

In [33]:
# Redraw the scatter and overlay the fitted regression line in red.
ax = df.plot(kind="scatter", x="MdHHIncE", y="RecycleRate")
fitted_rate = slope * df["MdHHIncE"] + intercept
ax.plot(df["MdHHIncE"], fitted_rate, "-", color="red")


Out[33]:
[<matplotlib.lines.Line2D at 0x2bf3299d710>]

In [38]:
def returnrate(income, *, model_slope=None, model_intercept=None):
    """Evaluate the fitted regression line at a given income.

    Computes ``slope * income + intercept``.

    Parameters
    ----------
    income : number or array-like
        Value(s) of the predictor at which to evaluate the line.
    model_slope : float, optional
        Slope to use. When omitted, the notebook-level ``slope`` variable
        is used, exactly as before.
    model_intercept : float, optional
        Intercept to use. When omitted, the notebook-level ``intercept``
        variable is used, exactly as before.

    Returns
    -------
    The value of the line at ``income``.
    """
    # Fall back to the notebook globals only when no explicit coefficient is
    # supplied; passing them in makes the function usable without relying on
    # hidden kernel state.
    if model_slope is None:
        model_slope = slope
    if model_intercept is None:
        model_intercept = intercept
    return model_slope * income + model_intercept

In [39]:
# Evaluate the fitted line at an income value of 20000.
returnrate(20000)


Out[39]:
0.11219638950565416