In [14]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
In [15]:
# Load the spreadsheet into a DataFrame. Judging by the file name this is 2013
# NYC data pairing median income with recycling figures -- confirm with the
# data owner. The path is relative to the notebook's working directory.
df = pd.read_excel(
    "data/2013_NYC_CD_MedianIncome_Recycle.xlsx",
)
In [23]:
# Peek at the first two rows to sanity-check that the file loaded as expected.
df.head(2)
Out[23]:
In [16]:
# Ordinary least squares with RecycleRate as the response and MdHHIncE as the
# single predictor; `lm` holds the fitted results used by the cells below.
lm = (
    smf.ols(formula="RecycleRate~MdHHIncE", data=df)
    .fit()
)
In [18]:
# Display the fitted coefficients (the `Intercept` and `MdHHIncE` entries are
# the ones read further down in this notebook).
lm.params
Out[18]:
In [20]:
# Look the coefficients up by label instead of unpacking by position, so the
# assignment cannot silently swap intercept and slope if the order of
# `lm.params` ever changes (e.g. after editing the formula).
intercept = lm.params["Intercept"]
slope = lm.params["MdHHIncE"]
In [21]:
# Scatter the observations, then overlay the fitted regression line.
# Drawing on the Axes returned by pandas avoids depending on pyplot's implicit
# "current axes" state.
ax = df.plot(kind="scatter", x="MdHHIncE", y="RecycleRate")
ax.plot(df["MdHHIncE"], slope * df["MdHHIncE"] + intercept, "-", color="red")
# The trailing semicolon keeps the cell from echoing a matplotlib object repr.
ax.set_title("RecycleRate vs. MdHHIncE with OLS fit");
Out[21]:
In [22]:
def income_predictor(your_income, model=None):
    """Predict the recycle rate for an income value from a fitted linear model.

    Evaluates ``your_income * slope + intercept`` where the slope is the
    model's ``MdHHIncE`` coefficient and the intercept is its ``Intercept``
    coefficient.

    Parameters
    ----------
    your_income : number
        Income value, in the same units as the ``MdHHIncE`` column the model
        was fitted on.
    model : object, optional
        Any object whose ``params`` supports label lookup for ``"MdHHIncE"``
        and ``"Intercept"``. When omitted, the notebook-level ``lm`` is used,
        which matches the function's original behaviour.

    Returns
    -------
    The predicted recycle rate.
    """
    if model is None:
        # Resolved at call time, so the fitting cell must have run first.
        model = lm
    coefficients = model.params
    recycle_rate = your_income * coefficients["MdHHIncE"] + coefficients["Intercept"]
    return recycle_rate
In [25]:
# Example: evaluate the fitted line at an income value of 119596
# (same units as the MdHHIncE column).
income_predictor(119596)
Out[25]:
In [ ]: