In [14]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

Assignment 3

Using the data from the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, create a predictor using the weights from the model. This time, use the built-in attributes of your model rather than hard-coding them into your algorithm.


In [15]:
# Load the community-district income / recycling spreadsheet into a DataFrame.
df = pd.read_excel(
    "data/2013_NYC_CD_MedianIncome_Recycle.xlsx"
)

In [23]:
# Preview the first two rows to check the column names.
df.head(n=2)


Out[23]:
CD_Name MdHHIncE RecycleRate
0 Battery Park City, Greenwich Village & Soho 119596 0.286771
1 Battery Park City, Greenwich Village & Soho 119596 0.264074

In [16]:
# Fit an ordinary least squares regression of RecycleRate on MdHHIncE.
lm = smf.ols(
    formula="RecycleRate~MdHHIncE",
    data=df,
).fit()

In [18]:
# Display the fitted coefficients, labelled by term (Intercept and MdHHIncE).
lm.params


Out[18]:
Intercept    0.074804
MdHHIncE     0.000002
dtype: float64

In [20]:
# Look the coefficients up by label rather than unpacking positionally, so the
# assignment does not silently depend on the order of entries in lm.params.
intercept = lm.params["Intercept"]
slope = lm.params["MdHHIncE"]

In [21]:
# Scatter the observations, then overlay the fitted regression line.
# Drawing on the Axes returned by df.plot avoids relying on pyplot's implicit
# "current axes" state.
ax = df.plot(kind="scatter", x="MdHHIncE", y="RecycleRate")
ax.set_title("Recycle rate vs. median household income")
ax.plot(df["MdHHIncE"], slope * df["MdHHIncE"] + intercept, "-", color="red")


Out[21]:
[<matplotlib.lines.Line2D at 0x108b1d550>]

In [22]:
def income_predictor(your_income, model=None):
    """Predict the recycle rate for a given median household income.

    Evaluates the fitted line ``Intercept + MdHHIncE * your_income`` using the
    coefficients stored on the model's ``params`` attribute, so no weights are
    hard-coded.

    Parameters
    ----------
    your_income : number
        Income value on the same scale as the ``MdHHIncE`` column. Only
        arithmetic is applied, so a pandas Series or NumPy array should also
        work element-wise.
    model : object, optional
        Any object exposing a ``params`` mapping with ``"Intercept"`` and
        ``"MdHHIncE"`` entries. When omitted, the notebook-level ``lm`` fitted
        earlier is used, which keeps existing one-argument calls working.

    Returns
    -------
    The predicted recycle rate (same shape as ``your_income``).
    """
    if model is None:
        # Fall back to the model fitted earlier in the notebook.
        model = lm
    coefficients = model.params
    # Label-based lookup is safe regardless of coefficient order and does not
    # rely on attribute-style Series access.
    return your_income * coefficients["MdHHIncE"] + coefficients["Intercept"]

In [25]:
# Example: predict the recycle rate for an MdHHIncE of 119596
# (the value shown in the df.head(2) preview).
income_predictor(119596)


Out[25]:
0.29840233275398087

In [ ]: