Using the data from the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, create a predictor using the weights from the model. This time, use the built-in attributes of your model rather than hard-coding the weights into your algorithm.


In [1]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

In [3]:
# Load the spreadsheet into `df`. The path is relative, so the .xlsx file must
# be in the kernel's working directory.
df = pd.read_excel('2013_NYC_CD_MedianIncome_Recycle.xlsx')

In [4]:
# Display the column labels so the names used in the regression formula
# (RecycleRate, MdHHIncE) can be checked against the data.
df.columns


Out[4]:
Index(['CD_Name', 'MdHHIncE', 'RecycleRate'], dtype='object')

In [5]:
# Fit an ordinary least squares regression of RecycleRate on MdHHIncE.
# `lm` holds the fitted results; its `params` are read in the next cell.
lm = smf.ols(formula="RecycleRate~MdHHIncE",data=df).fit()

In [6]:
# Pull the fitted weights out of the model by label rather than by position,
# so the assignment does not silently depend on the order of `lm.params` and
# keeps working if more terms are added to the formula.
intercept = lm.params['Intercept']
slope = lm.params['MdHHIncE']

In [9]:
# Scatter the observed points, then overlay the fitted regression line in red.
income_values = df['MdHHIncE']
df.plot(kind='scatter', x='MdHHIncE', y='RecycleRate')
plt.plot(income_values, income_values*slope+intercept, '-', color='red')


Out[9]:
[<matplotlib.lines.Line2D at 0x10a23d6d8>]

In [11]:
def predict_recyclerate(income):
    """Predict the recycle rate for a given income using the fitted line.

    Evaluates ``income * slope + intercept`` with the module-level ``slope``
    and ``intercept`` values (taken from the fitted model's parameters in an
    earlier cell).

    Parameters
    ----------
    income : number or array-like supporting ``*`` and ``+``
        Income value(s), presumably in the same units as the ``MdHHIncE``
        column the model was fitted on.

    Returns
    -------
    The predicted recycle rate, with the same shape/type semantics as
    ``income * slope + intercept``.
    """
    scaled_income = income * slope
    return scaled_income + intercept

In [13]:
# Example: predicted recycle rate for an income value of 50000.
predict_recyclerate(50000)


Out[13]:
0.16828476953547261