Using the data from the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, create a predictor using the weights (intercept and slope) from the fitted model. This time, read the weights from the model's built-in attributes rather than hard-coding them into your algorithm.



In [1]:

    
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf



In [3]:

    
# Load the spreadsheet into a DataFrame; the path is relative to the
# notebook's working directory.
df = pd.read_excel(io='2013_NYC_CD_MedianIncome_Recycle.xlsx')



In [4]:

    
# Display the column labels so the regression formula below can refer to
# them by their exact names.
df.columns









    Out[4]:





Index(['CD_Name', 'MdHHIncE', 'RecycleRate'], dtype='object')



In [5]:

    
# Ordinary least squares fit of RecycleRate on MdHHIncE; `lm` is the fitted
# results object, not the unfitted model.
lm = smf.ols(
    formula="RecycleRate~MdHHIncE",
    data=df,
).fit()



In [6]:

    
# Read the fitted coefficients from the model by label instead of relying on
# their position inside `lm.params`.
intercept = lm.params['Intercept']
slope = lm.params['MdHHIncE']



In [9]:

    
# Scatter of the observed data; keep the returned Axes so the regression
# line is drawn on that same plot explicitly.
ax = df.plot(kind='scatter', x='MdHHIncE', y='RecycleRate')

# Fitted line evaluated at every observed income value.
fitted_rate = intercept + slope * df['MdHHIncE']
ax.plot(df['MdHHIncE'], fitted_rate, '-', color='red')









    Out[9]:





[<matplotlib.lines.Line2D at 0x10a23d6d8>]



In [11]:

    
def predict_recyclerate(income):
    """Return the recycle rate predicted by the fitted line for `income`.

    Evaluates ``intercept + slope * income`` using the notebook-level
    `slope` and `intercept` names, which are looked up each time the
    function is called.

    Parameters
    ----------
    income : number
        Value on the same scale as the `MdHHIncE` column used to fit the
        model (presumably median household income -- confirm against the
        data source).

    Returns
    -------
    The predicted value on the scale of the `RecycleRate` column.
    """
    predicted = intercept + slope * income
    return predicted



In [13]:

    
# Example prediction for a single income value.
predict_recyclerate(income=50000)









    Out[13]:





0.16828476953547261