Using the data from the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, create a predictor using the weights from the model. This time, use the built-in attributes of your model rather than hard-coding them into your algorithm.
In [1]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
In [3]:
# Load the spreadsheet (path is relative to the notebook's working directory).
# The regression and plot cells below read its MdHHIncE and RecycleRate columns.
df = pd.read_excel('2013_NYC_CD_MedianIncome_Recycle.xlsx')
In [4]:
# Display the column labels of the loaded frame to check what is available.
df.columns
Out[4]:
In [5]:
# Fit an ordinary least squares regression with RecycleRate as the response
# and MdHHIncE as the single predictor; `lm` holds the fitted results.
lm = smf.ols(formula="RecycleRate~MdHHIncE",data=df).fit()
In [6]:
# Read the fitted coefficients from the model by label rather than by position,
# so the values stay correct even if the order of terms in lm.params changes.
intercept = lm.params['Intercept']
slope = lm.params['MdHHIncE']
In [9]:
# Scatter the observed (MdHHIncE, RecycleRate) pairs.
df.plot.scatter(x='MdHHIncE', y='RecycleRate')
# Overlay the fitted line, evaluated at every observed MdHHIncE value, in red.
plt.plot(
    df['MdHHIncE'],
    df['MdHHIncE'].mul(slope).add(intercept),
    '-',
    color='red',
)
Out[9]:
In [11]:
def predict_recyclerate(income):
    """Return the recycle rate predicted by the fitted regression line.

    Evaluates ``income * slope + intercept`` using the notebook-level
    ``slope`` and ``intercept`` taken from ``lm.params``, so the prediction
    follows whatever model was most recently fitted rather than hard-coded
    numbers.

    Parameters
    ----------
    income
        Predictor value on the same scale as the ``MdHHIncE`` column the
        model was fitted on.

    Returns
    -------
    The predicted ``RecycleRate`` for ``income``.
    """
    return (income * slope) + intercept
In [13]:
# Example prediction for an input of 50000
# (presumably dollars of median household income — confirm against the data source).
predict_recyclerate(50000)
Out[13]: