Assignment 3
Using the data from the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, create a predictor using the weights from the model. This time, use the built-in attributes of your model rather than hard-coding them into your algorithm.
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.formula.api as smf
In [3]:
# Load the income / recycling spreadsheet into a DataFrame.
# NOTE(review): this is an absolute path under one user's home directory, so
# the notebook will not run on another machine without editing it.
df = pd.read_excel("/home/sean/git/algorithms/class4/homework/data/2013_NYC_CD_MedianIncome_Recycle.xlsx")
In [4]:
# Show the first rows of the loaded data.
df.head()
Out[4]:
In [46]:
# Fit an ordinary least squares regression of RecycleRate on MdHHIncE using
# the statsmodels formula interface; `lm` holds the fitted results.
lm = smf.ols(formula="RecycleRate~MdHHIncE",data=df).fit()
In [47]:
# Display the fitted coefficients of the model.
lm.params
Out[47]:
In [65]:
# Pull the fitted coefficients out of the model by label rather than by
# position, so the assignment stays correct even if the order of terms in
# lm.params ever differs from (intercept, slope).
intercept = lm.params["Intercept"]
slope = lm.params["MdHHIncE"]
In [57]:
# Display the fitted slope (coefficient on MdHHIncE).
slope
Out[57]:
In [58]:
# Display the fitted intercept.
intercept
Out[58]:
In [63]:
# NOTE(review): leftover IPython help lookup (`?` suffix). It is not needed by
# any later cell and is not valid outside IPython; consider deleting this cell.
plt.yscale?
In [67]:
# Scatter the observed recycle rate against median household income, then
# overlay the fitted regression line in red on the same axes.
income_col = df["MdHHIncE"]
fitted_rate = slope * income_col + intercept
ax = df.plot(kind='scatter', x='MdHHIncE', y='RecycleRate')
ax.plot(income_col, fitted_rate, "-", c='red')
Out[67]:
In [85]:
def income_to_rate(income_str):
    """Predict the recycle rate, in percent, for a median household income.

    Applies the fitted line ``slope * income + intercept`` using the
    module-level ``slope`` and ``intercept`` values, scales the result to a
    percentage, and rounds it to three significant figures.

    Args:
        income_str: Income as a number or as text. Text may carry
            surrounding whitespace, a leading ``$`` and thousands
            separators (for example ``"$50,000"``).

    Returns:
        str: The rounded percentage rendered as a float, e.g. ``'17.3'``.

    Raises:
        ValueError: If the text cannot be parsed as a number.
    """
    if isinstance(income_str, str):
        # input() hands back raw text; tolerate "$50,000"-style entries
        # that float() would otherwise reject.
        income_str = income_str.strip().lstrip('$').replace(',', '')
    income = float(income_str)
    rate_percent = (slope * income + intercept) * 100
    # Format to 3 significant figures, then round-trip through float so that
    # exponent notation such as '1.23e+03' is rendered as '1230.0'.
    return str(float('%.3g' % rate_percent))
In [86]:
# Spot-check the predictor with a numeric income of 50000.
income_to_rate(50000)
Out[86]:
In [90]:
# Ask for an income interactively and report the model's predicted rate.
income = input('Enter median neighborhood income: $')
predicted_rate = income_to_rate(income)
print('Predicted recycle rate for this neighborhood is {}%'.format(predicted_rate))
In [ ]: