Using the data from the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, create a predictor using the weights from the model. This time, use the built-in attributes of your model rather than hard-coding them into your algorithm.


In [2]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

In [4]:
# Load the spreadsheet into a DataFrame (path is relative to the working directory).
df = pd.read_excel("2013_NYC_CD_MedianIncome_Recycle.xlsx")

In [5]:
# Preview the first rows to confirm the columns: CD_Name, MdHHIncE, RecycleRate.
df.head()


Out[5]:
CD_Name MdHHIncE RecycleRate
0 Battery Park City, Greenwich Village & Soho 119596 0.286771
1 Battery Park City, Greenwich Village & Soho 119596 0.264074
2 Chinatown & Lower East Side 40919 0.156485
3 Chelsea, Clinton & Midtown Business Distric 92583 0.235125
4 Chelsea, Clinton & Midtown Business Distric 92583 0.246725

In [6]:
# Scatter plot of RecycleRate against MdHHIncE to eyeball the relationship
# before fitting a line through it.
df.plot.scatter(x="MdHHIncE", y="RecycleRate")


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x10e26eb38>

In [7]:
# Fit an ordinary least squares regression of RecycleRate on MdHHIncE.
# The fitted result exposes an Intercept term alongside the MdHHIncE coefficient.
lm = smf.ols(formula="RecycleRate~MdHHIncE",data=df).fit()

In [8]:
# Fitted coefficients, indexed by term name (Intercept, MdHHIncE).
lm.params


Out[8]:
Intercept    0.074804
MdHHIncE     0.000002
dtype: float64

In [21]:
# Read the fitted coefficients from the model by label rather than by
# position, so the assignment does not silently depend on the order of the
# entries in lm.params.
intercept = lm.params["Intercept"]
slope = lm.params["MdHHIncE"]

In [29]:
def find_recycle_rate(user_income):
    """Predict a recycle rate from a household income using the fitted line.

    Evaluates ``slope * income + intercept`` with the module-level ``slope``
    and ``intercept`` values.

    Parameters
    ----------
    user_income : str or number
        Income value. It is passed through ``float()``, so the raw string
        returned by ``input()`` is accepted.

    Returns
    -------
    The predicted recycle rate.

    Raises
    ------
    ValueError
        If ``user_income`` cannot be converted by ``float()``.
    """
    income = float(user_income)
    return slope * income + intercept

In [30]:
# The model is fitted on MdHHIncE, whose values in the data preview are on an
# annual scale (roughly 40,000 to 120,000), so ask for annual income; a monthly
# figure fed in unchanged would give a meaningless prediction.
user_income = input("What is your annual household income?: ")
find_recycle_rate(user_income)


What is your monthly household income?: 50000
Out[30]:
0.16828476953547267

In [ ]:


In [ ]: