In [1]:
import pandas as pd
%matplotlib inline

In [2]:
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf

In [3]:
# Read the spreadsheet into a DataFrame; the path is relative to the
# notebook's working directory.
df = pd.read_excel("2013_NYC_CD_MedianIncome_Recycle.xlsx")

In [4]:
# Preview the first rows to check the column names and typical values.
df.head()


Out[4]:
CD_Name MdHHIncE RecycleRate
0 Battery Park City, Greenwich Village & Soho 119596 0.286771
1 Battery Park City, Greenwich Village & Soho 119596 0.264074
2 Chinatown & Lower East Side 40919 0.156485
3 Chelsea, Clinton & Midtown Business Distric 92583 0.235125
4 Chelsea, Clinton & Midtown Business Distric 92583 0.246725

In [40]:
# Fit an ordinary-least-squares model with RecycleRate as the response and
# MdHHIncE as the only predictor; the fitted params also include an Intercept.
lm = smf.ols(formula="RecycleRate ~ MdHHIncE",data=df).fit()

In [41]:
# Show the fitted coefficients: the intercept and the slope on MdHHIncE.
lm.params


Out[41]:
Intercept    0.074804
MdHHIncE     0.000002
dtype: float64

In [42]:
# Look the coefficients up by label instead of unpacking positionally, so the
# two values cannot be silently swapped if the order of terms ever changes.
intercept = lm.params["Intercept"]
slope = lm.params["MdHHIncE"]

In [45]:
def guess_median_income(recycle_rate, coef=None, const=None):
    """Estimate the median household income that corresponds to a recycle rate.

    Inverts the fitted line ``recycle_rate = coef * median_income + const``.

    Parameters
    ----------
    recycle_rate : float
        Recycle rate, on the same scale as the ``RecycleRate`` column.
    coef : float, optional
        Slope of the fitted line. Defaults to the notebook-level ``slope``.
    const : float, optional
        Intercept of the fitted line. Defaults to the notebook-level
        ``intercept``.

    Returns
    -------
    float
        The median income at which the fitted line predicts ``recycle_rate``.

    Raises
    ------
    ValueError
        If the slope is zero, in which case the line cannot be inverted.
    """
    # Fall back to the coefficients fitted earlier in the notebook.
    coef = slope if coef is None else coef
    const = intercept if const is None else const
    if coef == 0:
        raise ValueError(
            "slope is zero; median income cannot be recovered from the recycle rate"
        )
    return (recycle_rate - const) / coef


def guess_recyclerate(median_income, coef=None, const=None):
    """Predict the recycle rate for a given median household income.

    Evaluates the fitted line ``median_income * coef + const``.

    Parameters
    ----------
    median_income : float
        Median household income, in the units of the ``MdHHIncE`` column.
    coef : float, optional
        Slope of the fitted line. Defaults to the notebook-level ``slope``.
    const : float, optional
        Intercept of the fitted line. Defaults to the notebook-level
        ``intercept``.

    Returns
    -------
    float
        The predicted recycle rate.
    """
    # Fall back to the coefficients fitted earlier in the notebook.
    coef = slope if coef is None else coef
    const = intercept if const is None else const
    return median_income * coef + const

In [46]:
# 119596 is the MdHHIncE value listed for Battery Park City, Greenwich Village
# & Soho in the df.head() preview, where the observed RecycleRate values are
# roughly 0.26-0.29.
guess_recyclerate(119596)


Out[46]:
0.29840233275398087

In [ ]: