In [1]:
import pandas as pd
In [2]:
import statsmodels.formula.api as smf
In [3]:
# Load the workbook into a DataFrame. The path is relative, so the file must be
# in the notebook's working directory. The regression cell below reads the
# RecycleRate and MdHHIncE columns from this frame.
df = pd.read_excel("2013_NYC_CD_MedianIncome_Recycle.xlsx")
In [4]:
df.head()
Out[4]:
In [22]:
# Ordinary least squares fit through the formula API:
# response RecycleRate, single predictor MdHHIncE.
lm = smf.ols(
    data=df,
    formula="RecycleRate~MdHHIncE",
).fit()
In [23]:
lm.params
Out[23]:
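The fitted model is the simple linear regression $y = \beta_0 + \beta_1 x$. Here:
y: the variable that we want to predict (RecycleRate in this model)
$\beta_0$: the intercept of the regression line, i.e. the value of y when x is 0
$\beta_1$: the coefficient of x, i.e. the change in y for a one-unit change in x
x: the variable that affects the value of y, i.e. the already-known variable whose effect on y we want to see (MdHHIncE in this model)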
In [26]:
# Unpack the fitted coefficients from lm.params by position.
# NOTE(review): this assumes the params Series holds exactly two entries, with
# the intercept first and the MdHHIncE slope second — confirm against the
# lm.params output above.
intercept, slope = lm.params
def RecycleRate_calculator(median_income, coefficients=None):
    """Predict the recycling rate for a given median household income.

    Evaluates the fitted regression line ``intercept + slope * median_income``.

    Parameters
    ----------
    median_income : int, float or numeric str
        Median household income, on the same scale as the ``MdHHIncE``
        predictor the model was fitted on.
    coefficients : tuple of (intercept, slope), optional
        Regression coefficients to use. When omitted, the notebook-level
        ``intercept`` and ``slope`` (unpacked from ``lm.params``) are used.

    Returns
    -------
    float
        The predicted recycling rate.

    Raises
    ------
    ValueError, TypeError
        If ``median_income`` (or a coefficient) cannot be converted to float.
    """
    if coefficients is None:
        # Use the full-precision fitted values rather than hand-copied, rounded
        # constants: a slope truncated to one significant digit (0.000002) is
        # multiplied by an income in the tens of thousands, so the rounding
        # error dominates the prediction. This also keeps the function in sync
        # if the model is refit on different data.
        coefficients = (intercept, slope)
    b0, b1 = coefficients
    return float(b0) + float(b1) * float(median_income)
In [27]:
RecycleRate_calculator(119596)
Out[27]:
In [ ]: