In [7]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
# Install Excel readers into the *kernel's* environment (%pip, not a shell pip3).
# xlrd is kept for legacy .xls files; openpyxl is what pandas uses for .xlsx.
%pip install xlrd openpyxl
In [10]:
# Load the 2013 NYC median-income / recycling workbook into a DataFrame.
df = pd.read_excel(
    io="data/2013_NYC_CD_MedianIncome_Recycle.xlsx",
)
In [11]:
# Preview the first five rows of the loaded frame.
df.head(n=5)
Out[11]:
In [20]:
# Ordinary least squares: regress RecycleRate on MdHHIncE using the formula API.
lm = smf.ols(
    formula="RecycleRate~MdHHIncE",
    data=df,
).fit()
In [21]:
# Display the coefficients of the fitted model; the following cell unpacks
# them as (intercept, slope).
lm.params
Out[21]:
In [22]:
# Pick coefficients by term label rather than by position, so a change to the
# formula (extra regressors, dropped intercept) fails loudly with a KeyError
# instead of silently assigning the wrong value.
# float(...) keeps plain Python floats, as positional unpacking produced.
intercept = float(lm.params["Intercept"])
slope = float(lm.params["MdHHIncE"])
In [27]:
# Scatter of the observations; keep the returned Axes so the fit line and the
# labels are attached to this figure explicitly rather than via pyplot state.
ax = df.plot(kind='scatter', x='MdHHIncE', y='RecycleRate',
             color='gray', alpha=0.8, linewidth=0)
# Overlay the fitted regression line: RecycleRate = slope * MdHHIncE + intercept.
ax.plot(df["MdHHIncE"], slope * df["MdHHIncE"] + intercept,
        "-", color="red", alpha=0.5)
# Label the figure so it stands alone; the trailing ';' hides the return-value repr.
ax.set(xlabel="Median income (MdHHIncE)",
       ylabel="Recycle rate (RecycleRate)",
       title="Recycle rate vs. median income with OLS fit");
Out[27]:
In [28]:
# Report the fitted line in words ("model", not "module").
print("The model is: Recycle rate =", slope,"* medianincome +",intercept)
In [31]:
def get_rrate(income, *, income_coef=None, const=None):
    """Predict the recycle rate for a given median income using the linear fit.

    Parameters
    ----------
    income : number or array-like
        Median income value(s) to evaluate the fitted line at.
    income_coef : number, optional
        Slope to use. Defaults to the notebook-level ``slope`` taken from
        the fitted model's parameters.
    const : number, optional
        Intercept to use. Defaults to the notebook-level ``intercept``.

    Returns
    -------
    ``income * income_coef + const``
    """
    # Fall back to the fitted coefficients only when no override is given, so
    # the function can also be used (and checked) without the notebook globals.
    if income_coef is None:
        income_coef = slope
    if const is None:
        const = intercept
    recycle_rate = income * income_coef + const
    return recycle_rate
In [ ]: