Using the data from the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, create a predictor using the weights from the fitted model. This time, use the model's built-in attributes rather than hard-coding the weights into your algorithm.
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.formula.api as smf
# Workbook path is relative to the notebook's working directory.
workbook_path = '2013_NYC_CD_MedianIncome_Recycle.xlsx'
df = pd.read_excel(workbook_path)
In [2]:
# Preview the first five rows to check the columns that were loaded.
df.head(n=5)
Out[2]:
In [3]:
# Scatter of the raw columns (original workbook headers) before renaming.
df.plot.scatter(x='MdHHIncE', y='RecycleRate')
Out[3]:
In [4]:
# Replace the workbook headers, by position, with formula-friendly names.
renamed_columns = ['location', 'median_income', 'recycle_rate']
df.columns = renamed_columns
In [5]:
# Pearson correlation between median income and recycle rate.
# Computed on the two numeric columns directly rather than via df.corr():
# in pandas >= 2.0 DataFrame.corr() defaults to numeric_only=False and raises
# when the frame holds a non-numeric column (NOTE(review): 'location' is
# presumably text -- confirm). The pairwise value is the same either way.
df['median_income'].corr(df['recycle_rate'])
Out[5]:
In [6]:
# Ordinary least squares fit of recycle rate on median income.
lm = smf.ols(formula="recycle_rate~median_income", data=df).fit()

# Read the fitted coefficients from the model's params Series by label;
# the formula interface labels the constant term "Intercept".
intercept = lm.params["Intercept"]
slope = lm.params["median_income"]

# Display the fitted parameters as the cell output.
lm.params
Out[6]:
In [7]:
# Scatter of the observations with the fitted regression line drawn on top.
ax = df.plot.scatter(x="median_income", y="recycle_rate")

# Evaluate the fitted line at every observed income.
fitted_rate = slope * df["median_income"] + intercept
ax.plot(df["median_income"], fitted_rate, "-", color="red")

ax.set_xlabel('Median Income')
ax.set_ylabel('Recycle Rate')
Out[7]:
In [8]:
def predicting_recylerate(income):
    """Predict the recycle rate for a given median income.

    Evaluates the fitted line ``intercept + income * slope`` using the
    module-level ``intercept`` and ``slope`` values, which must already be
    defined when this function is called.

    Args:
        income: Median income. Anything ``float()`` accepts, e.g. a number
            or a numeric string such as the raw result of ``input()``.

    Returns:
        The predicted recycle rate.

    Raises:
        ValueError: If ``income`` is a string that cannot be parsed as a
            number.
    """
    return intercept + float(income) * slope
In [9]:
# Ask for an income, then report the model's prediction rounded to 2 decimals.
x = input('What is the median income of your location? ')
expected_rate = round(predicting_recylerate(x), 2)
print('Expected recycling rate : ' + str(expected_rate))