“…create a predictor using the weights from the model. This time, use the built-in attributes…” link


In [10]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf # for linear regression
import numpy as np

In [3]:
# Load the median-income / recycling workbook into a DataFrame (pandas defaults).
df = pd.read_excel(
    io="../../class4/homework/data/2013_NYC_CD_MedianIncome_Recycle.xlsx"
)

In [142]:
# Ordinary least squares through the formula API:
# MdHHIncE is the response and RecycleRate the single regressor.
lm = smf.ols(
    data=df,
    formula="MdHHIncE~RecycleRate",
).fit()

In [165]:
fig, ax = plt.subplots()
# Raw observations: median income on x, recycle rate on y.
ax.plot(df['MdHHIncE'], df['RecycleRate'], 'o', label="Data")
# The formula API drops rows with missing values, so the fitted values can be
# shorter than the full column; pair them with the rows that were actually
# used (matched through their index) to avoid a length mismatch.
ax.plot(lm.fittedvalues, df.loc[lm.fittedvalues.index, 'RecycleRate'], '-', color='red', label="Prediction")
# by hand, the x values are: lm.params['RecycleRate']*df['RecycleRate'] + lm.params['Intercept']
ax.set_xlabel("Median income (MdHHIncE)")
ax.set_ylabel("Recycle rate (RecycleRate)")
ax.legend(loc="best");



In [177]:
# Regression in the other direction: RecycleRate is the response,
# MdHHIncE the single regressor.
lm2 = smf.ols(
    data=df,
    formula="RecycleRate~MdHHIncE",
).fit()
# Read the two fitted coefficients by label instead of by position.
intercept = lm2.params["Intercept"]
slope = lm2.params["MdHHIncE"]
def simple_predictor(median_income):
    """Evaluate the fitted straight line at ``median_income``.

    Reads the module-level ``slope`` and ``intercept`` and returns
    ``slope*median_income + intercept``.
    """
    scaled = slope * median_income
    return scaled + intercept

# Evaluate the predictor at 30 evenly spaced incomes between 10,000 and 150,000.
x_values = np.linspace(10000, 150000, 30)
y_values = [simple_predictor(income) for income in x_values]

# Label the axes so the figure can be read on its own. The labels are set
# before drawing so the plot call remains the cell's last expression.
plt.xlabel("Median income (MdHHIncE)")
plt.ylabel("Predicted recycle rate (RecycleRate)")
plt.plot(x_values, y_values, "*",color="pink")


Out[177]:
[<matplotlib.lines.Line2D at 0x10ec51c88>]