In [2]:
%matplotlib inline
# pg 58: This evaluates the predictions from section 2.1
In [3]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
In [4]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
# in 1.2 (ethical concerns over the dataset's 'B' feature); on modern
# versions the import above raises ImportError -- confirm the pinned
# scikit-learn version, or switch to fetch_openml(name="boston").
boston = load_boston()
lr = LinearRegression()
# Ordinary least-squares fit on the full dataset (506 rows, 13 features).
lr.fit(boston.data, boston.target)
Out[4]:
In [5]:
predictions = lr.predict(boston.data)
In [6]:
import matplotlib.pyplot as plt
import numpy as np
In [7]:
# Histogram of the in-sample residuals; for a well-behaved linear model
# we expect a roughly normal shape centered on zero.
fig, axis = plt.subplots(figsize=(7, 5))
residuals = boston.target - predictions
axis.hist(residuals, bins=50)
axis.set_title('Histogram of Residuals.')
Out[7]:
In [8]:
# look at the mean of the residuals (closer to 0 is best)
# OLS with an intercept forces this to ~0 up to floating-point error,
# so this is a sanity check rather than a quality metric.
np.mean(boston.target - predictions)
Out[8]:
In [9]:
# Look at the Q-Q plot.
In [10]:
# Q-Q plot of the residuals against a normal distribution: points lying
# on the straight reference line indicate normally distributed residuals.
from scipy.stats import probplot
f = plt.figure(figsize=(7,5))
ax = f.add_subplot(111)
probplot(boston.target - predictions, plot=ax)
# bare `ax` as the last expression displays the Axes repr for this cell
ax
Out[10]:
In [11]:
# Created Mean Squared Error (MSE) and Mean Absolute Deviation (MAD)
# helpers in msemad.py for this next part and for later in the book.
In [12]:
from msemad import MSE, MAD
In [13]:
MSE(boston.target, predictions)
Out[13]:
In [14]:
MAD(boston.target, predictions)
Out[14]:
In [40]:
# Bootstrap configuration: number of resampling rounds and the size of
# each subsample (half the dataset).
n_bootstraps = 100
len_boston = len(boston.target)
# FIX: np.int was a deprecated alias for the builtin int (removed in
# NumPy 1.24); use int() directly -- identical truncation behavior.
subsample_size = int(0.5 * len_boston)
In [41]:
subsample = lambda: np.random.choice(np.arange(0, len_boston), size=subsample_size)
In [42]:
coefs = np.ones(n_bootstraps)
In [43]:
# Refit on a random half-sample each round and record the first
# coefficient.  NOTE: this reuses `lr`, clobbering the full-data fit
# above (safe here because `predictions` was already computed).
for b in range(n_bootstraps):
    idx = subsample()
    lr.fit(boston.data[idx], boston.target[idx])
    coefs[b] = lr.coef_[0]
In [ ]:
In [ ]:
In [44]:
# Distribution of the bootstrapped first coefficient.
# FIX: removed duplicate `import matplotlib.pyplot as plt` -- plt is
# already imported in the cell marked In [6] above.
f = plt.figure(figsize=(7,5))
ax = f.add_subplot(111)
ax.hist(coefs, bins=50)
ax.set_title("Histogram of the lr.coef_[0]")
Out[44]:
In [45]:
np.percentile(coefs, [2.5, 97.5])
Out[45]:
In [ ]: