In [2]:
%matplotlib inline
# Book p. 58: evaluate the predictions of the linear regression from section 2.1.

In [3]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression

In [4]:
# Load the Boston housing dataset and fit a linear regression on every row.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably needs an older scikit-learn — confirm the
# version this notebook is meant to run against.
boston = load_boston()
lr = LinearRegression()
lr.fit(boston.data, boston.target)


Out[4]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [5]:
# Predict on the same data the model was fitted on, so these are in-sample
# predictions (no held-out set is involved).
predictions = lr.predict(boston.data)

In [6]:
import matplotlib.pyplot as plt
import numpy as np

In [7]:
# Plot how the in-sample residuals (actual minus predicted) are distributed.
residuals = boston.target - predictions
f, ax = plt.subplots(figsize=(7, 5))
ax.hist(residuals, bins=50)
ax.set_title('Histogram of Residuals.')


Out[7]:
<matplotlib.text.Text at 0x10c310810>

In [8]:
# The residuals should average out to roughly zero; the nearer to 0, the better.
(boston.target - predictions).mean()


Out[8]:
4.9709906810093963e-15

In [9]:
# Look at the Q-Q plot.

In [10]:
from scipy.stats import probplot

# Q-Q plot of the residuals; probplot compares against a normal distribution
# unless told otherwise.
residuals = boston.target - predictions
f, ax = plt.subplots(figsize=(7, 5))
probplot(residuals, plot=ax)
ax


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x10c291410>

In [11]:
# The Mean Squared Error (MSE) and Mean Absolute Deviation (MAD) helpers
# were written in msemad.py; they are used in this next part and later in the book.

In [12]:
from msemad import MSE, MAD

In [13]:
# In-sample mean squared error, computed with the MSE helper imported from msemad.
MSE(boston.target, predictions)


Out[13]:
21.897779217687496

In [14]:
# In-sample mean absolute deviation, computed with the MAD helper imported from msemad.
MAD(boston.target, predictions)


Out[14]:
3.2729446379969334

In [40]:
# Bootstrap settings: how many resampled fits to run and how many rows each uses.
n_bootstraps = 100
len_boston = len(boston.target)
# Each bootstrap sample has half as many rows as the dataset (truncated to an int).
# The builtin int is used because the np.int alias was deprecated in NumPy 1.20
# and removed in NumPy 1.24, where it raises AttributeError.
subsample_size = int(0.5 * len_boston)

In [41]:
def subsample(population_size=None, size=None):
    """Draw row indices for one bootstrap sample, with replacement.

    Parameters
    ----------
    population_size : int, optional
        Indices are drawn from ``0 .. population_size - 1``. Defaults to the
        notebook-level ``len_boston``.
    size : int, optional
        Number of indices to draw. Defaults to the notebook-level
        ``subsample_size``.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of ``size`` integer indices. Values may repeat
        because ``np.random.choice`` samples with replacement by default.

    Notes
    -----
    Uses NumPy's global random state; no seed is set in the visible cells, so
    the draws differ from run to run unless ``np.random.seed`` is called first.
    """
    # Fall back to the notebook globals at call time, so a bare ``subsample()``
    # behaves exactly like the original lambda did.
    if population_size is None:
        population_size = len_boston
    if size is None:
        size = subsample_size
    return np.random.choice(np.arange(0, population_size), size=size)

In [42]:
# Preallocate one slot per bootstrap replicate; the bootstrap loop overwrites
# every entry, so the initial 1.0 values are only placeholders.
coefs = np.ones(n_bootstraps)

In [43]:
# Bootstrap the first regression coefficient: refit on a resampled set of rows
# n_bootstraps times and record coef_[0] from each fit.
for i in range(n_bootstraps):
    subsample_idx = subsample()
    subsample_X = boston.data[subsample_idx]
    subsample_y = boston.target[subsample_idx]
    # Fit a fresh estimator for each replicate instead of refitting `lr`, so
    # the full-data model that produced `predictions` is not overwritten.
    # `lr` was built with default arguments, so LinearRegression() matches it.
    bootstrap_lr = LinearRegression()
    bootstrap_lr.fit(subsample_X, subsample_y)
    coefs[i] = bootstrap_lr.coef_[0]

In [ ]:


In [ ]:


In [44]:
import matplotlib.pyplot as plt

# Show the spread of the bootstrapped estimates of the first coefficient.
f, ax = plt.subplots(figsize=(7, 5))
ax.hist(coefs, bins=50)
ax.set_title("Histogram of the lr.coef_[0]")


Out[44]:
<matplotlib.text.Text at 0x10cdba690>

In [45]:
# The 2.5th and 97.5th percentiles bound the central 95% of the bootstrapped
# coefficients (a percentile bootstrap interval).
np.percentile(coefs, [2.5, 97.5])


Out[45]:
array([-0.18285195,  0.08042941])

In [ ]: