notebook.community

Edit and run



In [2]:

    
%matplotlib inline
# p. 58: this notebook evaluates the predictions from section 2.1.



In [3]:

    
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression



In [4]:

    
# Load the Boston dataset and fit a default LinearRegression on all of its rows.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this cell needs
# an older scikit-learn release to run.
boston = load_boston()
lr = LinearRegression()
# fit() is the last expression of the cell, so the fitted estimator is displayed.
lr.fit(boston.data, boston.target)









    Out[4]:





LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)



In [5]:

    
# In-sample predictions: the model predicts on the same data it was fitted on.
predictions = lr.predict(boston.data)



In [6]:

    
import matplotlib.pyplot as plt
import numpy as np



In [7]:

    
# Distribution of the residuals (observed target minus in-sample prediction).
f, ax = plt.subplots(figsize=(7, 5))
ax.hist(boston.target - predictions, bins=50)
ax.set_title('Histogram of Residuals.')









    Out[7]:





<matplotlib.text.Text at 0x10c310810>



In [8]:

    
# Mean of the residuals; the closer it is to 0, the better.
(boston.target - predictions).mean()









    Out[8]:





4.9709906810093963e-15



In [9]:

    
# Look at the Q-Q plot.



In [10]:

    
from scipy.stats import probplot
# Q-Q plot of the residuals, drawn onto its own axes.
f, ax = plt.subplots(figsize=(7, 5))
probplot(boston.target - predictions, plot=ax)
# Leave the axes as the cell's final expression so it is displayed.
ax









    Out[10]:





<matplotlib.axes._subplots.AxesSubplot at 0x10c291410>



In [11]:

    
# Created Mean Squared Error (MSE) and Mean Absolute Deviation (MAD) helpers
# in msemad.py for this next part and for later in the book.



In [12]:

    
from msemad import MSE, MAD



In [13]:

    
# Mean squared error of the in-sample predictions, using the MSE helper
# imported from the local msemad module.
MSE(boston.target, predictions)









    Out[13]:





21.897779217687496



In [14]:

    
# Mean absolute deviation of the in-sample predictions, using the MAD helper
# imported from the local msemad module.
MAD(boston.target, predictions)









    Out[14]:





3.2729446379969334



In [40]:

    
# Bootstrap settings: how many resampled fits to run, and how many rows each
# resample contains (half of the dataset, truncated to an integer).
n_bootstraps = 100
len_boston = len(boston.target)
# Use the built-in int(): np.int was only an alias for it, deprecated in
# NumPy 1.20 and removed in NumPy 1.24, where it raises AttributeError.
subsample_size = int(0.5 * len_boston)



In [41]:

    
def subsample():
    """Return ``subsample_size`` row indices drawn from ``0 .. len_boston - 1``.

    Indices are drawn by ``np.random.choice`` with its default settings
    (sampling with replacement). ``len_boston`` and ``subsample_size`` are
    looked up when the function is called, not when it is defined.
    """
    candidate_rows = np.arange(0, len_boston)
    return np.random.choice(candidate_rows, size=subsample_size)



In [42]:

    
# One slot per bootstrap fit; the initial ones are placeholders that the
# fitting loop overwrites with the estimated coefficient.
coefs = np.ones(n_bootstraps)



In [43]:

    
# Refit the regression on each bootstrap resample and record its first
# coefficient in coefs.
# A dedicated estimator is used so that `lr`, which was fitted on the full
# dataset, is not silently replaced by the fit of the last resample.
boot_lr = LinearRegression()
for i in range(n_bootstraps):
    subsample_idx = subsample()
    # Index features and targets with the same rows to keep them aligned.
    subsample_X = boston.data[subsample_idx]
    subsample_y = boston.target[subsample_idx]
    boot_lr.fit(subsample_X, subsample_y)
    coefs[i] = boot_lr.coef_[0]



In [ ]:



In [ ]:



In [44]:

    
import matplotlib.pyplot as plt
# Distribution of the first regression coefficient across the bootstrap fits.
f, ax = plt.subplots(figsize=(7, 5))
ax.hist(coefs, bins=50)
ax.set_title("Histogram of the lr.coef_[0]")









    Out[44]:





<matplotlib.text.Text at 0x10cdba690>



In [45]:

    
# 2.5th and 97.5th percentiles of the bootstrapped coefficient, i.e. the
# bounds of its central 95% interval.
np.percentile(coefs, [2.5, 97.5])









    Out[45]:





array([-0.18285195,  0.08042941])



In [ ]: