In [16]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

plt.plot(X, y, "b.", label="Raw data")
plt.axis([0, 2, 0, 15])
plt.legend(loc="upper left")
plt.show()



In [4]:
# (p. 109) use the normal equation to compute the optimal parameters theta_best.
# Solve the linear system (X_b^T X_b) theta = X_b^T y directly: np.linalg.solve
# is cheaper and numerically more stable than forming the explicit inverse.
X_b = np.c_[np.ones((X.shape[0], 1)), X]  # add x0 = 1 to each instance; np.c_ concatenates columns
theta_best = np.linalg.solve(X_b.T.dot(X_b), X_b.T.dot(y))
theta_best
# we used y = 4 + 3x plus noise to generate the data, so [4, 3] would be ideal


Out[4]:
array([[ 3.90712053],
       [ 3.05107578]])

In [6]:
# (p. 109) make predictions with the fitted parameters at the two
# endpoints of the data range (enough to draw the regression line).
X_new = np.array([[0], [2]])
X_new_aug = np.c_[np.ones((2, 1)), X_new]  # prepend the bias feature x0 = 1
y_predict = X_new_aug @ theta_best
y_predict


Out[6]:
array([[  3.90712053],
       [ 10.00927208]])

In [15]:
# Overlay the fitted regression line on the raw samples
# (line first, then points, so legend order matches the original figure).
for xs, ys, fmt, lbl in [(X_new, y_predict, "r-", "Predictions"),
                         (X, y, "b.", "Raw data")]:
    plt.plot(xs, ys, fmt, label=lbl)
plt.axis([0, 2, 0, 15])
plt.legend(loc="upper left")
plt.show()



In [17]:
# (p. 110) fit the same model with scikit-learn's LinearRegression.
from sklearn.linear_model import LinearRegression

lin_reg = LinearRegression().fit(X, y)  # fit() returns the estimator itself
lin_reg.intercept_, lin_reg.coef_


Out[17]:
(array([ 3.93426116]), array([[ 3.00904255]]))

In [18]:
# Predict at the same two points; should agree with y_predict from the normal-equation model.
lin_reg.predict(X_new)


Out[18]:
array([[ 3.93426116],
       [ 9.95234625]])

In [ ]: