In [16]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

plt.plot(X, y, "b.", label="Raw data")
plt.axis([0, 2, 0, 15])
plt.legend(loc="upper left")
plt.show()



In [4]:
# (p. 109) use the normal equation to compute the optimal parameters theta_best.
# Solve the linear system (X_b^T X_b) theta = X_b^T y directly: np.linalg.solve
# is cheaper and numerically more stable than forming the explicit inverse.
X_b = np.c_[np.ones((X.shape[0], 1)), X]  # add x0 = 1 to each instance; np.c_ concatenates columns
theta_best = np.linalg.solve(X_b.T.dot(X_b), X_b.T.dot(y))
theta_best
# we used y = 4 + 3x plus noise to generate the data, so [4, 3] would be ideal


Out[4]:
array([[ 3.90712053],
       [ 3.05107578]])

In [6]:
# (p. 109) make predictions with the fitted parameters at the two
# endpoints of the data range (enough to draw the regression line).
X_new = np.array([[0], [2]])
X_new_aug = np.c_[np.ones((2, 1)), X_new]  # prepend the bias feature x0 = 1
y_predict = X_new_aug @ theta_best
y_predict


Out[6]:
array([[  3.90712053],
       [ 10.00927208]])

In [15]:
# Overlay the fitted regression line on the raw samples
# (line first, then points, so legend order matches the original figure).
for xs, ys, fmt, lbl in [(X_new, y_predict, "r-", "Predictions"),
                         (X, y, "b.", "Raw data")]:
    plt.plot(xs, ys, fmt, label=lbl)
plt.axis([0, 2, 0, 15])
plt.legend(loc="upper left")
plt.show()



In [17]:
# (p. 110) fit the same model with scikit-learn's LinearRegression.
from sklearn.linear_model import LinearRegression

lin_reg = LinearRegression().fit(X, y)  # fit() returns the estimator itself
lin_reg.intercept_, lin_reg.coef_


Out[17]:
(array([ 3.93426116]), array([[ 3.00904255]]))

In [18]:
# Predict at the same two points; should agree with y_predict from the normal-equation model.
lin_reg.predict(X_new)


Out[18]:
array([[ 3.93426116],
       [ 9.95234625]])

In [ ]: