In [1]:
# same data as in the linear regression example
import numpy as np
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]  # add x0 = 1 to each instance; np.c_ concatenates arrays column-wise
# ----
# (p. 115) gradient descent
eta = 0.1 # learning rate
n_iterations = 1000
m = 100  # number of training instances
theta = np.random.randn(2, 1) # random initialization
for iteration in range(n_iterations):
    gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)
    theta = theta - eta * gradients
theta
Out[1]:
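
# A quick sanity check (an added sketch, not in the original notebook): compare
# the batch-GD result against the closed-form Normal Equation solution
# theta = (X_b^T X_b)^-1 X_b^T y; both should land near [[4], [3]] up to noise.
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
theta_best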
In [2]:
# (p. 118) Simulated annealing (reducing the learning rate for SGD)
n_epochs = 50
t0, t1 = 5, 50 # learning schedule hyperparameters
def learning_schedule(t):
    return t0 / (t + t1)
theta = np.random.randn(2, 1) # random initialization
for epoch in range(n_epochs):
    for i in range(m):
        random_index = np.random.randint(m)
        xi = X_b[random_index:random_index + 1]
        yi = y[random_index:random_index + 1]
        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)  # m = 1 for a single instance
        eta = learning_schedule(epoch * m + i)
        theta = theta - eta * gradients
theta
Out[2]:
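
# Illustration (added sketch, not from the original notebook): the schedule
# starts at eta = t0 / t1 = 0.1 and decays toward 0 as t grows, which is what
# lets SGD settle near the minimum instead of bouncing around it.
for t in (0, 100, 1000, n_epochs * m - 1):
    print(t, learning_schedule(t))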
In [4]:
# (p. 119) same, but using sklearn
from sklearn.linear_model import SGDRegressor
sgd_reg = SGDRegressor(max_iter=50, penalty=None, eta0=0.01)  # n_iter was renamed max_iter in newer scikit-learn
sgd_reg.fit(X, y.ravel())
sgd_reg.intercept_, sgd_reg.coef_
Out[4]:
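
# Usage sketch (an assumption, not part of the original notebook): sklearn adds
# the bias term itself, so predict takes the raw X (no x0 = 1 column needed).
# For data generated from y = 4 + 3x + noise, expect predictions near [4, 10].
X_new = np.array([[0.0], [2.0]])
sgd_reg.predict(X_new)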