In [1]:
%matplotlib inline
In [2]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize
Prepare the training data: load the housing dataset, prepend a column of ones for the intercept term, shuffle, and take the first 400 rows for training.
In [3]:
data_original = np.loadtxt('stanford_dl_ex/ex1/housing.data')
data = np.insert(data_original, 0, 1, axis=1)  # prepend a column of ones (intercept term)
np.random.shuffle(data)
train_X = data[:400, :-1]  # features of the first 400 examples
train_y = data[:400, -1]   # target values (last column)
m, n = train_X.shape
theta = np.random.rand(n)  # random initial parameters
Define the linear regression cost function and its analytical gradient.
In [4]:
def cost_function(theta, X, y):
    # Squared-error cost: 0.5 * sum of squared residuals.
    squared_errors = (X.dot(theta) - y) ** 2
    J = 0.5 * squared_errors.sum()
    return J


def gradient(theta, X, y):
    # Analytical gradient: X^T (X.theta - y).
    errors = X.dot(theta) - y
    return errors.dot(X)
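In math terms (matching the code above), the cost and its gradient are

$$J(\theta) = \tfrac{1}{2} \sum_{i=1}^{m} \bigl(\theta^\top x^{(i)} - y^{(i)}\bigr)^2, \qquad \nabla_\theta J(\theta) = X^\top (X\theta - y).$$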
Define "step size" (don't set it too low to avoid numerical precision issues).
In [5]:
epsilon = 1e-4
Prepare the perturbed theta values (making use of NumPy broadcasting): row i of theta_plus / theta_minus is theta with epsilon added to / subtracted from its i-th component.
In [6]:
mask = np.identity(theta.size)
theta_plus = theta + epsilon * mask
theta_minus = theta - epsilon * mask
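As a quick sanity check (a sketch, not part of the original exercise), row i of theta_plus should match theta everywhere except component i, where it is larger by epsilon:

# Illustration only: row i of theta_plus differs from theta just in its i-th component.
i = 0
assert np.allclose(np.delete(theta_plus[i] - theta, i), 0.0)
assert np.isclose(theta_plus[i, i] - theta[i], epsilon)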
Compute the absolute differences between the gradient as approximated from its mathematical definition (central differences) and the gradient returned by our gradient function above.
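For each component $i$, the "mathematically defined" gradient is approximated by the central difference

$$\frac{\partial J}{\partial \theta_i} \approx \frac{J(\theta + \epsilon e_i) - J(\theta - \epsilon e_i)}{2\epsilon},$$

where $e_i$ is the $i$-th standard basis vector (exactly the rows of theta_plus and theta_minus prepared above).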
In [8]:
diffs = np.empty_like(theta)
for i in range(theta_plus.shape[0]):
    # Numerical estimate of the i-th partial derivative (central difference).
    gradient_def = (
        (cost_function(theta_plus[i], train_X, train_y) -
         cost_function(theta_minus[i], train_X, train_y)) /
        (2 * epsilon)
    )
    # Corresponding component of the analytical gradient.
    gradient_lin_reg = gradient(theta, train_X, train_y)[i]
    diffs[i] = np.absolute(gradient_def - gradient_lin_reg)
In [9]:
diffs
Out[9]:
Lookin' good! The smaller the values, the better.
(Any value significantly larger than 1e-4 indicates a problem.)
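(An aside, not part of the original check: an absolute threshold like 1e-4 depends on the scale of the gradient, so a scale-independent variant would compare relative errors instead, along these lines.)

# Sketch: per-component relative error, reusing diffs and the analytical gradient.
analytical = gradient(theta, train_X, train_y)
relative_error = diffs / np.maximum(np.abs(analytical), 1e-8)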
In [10]:
assert np.all(diffs < 1e-4)
Quality check: passed with distinction.
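As an independent cross-check, scipy.optimize.check_grad (already imported above) performs essentially the same comparison, returning the 2-norm of the difference between the analytical gradient and a finite-difference approximation. A sketch:

# Sketch: compare gradient() against a finite-difference approximation of
# cost_function; check_grad returns the norm of the difference vector.
print(scipy.optimize.check_grad(cost_function, gradient, theta, train_X, train_y))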