In [75]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
In [76]:
import datetime
from helpers import *
height, weight, gender = load_data(sub_sample=False, add_outlier=False)
x, mean_x, std_x = standardize(height)
y, tx = build_model_data(x, weight)
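The helpers above come from the provided helpers.py. For orientation, here is a minimal sketch of what standardize and build_model_data are assumed to do, inferred from how their outputs are used below (not the official implementations):
In [ ]:
# Sketches of the assumed helpers (for reference only; the notebook uses helpers.py).
def standardize_sketch(x):
    """Subtract the mean and divide by the standard deviation."""
    mean_x = np.mean(x)
    std_x = np.std(x)
    return (x - mean_x) / std_x, mean_x, std_x

def build_model_data_sketch(x, weight):
    """Form the design matrix tx = [1, x] for the linear model y = w0 + w1 * x."""
    tx = np.c_[np.ones(len(x)), x]
    return weight, tx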
In [77]:
y.shape, tx.shape
Out[77]:
In [78]:
def calculate_mse(e):
    """Calculate the MSE for vector e."""
    return 1 / 2 * np.mean(e ** 2)

def calculate_mae(e):
    """Calculate the MAE for vector e."""
    return np.mean(np.abs(e))

def compute_loss(y, tx, w):
    """Calculate the loss.

    You can calculate the loss using MSE or MAE.
    """
    e = y - tx.dot(w)
    return calculate_mse(e)
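A quick sanity check with made-up numbers (a perfect fit should give zero loss):
In [ ]:
# Toy sanity check (hypothetical values): tx.dot(w) reproduces y exactly, so the MSE is 0.
y_toy = np.array([1.0, 2.0])
tx_toy = np.array([[1.0, 0.0], [1.0, 1.0]])
w_toy = np.array([1.0, 1.0])
print(compute_loss(y_toy, tx_toy, w_toy))  # expected: 0.0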
Fill in the function grid_search() below:
In [79]:
# from costs import *

def grid_search(y, tx, w0, w1):
    """Algorithm for grid search."""
    losses = np.zeros((len(w0), len(w1)))
    # compute the loss for each combination of w0 and w1.
    for ind_row, row in enumerate(w0):
        for ind_col, col in enumerate(w1):
            w = np.array([row, col])
            losses[ind_row, ind_col] = compute_loss(y, tx, w)
    return losses
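The demo below relies on generate_w and get_best_parameters from the provided grid_search.py. A minimal sketch of what they are assumed to do (the grid bounds here are illustrative assumptions; the provided file may use different ranges):
In [ ]:
# Sketches of the assumed grid_search.py helpers (bounds are illustrative assumptions).
def generate_w_sketch(num_intervals):
    """Build 1D grids of candidate values for w0 and w1."""
    w0 = np.linspace(-100, 200, num_intervals)
    w1 = np.linspace(-150, 150, num_intervals)
    return w0, w1

def get_best_parameters_sketch(w0, w1, losses):
    """Return the smallest loss in the grid and the (w0, w1) pair that achieves it."""
    min_row, min_col = np.unravel_index(np.argmin(losses), losses.shape)
    return losses[min_row, min_col], w0[min_row], w1[min_col]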
Let us play with the grid search demo now!
In [80]:
from grid_search import generate_w, get_best_parameters
from plots import grid_visualization
# Generate the grid of parameters to be swept
grid_w0, grid_w1 = generate_w(num_intervals=10)
# Start the grid search
start_time = datetime.datetime.now()
grid_losses = grid_search(y, tx, grid_w0, grid_w1)
# Select the best combination
loss_star, w0_star, w1_star = get_best_parameters(grid_w0, grid_w1, grid_losses)
end_time = datetime.datetime.now()
execution_time = (end_time - start_time).total_seconds()
# Print the results
print("Grid Search: loss*={l}, w0*={w0}, w1*={w1}, execution time={t:.3f} seconds".format(
l=loss_star, w0=w0_star, w1=w1_star, t=execution_time))
# Plot the results
fig = grid_visualization(grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight)
fig.set_size_inches(10.0, 6.0)
fig.savefig("grid_plot") # Optional saving
Again, please fill in the function compute_gradient below:
In [81]:
def compute_gradient(y, tx, w):
    """Compute the gradient."""
    err = y - tx.dot(w)
    grad = -tx.T.dot(err) / len(err)
    return grad, err
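The gradient of the MSE loss L(w) = (1/2N) * ||y - tx.dot(w)||^2 is -tx.T.dot(e) / N with e = y - tx.dot(w), which is exactly what compute_gradient returns. To convince yourself, a centered finite-difference check on toy data (hypothetical values, not part of the exercise) works well:
In [ ]:
# Optional sanity check: compare compute_gradient against a centered finite difference.
rng = np.random.default_rng(0)
y_chk = rng.standard_normal(5)
tx_chk = np.c_[np.ones(5), rng.standard_normal(5)]
w_chk = np.array([0.5, -1.0])
grad_chk, _ = compute_gradient(y_chk, tx_chk, w_chk)
eps = 1e-6
for i in range(len(w_chk)):
    w_plus, w_minus = w_chk.copy(), w_chk.copy()
    w_plus[i] += eps
    w_minus[i] -= eps
    num_grad = (compute_loss(y_chk, tx_chk, w_plus) - compute_loss(y_chk, tx_chk, w_minus)) / (2 * eps)
    print(grad_chk[i], num_grad)  # the two values should agree to ~1e-6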
Please fill in the function gradient_descent below:
In [82]:
def gradient_descent(y, tx, initial_w, max_iters, gamma):
    """Gradient descent algorithm."""
    # Define lists to store w and the loss at each iteration
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        # compute the loss and the gradient
        grad, err = compute_gradient(y, tx, w)
        loss = calculate_mse(err)
        # update w by a gradient descent step
        w = w - gamma * grad
        # store w and loss
        ws.append(w)
        losses.append(loss)
        print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
            bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws
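Because x is standardized (zero mean, unit variance), tx.T.dot(tx) / len(y) is the identity matrix, so this problem is perfectly conditioned: any step size gamma below 2 converges, and gamma = 1 essentially reaches the optimum in a single step. A quick optional experiment (not part of the exercise) to see the effect of gamma, without the per-iteration printing above:
In [ ]:
# Optional experiment: final loss after 20 gradient descent steps for a few step sizes.
for g in [0.1, 0.5, 1.0, 1.5]:
    w = np.array([0.0, 0.0])
    for _ in range(20):
        grad, err = compute_gradient(y, tx, w)
        w = w - g * grad
    print("gamma={g}: loss={l:.6f}".format(g=g, l=compute_loss(y, tx, w)))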
Test your gradient descent function through the demo shown below:
In [83]:
# from gradient_descent import *
from plots import gradient_descent_visualization
# Define the parameters of the algorithm.
max_iters = 50
gamma = 0.7
# Initialization
w_initial = np.array([0, 0])
# Start gradient descent.
start_time = datetime.datetime.now()
gradient_losses, gradient_ws = gradient_descent(y, tx, w_initial, max_iters, gamma)
end_time = datetime.datetime.now()
# Print result
execution_time = (end_time - start_time).total_seconds()
print("Gradient Descent: execution time={t:.3f} seconds".format(t=execution_time))
In [84]:
# Time Visualization
from ipywidgets import IntSlider, interact
def plot_figure(n_iter):
    fig = gradient_descent_visualization(
        gradient_losses, gradient_ws, grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight, n_iter)
    fig.set_size_inches(10.0, 6.0)

interact(plot_figure, n_iter=IntSlider(min=1, max=len(gradient_ws)))
In [85]:
def compute_stoch_gradient(y, tx, w):
    """Compute a stochastic gradient from just a few examples n and their corresponding y_n labels."""
    err = y - tx.dot(w)
    grad = -tx.T.dot(err) / len(err)
    return grad, err

def stochastic_gradient_descent(
        y, tx, initial_w, batch_size, max_iters, gamma):
    """Stochastic gradient descent algorithm."""
    # Define lists to store w and the loss at each iteration
    ws = [initial_w]
    losses = []
    w = initial_w
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=batch_size, num_batches=1):
            # compute a stochastic gradient from the mini-batch
            grad, _ = compute_stoch_gradient(y_batch, tx_batch, w)
            # update w through the stochastic gradient step
            w = w - gamma * grad
            # calculate the loss on the full dataset (for monitoring only)
            loss = compute_loss(y, tx, w)
            # store w and loss
            ws.append(w)
            losses.append(loss)
        print("SGD({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
            bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws
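batch_iter is assumed to come from the course's helpers.py (pulled in by the wildcard import at the top). A minimal sketch of the behavior assumed here: shuffle the data once, then yield num_batches mini-batches of batch_size examples each:
In [ ]:
# Sketch of the assumed helpers.batch_iter (for reference only; internals are assumptions).
def batch_iter_sketch(y, tx, batch_size, num_batches=1, shuffle=True):
    """Yield (y, tx) mini-batches drawn from a shuffled copy of the data."""
    data_size = len(y)
    if shuffle:
        idx = np.random.permutation(data_size)
        y, tx = y[idx], tx[idx]
    for n in range(num_batches):
        start = n * batch_size
        end = min(start + batch_size, data_size)
        if start < end:
            yield y[start:end], tx[start:end]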
In [86]:
# from stochastic_gradient_descent import *
# Define the parameters of the algorithm.
max_iters = 50
gamma = 0.7
batch_size = 1
# Initialization
w_initial = np.array([0, 0])
# Start SGD.
start_time = datetime.datetime.now()
sgd_losses, sgd_ws = stochastic_gradient_descent(
y, tx, w_initial, batch_size, max_iters, gamma)
end_time = datetime.datetime.now()
# Print result
execution_time = (end_time - start_time).total_seconds()
print("SGD: execution time={t:.3f} seconds".format(t=execution_time))
In [87]:
# Time Visualization
from ipywidgets import IntSlider, interact
def plot_figure(n_iter):
    fig = gradient_descent_visualization(
        sgd_losses, sgd_ws, grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight, n_iter)
    fig.set_size_inches(10.0, 6.0)

interact(plot_figure, n_iter=IntSlider(min=1, max=len(sgd_ws)))
In [ ]: