In [ ]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
In [ ]:
def least_squares(y, tx):
    """Compute the least-squares weights of the linear model ``tx @ w ~ y``.

    Args:
        y: target values, one entry (or row) per row of ``tx``.
        tx: 2-D design matrix, one row per sample and one column per feature.

    Returns:
        The weights ``w`` minimising ``||y - tx @ w||^2``. When ``tx`` does
        not have full column rank (so the minimiser is not unique), the
        minimum-norm minimiser is returned.
    """
    # Solve the least-squares problem on tx directly rather than through the
    # normal equations (tx^T tx) w = tx^T y: forming tx^T tx squares the
    # condition number (a real concern for high-degree polynomial features)
    # and np.linalg.solve raises LinAlgError when tx is rank-deficient.
    w, _residuals, _rank, _singular_values = np.linalg.lstsq(tx, y, rcond=None)
    return w
In [ ]:
from helpers import *
def test_your_least_squares():
    """Fit least squares on the exercise-02 data and print the fitted weights.

    Loads the full dataset without outliers, standardizes the height column,
    builds the model data with ``build_model_data`` and prints the result of
    ``least_squares``.
    """
    # Only height and weight are used; the third returned value is ignored.
    height_raw, weight_raw, _ = load_data_from_ex02(
        sub_sample=False, add_outlier=False)
    # standardize also returns the mean and std, which are not needed here.
    height_std, _, _ = standardize(height_raw)
    targets, design = build_model_data(height_std, weight_raw)
    print(least_squares(targets, design))
Test it here
In [ ]:
# Run the least-squares check; it prints the fitted weights.
test_your_least_squares()
In [ ]:
# Load the dataset into the module-level arrays x and y (used by the cells
# below) and print the shape of each.
x, y = load_data()
print("shape of x {}".format(x.shape))
print("shape of y {}".format(y.shape))
In [ ]:
def build_poly(x, degree):
    """Expand ``x`` into polynomial features ``x**0, x**1, ..., x**degree``.

    Args:
        x: input samples; ``len(x)`` gives the number of rows of the result.
        degree: highest power to include. The constant column of ones is
            always present, so ``degree <= 0`` yields just that column.

    Returns:
        A float array whose first column is all ones, followed by
        ``np.power(x, j)`` for ``j = 1 .. degree``.
    """
    # Collect every column first and stack once, instead of growing the
    # matrix one column at a time.
    columns = [np.ones(len(x))]
    columns.extend(np.power(x, power) for power in range(1, degree + 1))
    return np.column_stack(columns)
Let us play with polynomial regression. Note that we will use your implementation of the function `compute_mse`. Please copy and paste it from exercise02.
In [ ]:
from costs import compute_mse
from plots import *
def polynomial_regression():
    """Fit polynomials of several degrees to the global data and plot each fit.

    Uses the module-level ``x`` and ``y``. For each degree in [1, 3, 7, 12]
    the inputs are expanded with ``build_poly``, fitted with
    ``least_squares``, the RMSE is printed and the fitted curve is drawn in
    its own cell of a 2x2 subplot grid. The figure is saved as
    "visualize_polynomial_regression" and then shown.
    """
    degrees = [1, 3, 7, 12]

    # One subplot per degree, laid out on a 2x2 grid.
    n_rows, n_cols = 2, 2
    _, axes = plt.subplots(n_rows, n_cols)

    for idx, degree in enumerate(degrees):
        # Polynomial feature expansion followed by a least-squares fit.
        features = build_poly(x, degree)
        w = least_squares(y, features)

        # RMSE taken as sqrt(2 * MSE); this presumes compute_mse returns the
        # halved mean squared error -- confirm against costs.py.
        rmse = np.sqrt(2 * compute_mse(y, features, w))
        print("Processing {i}th experiment, degree={d}, rmse={loss}".format(
            i=idx + 1, d=degree, loss=rmse))

        # Fill the grid row by row.
        row, col = divmod(idx, n_cols)
        plot_fitted_curve(y, x, w, degree, axes[row][col])

    plt.tight_layout()
    plt.savefig("visualize_polynomial_regression")
    plt.show()
Run polynomial regression
In [ ]:
# Run the polynomial-regression experiment; it prints one RMSE per degree and
# saves/shows the figure of fitted curves.
polynomial_regression()
In [ ]:
def split_data(x, y, ratio, seed=1):
    """Randomly partition ``(x, y)`` into a training part and a test part.

    Args:
        x: indexable array of inputs, aligned with ``y``.
        y: indexable array of targets; ``len(y)`` is the number of samples.
        ratio: fraction of samples assigned to the training part; the
            training size is ``floor(ratio * len(y))``.
        seed: value passed to ``np.random.seed`` before shuffling. Note that
            this reseeds NumPy's global random state.

    Returns:
        The tuple ``(x_tr, x_te, y_tr, y_te)``.
    """
    np.random.seed(seed)

    # Shuffle the sample indices, then cut the shuffled order in two.
    n_samples = len(y)
    shuffled = np.random.permutation(n_samples)
    cut = int(np.floor(ratio * n_samples))
    train_idx, test_idx = shuffled[:cut], shuffled[cut:]

    return x[train_idx], x[test_idx], y[train_idx], y[test_idx]
Then, test your `split_data` function below.
In [ ]:
def train_test_split_demo(x, y, degree, ratio, seed):
    """Fit a polynomial on a train split and print train and test RMSE.

    Args:
        x: inputs, forwarded to ``split_data``.
        y: targets, forwarded to ``split_data``.
        degree: polynomial degree passed to ``build_poly``.
        ratio: training fraction passed to ``split_data``.
        seed: random seed passed to ``split_data``.
    """
    x_train, x_test, y_train, y_test = split_data(x, y, ratio, seed)

    # Expand both parts with the same polynomial basis.
    phi_train = build_poly(x_train, degree)
    phi_test = build_poly(x_test, degree)

    # Fit on the training part only.
    w = least_squares(y_train, phi_train)

    # RMSE taken as sqrt(2 * MSE); this presumes compute_mse returns the
    # halved mean squared error -- confirm against costs.py.
    train_rmse = np.sqrt(2 * compute_mse(y_train, phi_train, w))
    test_rmse = np.sqrt(2 * compute_mse(y_test, phi_test, w))

    print("proportion={p}, degree={d}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}".format(
        p=ratio, d=degree, tr=train_rmse, te=test_rmse))
Demo time
In [ ]:
# Sweep every (split ratio, degree) combination with a fixed seed, so each
# split ratio uses the same shuffle for all degrees.
seed = 6
degrees = [1, 3, 7, 12]
split_ratios = [0.9, 0.5, 0.1]

for split_ratio in split_ratios:
    for degree in degrees:
        train_test_split_demo(
            x, y, degree=degree, ratio=split_ratio, seed=seed)
In [ ]:
def ridge_regression(y, tx, lambda_):
    """Compute ridge-regression weights from the regularised normal equations.

    Solves ``(tx^T tx + 2 * N * lambda_ * I) w = tx^T y`` where ``N`` is the
    number of rows of ``tx`` and ``I`` is the identity of size
    ``tx.shape[1]``.

    Args:
        y: target values, one per row of ``tx``.
        tx: 2-D design matrix.
        lambda_: regularisation strength.

    Returns:
        The solution ``w`` of the linear system above.
    """
    n_samples = tx.shape[0]
    n_features = tx.shape[1]

    # The penalty is scaled by 2N, so lambda_ is relative to the sample count.
    penalty = 2 * n_samples * lambda_ * np.eye(n_features)
    lhs = np.dot(tx.T, tx) + penalty
    rhs = np.dot(tx.T, y)
    return np.linalg.solve(lhs, rhs)
In [ ]:
def ridge_regression_demo(x, y, degree, ratio, seed):
    """Sweep the ridge penalty and report train/test RMSE for each value.

    Splits the data with ``split_data``, expands both parts with
    ``build_poly``, then for each of 15 log-spaced lambdas between 1e-5 and 1
    fits ``ridge_regression`` on the training part and prints both RMSEs.
    Finally the two RMSE curves are passed to ``plot_train_test``.

    Args:
        x: inputs, forwarded to ``split_data``.
        y: targets, forwarded to ``split_data``.
        degree: polynomial degree passed to ``build_poly``.
        ratio: training fraction passed to ``split_data``.
        seed: random seed passed to ``split_data``.
    """
    lambdas = np.logspace(-5, 0, 15)

    x_train, x_test, y_train, y_test = split_data(x, y, ratio, seed)
    phi_train = build_poly(x_train, degree)
    phi_test = build_poly(x_test, degree)

    rmse_tr, rmse_te = [], []
    for lambda_ in lambdas:
        w = ridge_regression(y_train, phi_train, lambda_)

        # RMSE taken as sqrt(2 * MSE); this presumes compute_mse returns the
        # halved mean squared error -- confirm against costs.py.
        train_rmse = np.sqrt(2 * compute_mse(y_train, phi_train, w))
        test_rmse = np.sqrt(2 * compute_mse(y_test, phi_test, w))
        rmse_tr.append(train_rmse)
        rmse_te.append(test_rmse)

        print("proportion={p}, degree={d}, lambda={l:.3f}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}".format(
            p=ratio, d=degree, l=lambda_, tr=train_rmse, te=test_rmse))

    plot_train_test(rmse_tr, rmse_te, lambdas, degree)
Demo time
In [ ]:
# Ridge-regression demo settings: degree-7 polynomial features, half of the
# samples used for training, shuffle seeded with 56.
seed = 56
degree = 7
split_ratio = 0.5
ridge_regression_demo(x, y, degree, split_ratio, seed)
In [ ]: