In [1]:
    
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
    
In [15]:
    
def compute_cost_MSE(y, tx, beta):
    """compute the loss by mse."""
    e = y - tx.dot(beta)
    mse = e.dot(e) / (2 * len(e))
    return mse
def compute_cost_MAE(y, tx, w):
    y = np.array(y)
    return np.sum(abs(y - np.dot(tx, w))) / y.shape[0]
def least_squares(y, tx):
    """calculate the least squares solution."""
    # ***************************************************
    # INSERT YOUR CODE HERE
    # least squares: TODO
    # returns mse, and optimal weights
    # ***************************************************
    weight = np.linalg.solve(np.dot(tx.T,tx), np.dot(tx.T,y))
    return least_square_mse(y,tx, weight),weight
    
def least_square_mse(y, tx, w):
    return compute_cost_MSE(y, tx, w)
def rmse(y, tx, w):
    return np.sqrt(compute_cost_MSE)
    
In [3]:
    
from helpers import *
def test_your_least_squares():
    height, weight, gender = load_data_from_ex02(sub_sample=False, add_outlier=False)
    x, mean_x, std_x = standardize(height)
    y, tx = build_model_data(x, weight)
    # ***************************************************
    # INSERT YOUR CODE HERE
    # least square or grid search: TODO
    # this code should compare the optimal weights obtained 
    # by least squares vs. grid search
    # ***************************************************
    mse, lsq_w = least_squares(y,tx)
    print(lsq_w)
test_your_least_squares()
    
    
In [4]:
    
# load dataset
x, y = load_data()
print("shape of x {}".format(x.shape))
print("shape of y {}".format(y.shape))
    
    
In [5]:
    
def build_poly(x, degree):
    """polynomial basis functions for input data x, for j=0 up to j=degree."""
    # ***************************************************
    # INSERT YOUR CODE HERE
    # polynomial basis function: TODO
    # this function should return the matrix formed
    # by applying the polynomial basis to the input data
    # ***************************************************
    x = np.array(x)
    res = x
    for d in range(2, degree + 1):
        res = np.concatenate((res, x ** d), axis=-1)
    # print(len(x),degree)
    # print(res)
    res = np.reshape(res, (degree, len(x)))
    res = np.c_[np.ones((len(res.T), 1)),res.T]
    return res
def build_poly2(x, degree):
    """polynomial basis function."""
    X = np.ones((x.shape[0], degree + 1))
    for i in range(degree):
        X[:, i + 1:degree + 1] *= x[:, np.newaxis]
    return X
    
In [9]:
    
test = np.array(range(10))
build_poly2(test,2)
    
    Out[9]:
Let us play with polynomial regression. Note that we will use your implemented function compute_mse. Please copy and paste your implementation from exercise02.
In [16]:
    
from plots import *
# from .build_polynomial import * 
def polynomial_regression():
    """Constructing the polynomial basis function expansion of the data,
       and then running least squares regression."""
    # define parameters
    degrees = [1, 3, 7, 12]
    
    # define the structure of figure
    num_row = 2
    num_col = 2
    f, axs = plt.subplots(num_row, num_col)
    for ind, degree in enumerate(degrees):
        # ***************************************************
        # INSERT YOUR CODE HERE
        # form the data to do polynomial regression.: TODO
        # ***************************************************
        x_degree = build_poly(x,degree)
        # ***************************************************
        # INSERT YOUR CODE HERE
        # least square and calculate rmse: TODO
        # ***************************************************
        lsq_degree, weight = least_squares(y,x_degree)
#         print(weight)
        rmse = np.sqrt(2*lsq_degree)
        print("Processing {i}th experiment, degree={d}, rmse={loss}".format(
              i=ind + 1, d=degree, loss=rmse))
        # plot fit
        plot_fitted_curve(
            y, x, weight, degree, axs[ind // num_col][ind % num_col])
    plt.tight_layout()
    plt.savefig("visualize_polynomial_regression")
    plt.show()
polynomial_regression()
    
    
    
In [10]:
    
def split_data(x, y, ratio, seed=1):
    """split the dataset based on the split ratio."""
    # set seed
    np.random.seed(seed)
    # ***************************************************
    # INSERT YOUR CODE HERE
    # split the data based on the given ratio: TODO
    # ***************************************************
    # Random shuffle the index by enumerate.
    pair = np.c_[x,y]
    np.random.shuffle(pair)
    index = np.round(x.size * ratio,0).astype('int16')
    p1, p2 = np.split(pair,[index])
    x1,y1 = zip(*p1)
    x2,y2 = zip(*p2)
    return x1,y1,x2,y2
def split_data2(x, y, ratio, seed=1):
    """split the dataset based on the split ratio."""
    # set seed
    np.random.seed(seed)
    ntr     = round(y.shape[0] * ratio)
    ind     = np.random.permutation(range(y.shape[0]))
    x_tr    = x[ind[:ntr]]
    x_te    = x[ind[ntr:]]
    y_tr    = y[ind[:ntr]]
    y_te    = y[ind[ntr:]]
    return (x_tr, y_tr , x_te , y_te)
test_x = np.array( range(0,10))
test_y = np.array(range(0,10))
print(split_data(test_x, test_y, 0.5))
print(split_data2(test_x, test_y, 0.5))
    
    
Then, test your split_data function below.
In [70]:
    
def train_test_split_demo(x, y, degree, ratio, seed):
    """polynomial regression with different split ratios and different degrees."""
    # ***************************************************
    # INSERT YOUR CODE HERE
    # split the data, and return train and test data: TODO
    # ***************************************************
    trainX,trainY,testX,testY = split_data(x,y,ratio,seed)
    # ***************************************************
    # INSERT YOUR CODE HERE
    # form train and test data with polynomial basis function: TODO
    # ***************************************************
    #     print(len(trainX))
    # trainX = np.c_[np.ones((len(trainX),1)), build_poly(trainX,degree)]
    # testX = np.c_[np.ones((len(testX),1)), build_poly(testX,degree)]
    trainX = build_poly(trainX, degree)
    testX = build_poly(testX, degree)
    # ***************************************************
    # INSERT YOUR CODE HERE
    # calcualte weight through least square.: TODO
    # ***************************************************
    mse, weight = least_squares(trainY,trainX)
    # ***************************************************
    # INSERT YOUR CODE HERE
    # calculate RMSE for train and test data,
    # and store them in rmse_tr and rmse_te respectively: TODO
    # ***************************************************
    mse_test = np.sum((testY-np.dot(testX,weight))**2)/len(testY)
    rmse_tr = np.sqrt(2*mse)
    rmse_te = np.sqrt(2*mse_test)
    print("proportion={p}, degree={d}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}".format(
          p=ratio, d=degree, tr=rmse_tr, te=rmse_te))
seed = 6
degrees = [1,3, 7, 12]
split_ratios = [0.9, 0.5, 0.1]
for split_ratio in split_ratios:
    for degree in degrees:
        train_test_split_demo(x, y, degree, split_ratio, seed)
    
    
In [11]:
    
def ridge_regression(y, tx, lamb):
    """implement ridge regression."""
    # ***************************************************
    # INSERT YOUR CODE HERE
    # ridge regression: TODO
    # ***************************************************
    # Hes = tx.T * tx + 2*N*lambda * I_m
    G = np.eye(tx.shape[1])
    G[0,0] = 0
    hes = np.dot(tx.T,tx) + lamb * G
    weight = np.linalg.solve(hes,np.dot(tx.T,y))
    mse = compute_cost_MSE(y, tx, weight)
    return mse,weight
    
In [24]:
    
def ridge_regression_demo(x, y, degree, ratio, seed):
    """ridge regression demo."""
    # define parameter
    lambdas = np.logspace(-3, 1, 10)   
    
    trainX,trainY,testX,testY = split_data(x,y,ratio,seed)
    trainX = build_poly(trainX,degree)
    testX =  build_poly(testX,degree)
    
    _rmse_te = []
    _rmse_tr = []
    
      # define the structure of figure
#     num_row = 6
#     num_col = 2
#     f, axs = plt.subplots(num_row, num_col)
    
    for ind, lamb in enumerate(lambdas):
        mse, weight = ridge_regression(trainY,trainX,lamb)
        # ***************************************************
        # INSERT YOUR CODE HERE
        # calculate RMSE for train and test data,
        # and store them in rmse_tr and rmse_te respectively: TODO
        # ***************************************************
        mse_test = compute_cost_MSE(testY, testX, weight)
        rmse_tr = np.sqrt(2*mse)
        rmse_te = np.sqrt(2*mse_test)
        _rmse_te.append(rmse_te)
        _rmse_tr.append(rmse_tr)
        print("lambda={l}, proportion={p}, degree={d}, weight={w}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}".format(
              l=ind, p=ratio, d=degree, w=len(weight), tr=rmse_tr, te=rmse_te))
        # plot fit
#         plot_fitted_curve(
#             y, x, weight, degree, axs[ind // num_col][ind % num_col])
        
    print(_rmse_te, _rmse_tr)
#     plt.hold(False)
#     rmse_tr_plt, = plt.plot(lambdas, _rmse_tr, 's-b', label="train error")
#     plt.semilogx()
#     plt.hold(True)
#     rmse_te_plt, = plt.plot(lambdas, _rmse_te, 's-r', label="test error")
#     plt.xlabel('lambda')
#     plt.ylabel('rmse')
#     plt.title('ridge regression for polynomial degree {deg}'.format(deg=degree))
#     plt.legend(handles=[rmse_tr_plt, rmse_te_plt])
#     plt.show()
    plot_train_test(_rmse_tr, _rmse_te, lambdas, degree)
seed = 11
degree = 7
split_ratio = 0.5
ridge_regression_demo(x, y, degree, split_ratio, seed)
    
    
    
In [ ]:
    
def polynomial_regression():
    """Constructing the polynomial basis function expansion of the data,
       and then running least squares regression."""
    # define parameters
    degrees = [7]
    
    # define the structure of figure
    num_row = 2
    num_col = 2
    f, axs = plt.subplots(num_row, num_col)
    for ind, degree in enumerate(degrees):
        # ***************************************************
        # INSERT YOUR CODE HERE
        # form the data to do polynomial regression.: TODO
        # ***************************************************
        x_degree = build_poly(x,degree)
        # ***************************************************
        # INSERT YOUR CODE HERE
        # least square and calculate rmse: TODO
        # ***************************************************
        lsq_degree, weight = least_squares(y,x_degree)
#         print(weight)
        rmse = np.sqrt(2*lsq_degree)
        print("Processing {i}th experiment, degree={d}, rmse={loss}".format(
              i=ind + 1, d=degree, loss=rmse))
        # plot fit
        plot_fitted_curve(
            y, x, weight, degree, axs[ind // num_col][ind % num_col])
    plt.tight_layout()
    plt.savefig("visualize_polynomial_regression")
    plt.show()
polynomial_regression()
    
In [ ]: