Homework: https://work.caltech.edu/homework/hw6.pdf
✔ Answers:
Answer key: https://work.caltech.edu/homework/hw6_sol.pdf
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display
In [2]:
# Questions 2 through 6
# training set: http://work.caltech.edu/data/in.dta
# test set: http://work.caltech.edu/data/out.dta
# Both files are read as headerless fixed-width tables; a generator (not a
# list comprehension) is unpacked so no loop variable is left in the namespace.
train, test = (
    pd.read_fwf(url, header=None)
    for url in ('http://work.caltech.edu/data/in.dta',
                'http://work.caltech.edu/data/out.dta')
)
# Preview the first rows of each table (training set first, then test set).
display(train.head(), test.head())
In [3]:
# The first two columns are used as the inputs and the third column as the
# target, for both the training and the test tables.
train_x, train_y = train.iloc[:, :2], train.iloc[:, 2]
test_x, test_y = test.iloc[:, :2], test.iloc[:, 2]
In [4]:
def transform(x):
    """Apply the nonlinear feature map to a two-column input table.

    Each row (x1, x2) is mapped to the eight features
    (1, x1, x2, x1**2, x2**2, x1*x2, |x1 - x2|, |x1 + x2|).

    Parameters
    ----------
    x : pandas.DataFrame
        Table whose first two columns (by position) are x1 and x2.

    Returns
    -------
    pandas.DataFrame
        One row per input row, columns labelled 0..7, with a fresh 0..n-1
        index.
    """
    # Work on the raw arrays: assigning Series into a frame aligns on index
    # labels, which would silently produce NaN for inputs whose index is not
    # the default 0..n-1 (e.g. a filtered or shuffled frame).
    x1 = np.asarray(x.iloc[:, 0])
    x2 = np.asarray(x.iloc[:, 1])
    features = {
        0: np.ones(len(x)),
        1: x1,
        2: x2,
        3: x1 ** 2,
        4: x2 ** 2,
        5: x1 * x2,
        6: np.abs(x1 - x2),
        7: np.abs(x1 + x2),
    }
    # Explicit column list pins the feature order regardless of dict ordering.
    return pd.DataFrame(features, columns=list(range(8)))
# Map the training inputs into feature space and preview the first five rows.
train_z = transform(train_x)
train_z.head(n=5)
Out[4]:
In [5]:
# Map the test inputs through the same feature transform and preview the
# first five rows.
test_z = transform(test_x)
test_z.head(n=5)
Out[5]:
In [6]:
from numpy.linalg import inv
def linear_regression(Z, y):
    """Least-squares linear regression: return w = pinv(Z) . y.

    Parameters
    ----------
    Z : array-like, shape (N, M)
        Feature matrix, one row per sample.
    y : array-like, shape (N,)
        Targets.

    Returns
    -------
    numpy.ndarray, shape (M,)
        Weights minimising ||Z w - y||^2. When Z is rank-deficient the
        minimum-norm minimiser is returned.
    """
    # np.linalg.pinv computes the pseudo-inverse via SVD. For full-rank Z it
    # equals inv(Z^T Z) Z^T, but it does not fail (or blow up numerically)
    # when Z^T Z is singular or ill-conditioned.
    Z = np.asarray(Z, dtype=float)
    return np.dot(np.linalg.pinv(Z), y)
def linear_regression_with_regularization(Z, y, lambda_reg):
    """Linear regression with weight decay (ridge) regularization.

    Solves (Z^T Z + lambda_reg * I) w = Z^T y for w.

    Parameters
    ----------
    Z : array-like, shape (N, M)
        Feature matrix, one row per sample.
    y : array-like, shape (N,)
        Targets.
    lambda_reg : float
        Regularization strength multiplying the identity matrix.

    Returns
    -------
    numpy.ndarray, shape (M,)
        The regularized weight vector.
    """
    Z = np.asarray(Z, dtype=float)
    gram = np.dot(Z.T, Z)
    penalised = gram + lambda_reg * np.eye(gram.shape[0])
    # Solve the normal equations directly rather than forming an explicit
    # inverse and an (M x N) pseudo-inverse: same solution, less work and
    # better numerical behaviour.
    return np.linalg.solve(penalised, np.dot(Z.T, y))
In [7]:
def error(W, X, y):
    """Return the fraction of rows of X whose predicted sign differs from y.

    The prediction for each row is sign(X . W). A row whose score is exactly
    zero gets prediction 0 and therefore counts as an error whenever its
    target is non-zero (presumably targets are +1/-1 -- confirm against the
    data).
    """
    predicted = np.sign(np.dot(X, W))
    wrong = predicted != y
    return wrong.sum() / float(len(wrong))
In [13]:
# Unregularized fit: print in-sample then out-of-sample classification error,
# each rounded to two decimals, on one line.
weights = linear_regression(train_z, train_y)
# A single formatted print() emits the same "E_in E_out" text as the former
# Python 2 print statements and is also valid on Python 3.
print('%s %s' % (np.round(error(weights, train_z, train_y), 2),
                 np.round(error(weights, test_z, test_y), 2)))
In [14]:
# Regularized fit with lambda = 10**k for k = -3: print in-sample then
# out-of-sample classification error, each rounded to two decimals.
k = -3
weights = linear_regression_with_regularization(train_z, train_y, 10 ** k)
# A single formatted print() emits the same "E_in E_out" text as the former
# Python 2 print statements and is also valid on Python 3.
print('%s %s' % (np.round(error(weights, train_z, train_y), 2),
                 np.round(error(weights, test_z, test_y), 2)))
In [15]:
# Regularized fit with lambda = 10**k for k = 3: print in-sample then
# out-of-sample classification error, each rounded to two decimals.
k = 3
weights = linear_regression_with_regularization(train_z, train_y, 10 ** k)
# A single formatted print() emits the same "E_in E_out" text as the former
# Python 2 print statements and is also valid on Python 3.
print('%s %s' % (np.round(error(weights, train_z, train_y), 2),
                 np.round(error(weights, test_z, test_y), 2)))
In [16]:
# Sweep lambda = 10**k for k = -2..2 and print "k E_in E_out" per line.
# Plain Python ints and a float base are used deliberately: with k taken from
# np.arange, `10 ** k` is a NumPy integer power, which for negative k either
# truncates to 0 (older NumPy, silently disabling regularization) or raises
# ValueError (NumPy >= 1.12).
for k in range(-2, 3):
    weights = linear_regression_with_regularization(train_z, train_y, 10.0 ** k)
    # Formatted print() gives the same text as the former Python 2 print
    # statement and is also valid on Python 3.
    print('%s %s %s' % (k,
                        np.round(error(weights, train_z, train_y), 2),
                        np.round(error(weights, test_z, test_y), 2)))
In [ ]: