Homework: https://work.caltech.edu/homework/hw6.pdf

✔ Answers:

  1. b ✔
  2. a ✔
  3. d ✔
  4. e ✔
  5. d ✔
  6. b ✔
  7. c ✔
  8. e --> d
  9. a ✔
  10. c --> e

Answer key: https://work.caltech.edu/homework/hw6_sol.pdf


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display

In [2]:
# Questions 2 through 6
# training set: http://work.caltech.edu/data/in.dta
# test set: http://work.caltech.edu/data/out.dta
train = pd.read_fwf('http://work.caltech.edu/data/in.dta', header=None)
test = pd.read_fwf('http://work.caltech.edu/data/out.dta', header=None)
display(train.head())
display(test.head())


train.head():
          0         1  2
0 -0.779470  0.838221  1
1  0.155635  0.895377  1
2 -0.059908 -0.717780  1
3  0.207596  0.758933  1
4 -0.195983 -0.375487 -1

test.head():
          0         1  2
0 -0.106006 -0.081467 -1
1  0.177930 -0.345951 -1
2  0.102162  0.718258  1
3  0.694078  0.623397 -1
4  0.023541  0.727432  1

In [3]:
train_x = train.iloc[:, :2]   # columns 0-1: input features x1, x2
train_y = train.iloc[:, 2]    # column 2: label y in {-1, +1}
test_x = test.iloc[:, :2]
test_y = test.iloc[:, 2]

In [4]:
def transform(x):
    # Nonlinear feature transform used in Questions 2 through 6:
    # (1, x1, x2, x1^2, x2^2, x1*x2, |x1 - x2|, |x1 + x2|)
    x1 = x.iloc[:, 0]
    x2 = x.iloc[:, 1]
    
    z = pd.DataFrame(np.ones(len(x)))   # column 0: constant bias term
    z.loc[:, 1] = x1
    z.loc[:, 2] = x2
    z.loc[:, 3] = x1 ** 2
    z.loc[:, 4] = x2 ** 2
    z.loc[:, 5] = x1 * x2
    z.loc[:, 6] = np.abs(x1 - x2)
    z.loc[:, 7] = np.abs(x1 + x2)
    return z

train_z = transform(train_x)
train_z.head()


Out[4]:
0 1 2 3 4 5 6 7
0 1 -0.779470 0.838221 0.607574 0.702615 -0.653369 1.617692 0.058751
1 1 0.155635 0.895377 0.024222 0.801701 0.139352 0.739743 1.051012
2 1 -0.059908 -0.717780 0.003589 0.515208 0.043001 0.657872 0.777688
3 1 0.207596 0.758933 0.043096 0.575980 0.157552 0.551337 0.966530
4 1 -0.195983 -0.375487 0.038409 0.140991 0.073589 0.179504 0.571470

In [5]:
test_z = transform(test_x)
test_z.head()


Out[5]:
0 1 2 3 4 5 6 7
0 1 -0.106006 -0.081467 0.011237 0.006637 0.008636 0.024539 0.187473
1 1 0.177930 -0.345951 0.031659 0.119682 -0.061555 0.523881 0.168022
2 1 0.102162 0.718258 0.010437 0.515895 0.073378 0.616097 0.820420
3 1 0.694078 0.623397 0.481745 0.388624 0.432687 0.070681 1.317476
4 1 0.023541 0.727432 0.000554 0.529158 0.017125 0.703891 0.750973

In [6]:
from numpy.linalg import inv
def linear_regression(Z, y):
    # One-step linear regression: w = (Z^T Z)^-1 Z^T y (pseudo-inverse solution)
    Z_T_Z = np.dot(Z.T, Z)
    Z_T_Z_inverse = inv(Z_T_Z)
    Z_dagger = np.dot(Z_T_Z_inverse, Z.T)
    weights = np.dot(Z_dagger, y)
    return weights

def linear_regression_with_regularization(Z, y, lambda_reg):
    # Weight decay: w_reg = (Z^T Z + lambda*I)^-1 Z^T y
    Z_T_Z = np.dot(Z.T, Z)
    Z_T_Z_reg = Z_T_Z + (lambda_reg * np.identity(np.size(Z_T_Z, 0)))
    Z_T_Z_reg_inverse = inv(Z_T_Z_reg)
    Z_dagger = np.dot(Z_T_Z_reg_inverse, Z.T)
    weights = np.dot(Z_dagger, y)
    return weights
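
Side note (my own addition, not part of the homework solution): the same weight-decay weights w = (Z^T Z + lambda*I)^-1 Z^T y can be computed without forming the matrix inverse explicitly by solving the linear system instead, which is generally preferred numerically. A minimal sketch; the function name is hypothetical:

from numpy.linalg import solve

def linear_regression_with_regularization_solve(Z, y, lambda_reg):
    # Solve (Z^T Z + lambda*I) w = Z^T y directly instead of inverting the matrix
    A = np.dot(Z.T, Z) + lambda_reg * np.identity(Z.shape[1])
    b = np.dot(Z.T, y)
    return solve(A, b)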

In [7]:
def error(W, X, y):
    # Binary classification error: fraction of points where sign(Xw) != y
    misclassified = np.sign(np.dot(X, W)) != y
    return 1.0 * misclassified.sum()/len(misclassified)

In [13]:
# Question 2: linear regression on the transformed data, no regularization
weights = linear_regression(train_z, train_y)
print(np.round(error(weights, train_z, train_y), 2), end=' ')
print(np.round(error(weights, test_z, test_y), 2))


0.03 0.08

In [14]:
# Question 3: weight decay with lambda = 10^k, k = -3
k = -3
weights = linear_regression_with_regularization(train_z, train_y, 10 ** k)
print(np.round(error(weights, train_z, train_y), 2), end=' ')
print(np.round(error(weights, test_z, test_y), 2))


0.03 0.08

In [15]:
# Question 4: weight decay with lambda = 10^k, k = 3
k = 3
weights = linear_regression_with_regularization(train_z, train_y, 10 ** k)
print(np.round(error(weights, train_z, train_y), 2), end=' ')
print(np.round(error(weights, test_z, test_y), 2))


0.37 0.44

In [16]:
# Questions 5 and 6: sweep k and compare in-sample / out-of-sample errors
for k in np.arange(-2, 3):
    weights = linear_regression_with_regularization(train_z, train_y, 10 ** k)
    print(k, np.round(error(weights, train_z, train_y), 2), np.round(error(weights, test_z, test_y), 2))


-2 0.03 0.08
-1 0.03 0.06
0 0.0 0.09
1 0.06 0.12
2 0.2 0.23
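
As an optional cross-check (my own addition, not part of the homework), the same sweep should be reproducible with scikit-learn's Ridge, which minimizes ||Zw - y||^2 + alpha*||w||^2 and therefore matches the closed-form weight-decay solution above when fit_intercept=False (the bias column is already part of Z). Sketch assuming scikit-learn is installed:

from sklearn.linear_model import Ridge

for k in np.arange(-2, 3):
    model = Ridge(alpha=10.0 ** k, fit_intercept=False)
    model.fit(train_z, train_y)
    e_in = np.mean(np.sign(model.predict(train_z)) != train_y)
    e_out = np.mean(np.sign(model.predict(test_z)) != test_y)
    print(k, np.round(e_in, 2), np.round(e_out, 2))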
