Homework: https://work.caltech.edu/homework/hw5.pdf
Answer key: https://work.caltech.edu/homework/hw5_sol.pdf
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display
In [2]:
# Question 1
# Expected in-sample error of linear regression with noisy targets:
# E[E_in] = sigma^2 * (1 - (d+1)/N)
def expected_error(sigma, d, N):
    return sigma*sigma*(1.0 - (d+1)/N)

for N in [10, 25, 100, 500, 1000]:
    print("N: {}, expected_error: {}".format(N, expected_error(0.1, 8.0, N)))
In [3]:
# Questions 4 - 7
# E(u, v) = (u*exp(v) - 2*v*exp(-u))^2
def error(u, v):
    return (u * np.exp(v) - 2.0 * v * np.exp(-u))**2.0

# Partial derivative dE/du, by the chain rule:
def derivative_u(u, v):
    return 2 * (np.exp(v) + 2.0 * v * np.exp(-u)) * (u * np.exp(v) - 2.0 * v * np.exp(-u))

# Partial derivative dE/dv, by the chain rule:
def derivative_v(u, v):
    return 2 * (u * np.exp(v) - 2.0 * np.exp(-u)) * (u * np.exp(v) - 2.0 * v * np.exp(-u))
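A quick central-difference check (a sketch, not part of the original notebook) that the analytic partials above match the numerical gradient of error at an arbitrary point:

# Finite-difference sanity check at (u, v) = (1, 1).
h = 1e-6
u0, v0 = 1.0, 1.0
num_du = (error(u0 + h, v0) - error(u0 - h, v0)) / (2 * h)
num_dv = (error(u0, v0 + h) - error(u0, v0 - h)) / (2 * h)
print(np.isclose(num_du, derivative_u(u0, v0)), np.isclose(num_dv, derivative_v(u0, v0)))  # True True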
In [4]:
# Question 5
def gradient_descent_update(u, v, rate=0.1):
    return u - rate * derivative_u(u, v), v - rate * derivative_v(u, v)

u = v = 1.0
for step in range(1, 18):
    u, v = gradient_descent_update(u, v)
    print("{}: ({}, {}) ---> {}".format(step, u, v, error(u, v)))
In [5]:
# Question 7
def coordinate_descent_update_u(u, v, rate=0.1):
    return u - rate * derivative_u(u, v), v

def coordinate_descent_update_v(u, v, rate=0.1):
    return u, v - rate * derivative_v(u, v)

u = v = 1.0
for step in range(1, 16):
    # One full iteration: a step along u, then a step along v.
    u, v = coordinate_descent_update_u(u, v)
    print("{} part 1: ({}, {}) ---> {}".format(step, u, v, error(u, v)))
    u, v = coordinate_descent_update_v(u, v)
    print("{} part 2: ({}, {}) ---> {}".format(step, u, v, error(u, v)))
In [38]:
# Questions 8 and 9
class Line:
    '''A line in the x1-x2 plane, built either from a weight vector (w0, w1, w2) or from two points'''
def __init__(self, x1_s, x2_s, weights=None, name="Unknown"):
self.name = name
if weights is not None:
assert len(weights) == 3
            w0, w1, w2 = weights
            # Normalize so the boundary reads x2 = -w0/w2 - (w1/w2)*x1
            # (assumes a non-vertical line, i.e. w2 != 0).
            if w2 != 0:
                w0 /= w2
                w1 /= w2
            self.intercept = -w0
            self.slope = -w1
else:
assert len(x1_s) == 2
assert len(x2_s) == 2
self.slope = (x2_s[1]-x2_s[0])/(x1_s[1]-x1_s[0])
self.intercept = x2_s[0] - self.slope * x1_s[0]
def get_y(self, x):
return self.intercept + self.slope * x
def get_sign(self, x1, x2):
return np.sign(- self.intercept - self.slope*x1 + x2 )
def get_weights(self):
return np.array([-self.intercept, -self.slope, 1])
def generate_target_function(x_min, x_max, name = "Target"):
x1 = list(np.random.uniform(x_min, x_max, size=2))
x2 = list(np.random.uniform(x_min, x_max, size=2))
return Line(x1, x2, name=name)
def generate_sample(target_function, N):
    # Note: relies on the module-level x_min and x_max for the sampling range.
    sample_x1 = list(np.random.uniform(x_min, x_max, size=N))
sample_x2 = list(np.random.uniform(x_min, x_max, size=N))
sample_y = [target_function.get_sign(x1, x2) for x1, x2 in zip(sample_x1, sample_x2)]
return sample_x1, sample_x2, sample_y
def cross_entropy_error(X, y, weights):
    # E(w) = average over n of ln(1 + exp(-y_n * w.x_n))
    return np.mean(np.log(1 + np.exp(-np.dot(X, weights) * y.T)))

def gradient(X, y, weights):
    # grad E(w) = -average over n of (y_n * x_n) / (1 + exp(y_n * w.x_n))
    return -np.mean((X * y)/(1 + np.exp(np.dot(X, weights) * y.T)).T, axis = 0)
def update(X, y, weights, rate=0.01):
g = gradient(X, y, weights)
new_weights = weights - rate * g
return new_weights
def batch_gradient_descent(X, y, weights, max_iter=50000, magnitude_threshold=0.01):
    w = weights
    for iteration in range(max_iter):
        old_w = w
        w = update(X, y, w)
        # Stop once an update barely moves the weight vector.
        if np.linalg.norm(old_w - w) < magnitude_threshold:
            break
    return w, iteration + 1
def stochastic_gradient_descent(X, y, weights, max_iter=50000, magnitude_threshold=0.01):
    w = weights
    for epoch in range(max_iter):
        old_w = w
        # One epoch: visit every example once, in a fresh random order.
        for i in np.random.permutation(range(np.size(X, 0))):
            w = update(X[[i]], y[[i]], w)
        # Stop once a full epoch barely changes the weight vector.
        if np.linalg.norm(old_w - w) < magnitude_threshold:
            break
    return w, epoch + 1
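A quick consistency check (a sketch, not in the original notebook): a Line built from two points and a Line rebuilt from its own weight vector should classify every point identically.

# Round-trip check: Line(points) -> get_weights() -> Line(weights).
line_a = generate_target_function(-1, 1)
line_b = Line(None, None, line_a.get_weights())
pts = np.random.uniform(-1, 1, size=(5, 2))
print(all(line_a.get_sign(p[0], p[1]) == line_b.get_sign(p[0], p[1]) for p in pts))  # True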
In [39]:
x_min = -1
x_max = 1
target_function = generate_target_function(x_min, x_max, name = "Target")
N = 100
sample_x1, sample_x2, sample_y = generate_sample(target_function, N)
plt.axis([x_min, x_max, x_min, x_max])
plt.plot([x_min, x_max], [target_function.get_y(x_min), target_function.get_y(x_max)], color='green')
plt.scatter(sample_x1, sample_x2, c=sample_y, marker='x')
Out[39]:
In [40]:
X = np.column_stack((np.ones(np.size(sample_x1)), sample_x1, sample_x2))
y = np.array([sample_y]).T
init_weights = [0, 0, 0]
print "Error: {}".format(cross_entropy_error(X, y, init_weights))
In [41]:
batch_weights, iterations = batch_gradient_descent(X, y, init_weights)
# Rescale so w2 = 1 for plotting; the decision boundary is unchanged, but note
# that rescaling does change the cross-entropy value reported below.
batch_weights /= batch_weights[-1]
print("Batch weights (after {} iterations): {}\nError: {}".format(iterations, batch_weights, cross_entropy_error(X, y, batch_weights)))
In [42]:
stochastic_weights, iterations = stochastic_gradient_descent(X, y, init_weights)
# Same rescaling caveat as above.
stochastic_weights /= stochastic_weights[-1]
print("Stochastic weights (after {} iterations): {}\nError: {}".format(iterations, stochastic_weights, cross_entropy_error(X, y, stochastic_weights)))
In [43]:
init_function = Line(None, None, init_weights, name="Init {}".format(init_weights))
batch_function = Line(None, None, batch_weights, name = "Batch {}".format(batch_weights))
stochastic_function = Line(None, None, stochastic_weights, name = "Stochastic {}".format(stochastic_weights))
plt.axis([x_min, x_max, x_min, x_max])
plt.scatter(sample_x1, sample_x2, c=sample_y, marker='x')
for function in [target_function, init_function, batch_function, stochastic_function]:
plt.plot([x_min, x_max], [function.get_y(x_min), function.get_y(x_max)], label=function.name)
plt.legend(bbox_to_anchor=(1.5, 1.5))
Out[43]:
In [44]:
N = 1000
out_sample_x1, out_sample_x2, out_sample_y = generate_sample(target_function, N)
X_out = np.column_stack((np.ones(np.size(out_sample_x1)), out_sample_x1, out_sample_x2))
y_out = np.array([out_sample_y]).T
predictions = np.array([[stochastic_function.get_sign(x[1], x[2]) for x in X_out]]).T
misclassified = predictions != y_out
# Classification (0/1) out-of-sample error of the SGD hypothesis on fresh points.
out_sample_error = 1.0 * misclassified.sum()/len(misclassified)
out_sample_error
Out[44]:
In [45]:
def run_experiment(num_runs=100):
x_min = -1
x_max = 1
E_out = 0
avg_iterations = 0
for i in range(num_runs):
target_function = generate_target_function(x_min, x_max, name = "Target")
N = 100
sample_x1, sample_x2, sample_y = generate_sample(target_function, N)
X = np.column_stack((np.ones(np.size(sample_x1)), sample_x1, sample_x2))
y = np.array([sample_y]).T
init_weights = [0, 0, 0]
stochastic_weights, iterations = stochastic_gradient_descent(X, y, init_weights)
N_out = 1000
out_sample_x1, out_sample_x2, out_sample_y = generate_sample(target_function, N_out)
X_out = np.column_stack((np.ones(np.size(out_sample_x1)), out_sample_x1, out_sample_x2))
y_out = np.array([out_sample_y]).T
        # Question 8: E_out is measured with the cross-entropy error on fresh data.
        out_sample_error = cross_entropy_error(X_out, y_out, stochastic_weights)
E_out += out_sample_error
avg_iterations += iterations
E_out /= num_runs
avg_iterations /= num_runs
print "Avg error_out: {} (after {} iterations)".format(E_out, avg_iterations)
return E_out, avg_iterations
run_experiment()
Out[45]:
In [ ]:
# Recorded results:
# Stochastic: Avg error_out: 16.3832864456 (after 1022 iterations)
# Batch:      Avg error_out: 1.48631164758 (after 120 iterations)
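The batch figure above presumably comes from the same experiment with the optimizer swapped; a hypothetical sketch of that variant (run_experiment_batch is not in the original notebook):

# Same experiment, but with batch gradient descent in place of SGD.
def run_experiment_batch(num_runs=100):
    E_out = 0.0
    avg_iterations = 0.0
    for _ in range(num_runs):
        target = generate_target_function(x_min, x_max)
        sample_x1, sample_x2, sample_y = generate_sample(target, 100)
        X = np.column_stack((np.ones(len(sample_x1)), sample_x1, sample_x2))
        y = np.array([sample_y]).T
        w, iterations = batch_gradient_descent(X, y, [0, 0, 0])
        out_x1, out_x2, out_y = generate_sample(target, 1000)
        X_out = np.column_stack((np.ones(len(out_x1)), out_x1, out_x2))
        y_out = np.array([out_y]).T
        E_out += cross_entropy_error(X_out, y_out, w)
        avg_iterations += iterations
    print("Avg error_out: {} (after {} iterations)".format(E_out/num_runs, avg_iterations/num_runs))
    return E_out/num_runs, avg_iterations/num_runs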