In this problem, you will create your own target function $f$ and data set $\mathcal D$ to see how the Perceptron Learning Algorithm works. Take $d = 2$ so you can visualize the problem, and assume $\mathcal X = [-1, 1] \times [-1, 1]$ with uniform probability of picking each $\mathbf x \in \mathcal X$.
In each run, choose a random line in the plane as your target function $f$ (do this by taking two random, uniformly distributed points in $[-1, 1] \times [-1, 1]$ and taking the line passing through them), where one side of the line maps to $+1$ and the other maps to $-1$. Choose the inputs $\mathbf x_n$ of the data set as random points (uniformly in $\mathcal X$), and evaluate the target function on each $\mathbf x_n$ to get the corresponding output $y_n$.
Now, in each run, use the Perceptron Learning Algorithm to find $g$. Start the PLA with the weight vector $\mathbf w$ being all zeros (consider $\mathrm{sign}(0) = 0$, so all points are initially misclassified), and at each iteration have the algorithm choose a point randomly from the set of misclassified points. We are interested in two quantities: the number of iterations that PLA takes to converge to $g$, and the disagreement between $f$ and $g$ which is $\mathbb{P}[f(\mathbf x) \neq g(\mathbf x)]$ (the probability that $f$ and $g$ will disagree on their classification of a random point). You can either calculate this probability exactly, or approximate it by generating a sufficiently large, separate set of points to estimate it. In order to get a reliable estimate for these two quantities, you should repeat the experiment for $1000$ runs (each run as specified above) and take the average over these runs.
In [2]:
# init stuff
import numpy as np
# np.random.seed(1)  # uncomment for reproducible experiments
space = [-1, 1]  # bounds of X = [-1,1] x [-1,1]; unpacked as (low, high) for np.random.uniform
N, d = 100, 2  # training-set size and input dimension (N is reassigned per experiment below)
M = 100 # number of points in test set
RUNS = 1000  # independent experiment repetitions to average over
MAX_ITERS = 10000 # the highest among given options
In [3]:
def misclassified(a, b):
    """Return the indices at which the label arrays *a* and *b* disagree.

    Parameters
    ----------
    a, b : equal-length 1-D arrays of class labels.

    Returns
    -------
    1-D int array of positions i where a[i] != b[i] (empty when they agree everywhere).
    """
    # Elementwise comparison is already vectorized in numpy; the original
    # np.vectorize(lambda x, y: ...) detour ran a Python call per element
    # and raises on size-0 inputs. np.nonzero on the boolean mask is the
    # direct, C-speed equivalent.
    return np.nonzero(np.asarray(a) != np.asarray(b))[0]
def test_perceptron(w, dataset):
    """Estimate P[f != g] as the error rate of weights *w* on the M held-out points."""
    # The held-out portion is the last M rows of the dataset.
    X_test = dataset[N:, :-1]
    y_true = dataset[N:, -1]
    # Prepend the synthetic bias coordinate x0 = 1 to every test point.
    bias = np.ones((M, 1))
    X_test = np.concatenate((bias, X_test), 1)
    # Hypothesis labels, flattened to a length-M vector.
    predictions = np.reshape(np.sign(np.dot(X_test, w.T)), M)
    # Fraction of held-out points whose predicted label differs from the truth.
    return misclassified(y_true, predictions).shape[0] / M
def perceptron(dataset):
    """Run the Perceptron Learning Algorithm on the first N rows of *dataset*.

    Returns
    -------
    (iterations, error) : number of update steps until convergence (capped at
    MAX_ITERS) and the out-of-sample error estimate from test_perceptron.
    """
    X_train = dataset[:N, :-1]
    y_train = dataset[:N, -1]
    # Augment the inputs with the synthetic bias coordinate x0 = 1.
    X_train = np.concatenate((np.reshape(np.ones(N), (N, 1)), X_train), 1)
    # All-zero start: sign(0) == 0, so every point begins misclassified.
    w = np.zeros(d + 1)
    iterations = 0
    while iterations < MAX_ITERS:
        predictions = np.reshape(np.sign(np.dot(X_train, w.T)), N)
        missed = misclassified(y_train, predictions)
        if missed.shape[0] == 0:
            # Converged: no misclassified points remain.
            break
        # Pick one misclassified point uniformly at random and apply the PLA update.
        pick = missed[np.random.randint(0, missed.shape[0], 1)[0]]
        w = w + y_train[pick] * X_train[pick]
        iterations += 1
    return (iterations, test_perceptron(w, dataset))
In [4]:
# points for custom target function
def prepare_data():
    """Draw a random target line f and a labelled dataset of N + M uniform points.

    Returns
    -------
    (N + M, d + 1) array whose last column holds y_n = sign of the point's side
    of the line; rows [:N] are for training, rows [N:] for testing.
    """
    # Two random points in the square define the target line y = slope*x + intercept.
    P1 = np.random.uniform(*space, d)
    P2 = np.random.uniform(*space, d)
    while P2[0] == P1[0]:
        # Guard against a vertical line: the original divided by zero here.
        # A zero-probability event, but resampling costs nothing.
        P2 = np.random.uniform(*space, d)
    slope = (P2[1] - P1[1]) / (P2[0] - P1[0])
    y_intercept = P1[1] - slope * P1[0]
    # Uniform sample of all training + test inputs.
    X = np.random.uniform(*space, (N + M, d))
    # Label each point by which side of the line it lies on. This vectorized
    # expression is the exact per-row computation the original ran through
    # apply_along_axis (a Python-level call per row).
    y = np.sign(slope * X[:, 0] + y_intercept - X[:, 1])
    # Combine inputs and labels into one array.
    return np.concatenate((X, np.reshape(y, (N + M, 1))), 1)
def get_proper_data():
    """Resample datasets until the labels contain both +1 and -1 points."""
    while True:
        dataset = prepare_data()
        labels = dataset[:, -1]
        has_pos = (labels == 1).sum()
        has_neg = (labels == -1).sum()
        # Training requires at least one example of each class.
        if has_pos and has_neg:
            return dataset
def run():
    """One full experiment: draw a valid dataset, train PLA, return (iterations, error)."""
    return perceptron(get_proper_data())
In [5]:
def main():
    """Average PLA convergence iterations and test error over RUNS experiments."""
    iteration_counts = []
    error_rates = []
    for _ in range(RUNS):
        iters_taken, err = run()
        iteration_counts.append(iters_taken)
        error_rates.append(err)
    print('Average iterations for {0} = {1}'.format(N, np.mean(iteration_counts)))
    print('Average error for {0} = {1}'.format(N, np.mean(error_rates)))
    print('')
# questions 7,8
# Small experiment: the module-level globals N and M are read by
# perceptron/test_perceptron/main at call time, so reassigning them
# here reconfigures the whole pipeline.
N = 10
M = 10
main()
# questions 9,10
# Larger experiment: 100 training and 100 test points.
N = 100
M = 100
main()