In [52]:
import numpy as np
import matplotlib.pyplot as plt

A = np.array                      # shorthand for building arrays
rd = lambda x: int(round(x))      # round a prediction to the nearest class label
In [2]:
# Simulate the two-class mixture data from ESL section 2.3: ten Gaussian
# means per class, then 100 observations drawn around randomly chosen means.
blue = np.random.multivariate_normal([1, 0], np.identity(2), 10)
orange = np.random.multivariate_normal([0, 1], np.identity(2), 10)
blue_obs = []
orange_obs = []
for i in range(100):
    center = blue[np.random.randint(10)]
    blue_obs.append(np.random.multivariate_normal(
        center, np.identity(2) / 5.0))
for i in range(100):
    center = orange[np.random.randint(10)]
    orange_obs.append(np.random.multivariate_normal(
        center, np.identity(2) / 5.0))
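For a quick look at the raw draws before fitting anything, a minimal sketch (assuming matplotlib's inline backend is active):

obs = np.array(blue_obs + orange_obs)
plt.scatter(obs[:100, 0], obs[:100, 1], c="b", marker="o")
plt.scatter(obs[100:, 0], obs[100:, 1], c="#ff6103", marker="o")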
In [5]:
from numpy import matrix as M
from numpy import dot
from numpy.linalg import inv
def train_linear_model(xs, ys):
    """
    Fits a linear model to the training data `xs`, as described in
    [Elements of Statistical Learning][1], page 12: the least-squares
    solution beta = (X^T X)^{-1} X^T y.

    [1]: http://www-stat.stanford.edu/~tibs/ElemStatLearn/
    """
    # add a column of 1s to the input matrix, to correspond to the beta-nought
    # or constant term in the model
    X = np.concatenate((M([1] * len(xs)).T, M(xs)), axis=1)
    _y = M(ys)
    y = _y.T if _y.shape[0] == 1 else _y  # make sure y is a column vector
    return dot(dot(inv(dot(X.T, X)), X.T), y)
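As a quick sanity check (a minimal sketch, assuming the cell above has run): fitting noiseless data generated from a known plane should recover its coefficients exactly.

xs_check = [[0., 0.], [1., 0.], [0., 1.], [1., 1.]]
ys_check = [2. + 3. * x1 - 1. * x2 for x1, x2 in xs_check]  # y = 2 + 3*x1 - x2
print(train_linear_model(xs_check, ys_check))               # ~ [[2.], [3.], [-1.]]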
In [6]:
def model_and_plot(xs, ys):
    """Trains a linear classifier for two input variables and one categorical
    output, and plots the input points with the decision boundary."""
    plt.scatter([x[0] for x in xs],
                [x[1] for x in xs],
                marker="o", c=[["b", "#ff6103"][y_i] for y_i in ys])
    B = train_linear_model(xs, ys)
    # evaluate the model over a grid covering the data, then draw the
    # y-hat = 0.5 level set: the decision boundary between the two classes
    resolution = 0.1
    low = np.min(np.array(xs))
    hi = np.max(np.array(xs)) + resolution
    test_xs = np.arange(low, hi, resolution)
    test_ys = np.arange(low, hi, resolution)
    xx, yy = np.meshgrid(test_xs, test_ys)
    intercept = [1] * len(xx.flatten())
    Z = dot(np.array([intercept, xx.flatten(), yy.flatten()]).T, B)
    Z = np.asarray(Z).reshape(xx.shape)
    C = plt.contour(xx, yy, Z, [0.5])
    return C
model_and_plot(blue_obs + orange_obs, ([0] * 100) + ([1] * 100))
Out[6]:
[figure: blue and orange training points with the linear decision boundary]
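The contour call plots the level set where the fitted value equals 0.5; since the model is linear, the same boundary can be written in closed form. A sketch (assuming the cells above have run, and that the x2 coefficient is nonzero) overlaying the explicit line b0 + b1*x1 + b2*x2 = 0.5:

b0, b1, b2 = np.asarray(train_linear_model(blue_obs + orange_obs,
                                           ([0] * 100) + ([1] * 100))).ravel()
x1 = np.linspace(-2, 3, 50)
plt.plot(x1, (0.5 - b0 - b1 * x1) / b2, "k--")  # same line contour() found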
In [7]:
xs = [[0., 0.], [1., 1.], [1., 0.], [2., 1.]]
# try playing around with these
ys = [1, 0, 1, 1]
model_and_plot(xs, ys)
Out[7]:
[figure: the four toy points with the fitted linear boundary]
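One labeling worth trying, reusing `xs` from above: with the class-1 points sandwiched between the class-0 points, no single line separates the classes, so the least-squares boundary necessarily misclassifies at least one point.

model_and_plot(xs, [0, 1, 1, 0])  # (0,0) and (2,1) vs. (1,1) and (1,0)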
In [44]:
def train_knn_model(xs, ys, k=15):
    """
    Returns a function predict(x) -> y, which returns the average of the
    k nearest neighbors of `x` among the training set.
    """
    xs, ys = np.array(xs), np.array(ys)

    def dist(a, b):
        return np.linalg.norm(a - b)

    def predict(x):
        # sort the training points by distance to x and keep the k closest
        knn = sorted(zip(xs, ys), key=lambda pair: dist(x, pair[0]))[:k]
        return sum(yi for xi, yi in knn) / float(k)

    return predict
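A quick usage check, assuming the data cells above have run: with k = 1 the nearest neighbor of a training point is the point itself, so its prediction is exactly its own label.

predict_1 = train_knn_model(blue_obs + orange_obs, ([0] * 100) + ([1] * 100), k=1)
print(predict_1(blue_obs[0]))   # 0.0 -- blue_obs[0] is its own nearest neighbor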
In [58]:
def knn_model_and_plot(xs, ys):
    """Trains a kNN classifier, and plots the input points with the decision
    boundary."""
    plt.scatter([x[0] for x in xs],
                [x[1] for x in xs],
                marker="o", c=[["b", "#ff6103"][y_i] for y_i in ys])
    predict = train_knn_model(xs, ys)
    # a coarser grid than the linear case, since each grid point costs a
    # full pass over the training set
    resolution = 0.3
    low = np.min(np.array(xs))
    hi = np.max(np.array(xs)) + resolution
    test_xs = np.arange(low, hi, resolution)
    test_ys = np.arange(low, hi, resolution)
    mg = np.meshgrid(test_xs, test_ys)
    ms = mg[0].shape
    mesh = np.array([m.flatten() for m in mg]).T
    Z = A([predict(p) for p in mesh])
    print(predict(mesh[0]))  # sanity check on the first grid point
    x0 = A([xx[0] for xx in mesh])
    x1 = A([xx[1] for xx in mesh])
    plt.scatter(x0, x1, marker="+", c=[["b", "#ff6103"][rd(zi)] for zi in Z])
    Z.shape = ms
    x0.shape = ms
    x1.shape = ms
    C = plt.contour(x0, x1, Z, [0.5])
    return C
knn_model_and_plot(blue_obs + orange_obs, ([0] * 100) + ([1] * 100))
Out[58]:
[figure: kNN (k = 15) decision boundary, with the test grid colored by prediction]
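A natural follow-up, sketched here on the same data: shrinking k makes the boundary more jagged (lower bias, higher variance), while growing it smooths the boundary out, echoing ESL's comparison of k = 1 and k = 15.

for k in (1, 15, 101):
    predict_k = train_knn_model(blue_obs + orange_obs,
                                ([0] * 100) + ([1] * 100), k=k)
    print(k, predict_k([0.5, 0.5]))  # class score at a point between the means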
In [51]:
round(0.4)
Out[51]:
0
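Worth noting for the `rd` helper above: Python 3's round uses banker's rounding, so exact ties go to the even integer. With k = 15 a kNN score can never be exactly 0.5, but it can be when k is even.

print(round(0.5), round(1.5), round(2.5))   # 0 2 2 -- ties round to even
print(rd(0.5))                              # 0, so a tied grid point is colored blue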