In [52]:
import numpy as np
import matplotlib.pyplot as plt

A = np.array                   # shorthand used throughout
rd = lambda x: int(round(x))   # round a soft prediction to a 0/1 label

In [2]:
blue = np.random.multivariate_normal([1, 0], np.identity(2), 10)
orange = np.random.multivariate_normal([0, 1], np.identity(2), 10)

blue_obs = []
orange_obs = []

for i in range(100):
    center = blue[np.random.randint(10)]
    blue_obs.append(np.random.multivariate_normal(
        center, np.identity(2)/5.0))

for i in range(100):
    center = orange[np.random.randint(10)]
    orange_obs.append(np.random.multivariate_normal(
        center, np.identity(2)/5.0))
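
The two loops above follow the Gaussian-mixture recipe from chapter 2 of ESL: each observation is one of the 10 class means plus N(0, I/5) noise. For reference, a vectorized sketch of the same recipe (an added illustration with hypothetical variable names, not part of the original notebook):

# each row of `centers` is a randomly chosen blue class mean; adding
# zero-mean noise with covariance I/5 reproduces the loop above
centers = blue[np.random.randint(10, size=100)]
blue_obs_vec = centers + np.random.multivariate_normal(
    [0, 0], np.identity(2) / 5.0, size=100)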

In [5]:
from numpy import matrix as M
from numpy import dot
from numpy.linalg import inv

def train_linear_model(xs, ys):
    """
    Fits a linear model to the training data `xs`, as described in
    [Elements of Statistical Learning][1], page 12.

    [1]: http://www-stat.stanford.edu/~tibs/ElemStatLearn/
    """
    # add a column of 1s to the input matrix, to correspond to the beta-nought
    # or constant term in the model
    X = np.concatenate((M([1] * len(xs)).T, M(xs)), axis=1)

    _y = M(ys)
    y = _y.T if _y.shape[0] == 1 else _y
    # normal equations: beta = (X^T X)^{-1} X^T y
    return dot(dot(inv(dot(X.T, X)), X.T), y)
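
As a sanity check (an addition, not in the original notebook), the normal-equations solution should agree with numpy's least-squares solver on the same design matrix. A sketch, with a hypothetical helper name:

def check_against_lstsq(xs, ys):
    # rebuild the design matrix as in train_linear_model, then compare
    # the closed-form coefficients against np.linalg.lstsq
    X = np.concatenate((M([1] * len(xs)).T, M(xs)), axis=1)
    beta_lstsq = np.linalg.lstsq(np.asarray(X),
                                 np.asarray(ys, dtype=float),
                                 rcond=None)[0]
    beta_normal = np.asarray(train_linear_model(xs, ys)).flatten()
    return np.allclose(beta_normal, beta_lstsq)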

In [6]:
def model_and_plot(xs, ys):
    """Trains a linear classifier for two input variables and one categorical
    output, and plots the input points with the decision boundary."""
    plt.scatter([x[0] for x in xs],
                [x[1] for x in xs],
                marker="o", c=[["b", "#ff6103"][y_i] for y_i in ys])

    B = train_linear_model(xs, ys)

    resolution = 0.1
    low = np.min(np.array(xs))
    hi = np.max(np.array(xs)) + resolution
    test_xs = np.arange(low, hi, resolution)
    test_ys = np.arange(low, hi, resolution)
    xx, yy = np.meshgrid(test_xs, test_ys)
    intercept = [1] * xx.size
    Z = dot(np.array([intercept, xx.flatten(), yy.flatten()]).T, B)
    Z.shape = xx.shape
    C = plt.contour(xx, yy, Z, [0.5])
    return C

model_and_plot(blue_obs + orange_obs, ([0] * 100) + ([1] * 100))


Out[6]:
<matplotlib.contour.QuadContourSet instance at 0x1056c0560>
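
Because the fitted model is linear, the decision boundary {x : b0 + b1*x1 + b2*x2 = 0.5} is a straight line, and plt.contour is just tracing it numerically. A sketch of drawing it directly (assuming the names B, low, and hi from inside model_and_plot, and b2 != 0):

b0, b1, b2 = np.asarray(B).flatten()
# solve b0 + b1*x1 + b2*x2 = 0.5 for x2 at the two ends of the plot
ends = np.array([low, hi])
plt.plot(ends, (0.5 - b0 - b1 * ends) / b2, "k--")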

In [7]:
xs = [[0., 0.], [1., 1.], [1., 0.], [2., 1.]]

# try playing around with these
ys = [1, 0, 1, 1]

model_and_plot(xs, ys)


Out[7]:
<matplotlib.contour.QuadContourSet instance at 0x10574afc8>
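
One labeling worth trying (an added illustration, not from the original notebook): an XOR-style assignment of the same four points is not linearly separable, so the 0.5 contour must misclassify at least one point:

# (0,0) and (2,1) get one label, (1,1) and (1,0) the other;
# no single line separates these two classes
model_and_plot(xs, [0, 1, 1, 0])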

In [44]:

def train_knn_model(xs, ys, k=15):
    """
    Returns a function predict(x) -> y, which returns the average of the
    k nearest neighbors of `x` among the training set
    """
    xs, ys = np.array(xs), np.array(ys)
    def dist(a, b):
        return np.linalg.norm(a - b)

    def predict(x):
        knn = sorted(zip(xs, ys), key=lambda pair: dist(x, pair[0]))[:k]
        return sum(yi for _, yi in knn) / k
    return predict
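
Sorting all training points for every query costs O(n log n) per prediction; np.argpartition finds the k smallest distances without a full sort. A vectorized sketch (hypothetical name, same interface as train_knn_model):

def train_knn_model_fast(xs, ys, k=15):
    xs, ys = np.asarray(xs, dtype=float), np.asarray(ys, dtype=float)
    def predict(x):
        # distances from x to every training point, then the indices
        # of the k nearest (unordered, which is all we need to average)
        d = np.linalg.norm(xs - np.asarray(x), axis=1)
        return ys[np.argpartition(d, k)[:k]].mean()
    return predict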

In [58]:
def knn_model_and_plot(xs, ys):
    """Trains a knn classifier, and plots the input points with the decision
    boundary """
    plt.scatter([x[0] for x in xs],
                [x[1] for x in xs],
                marker="o", c=[["b", "#ff6103"][y_i] for y_i in ys])

    predict = train_knn_model(xs, ys)

    resolution = 0.3
    low = np.min(np.array(xs))
    hi = np.max(np.array(xs)) + resolution
    test_xs = np.arange(low, hi, resolution)
    test_ys = np.arange(low, hi, resolution)
    mg = np.meshgrid(test_xs, test_ys)
    ms = mg[0].shape
    mesh = np.array([g.flatten() for g in mg]).T
    Z = A([predict(p) for p in mesh])
    print(predict(mesh[0]))
    x0 = A([xx[0] for xx in mesh])
    x1 = A([xx[1] for xx in mesh])

    plt.scatter(x0, x1, marker="+", c=[["b", "#ff6103"][rd(zi)] for zi in Z])

    
    Z.shape = ms
    x0.shape = ms
    x1.shape = ms
    C = plt.contour(x0, x1, Z, [0.5])
    return C

knn_model_and_plot(blue_obs + orange_obs, ([0] * 100) + ([1] * 100))


0.933333333333
Out[58]:
<matplotlib.contour.QuadContourSet instance at 0x1063c1950>
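
The printed 0.933... is the 15-NN estimate at the first mesh point: 14 of its 15 nearest training neighbors carried the orange label in this run. Smaller k traces a wigglier boundary (cf. the 1-NN and 15-NN figures in chapter 2 of ESL); a quick sketch of the extreme case:

# hypothetical variant: a 1-NN predictor memorizes the training data,
# so its decision regions are islands around individual points
predict_1nn = train_knn_model(blue_obs + orange_obs,
                              ([0] * 100) + ([1] * 100), k=1)
predict_1nn(blue_obs[0])  # -> 0.0, the point's own label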

In [51]:
# sanity check for rd(): soft predictions below 0.5 round down to 0
round(0.4)


Out[51]:
0