In [1]:
from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt
import mltools as ml
np.random.seed(0)
%matplotlib inline
In [2]:
lc2_data = np.genfromtxt('./lc2_data.txt', delimiter=None)
X, Y = lc2_data[:, :-1], lc2_data[:, -1]
In [3]:
f, ax = plt.subplots(1, 2, figsize=(20, 8))
mask = Y == -1
ax[0].scatter(X[mask, 0], X[mask, 1], s=120, color='blue', marker='s', alpha=0.75)
ax[0].scatter(X[~mask, 0], X[~mask, 1], s=340, color='red', marker='*', alpha=0.75)
ax[0].tick_params(labelsize=25)
ax[1].scatter(X[:, 0], X[:, 1], s=120, color='black', alpha=0.75)
ax[1].tick_params(labelsize=25)
plt.show()
Here are some of the questions that were asked in class, by me or by the students. Make sure you know how to answer all of them :)
Give me a real life example.
My task is to answer 'See this new point? Should it be red or blue?' -- which one do I need?
In the rest of the discussion we will show how to code a classifier from the ground up. This will be extremely useful not only for your homework assignment but also for future reference. Most ML code tends to be similar in structure, so this will be reusable even in super complicated models.
As a simple example we will use the Perceptron algorithm. We will build each part separately, showing how it works, and end by wrapping it all up in a classifier class that can be used with the mltools library.
We will use a two-class Perceptron with classes $\{-1, 1\}$. In the discussion you can also see how to use binary classes $\{0, 1\}$, and on the wiki page you can see a generalization to multiple classes.
For an illustration of the algorithm you can watch this YouTube clip.
The Perceptron uses a decision boundary $\theta$ to compute a value for each point; a simple sign threshold then decides the class.
We'll start by computing the decision value for each point $x^j$: $$\theta x^j$$
Let's choose $j=90$ and let's define: $$\theta = \left[-6, 0.5, 1\right]$$
In [4]:
theta = np.array([-6., 0.5, 1.])
Notice the '.'s. These make sure the array holds floats rather than integers, which can cause problems later down the line.
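To see why this matters, here is a quick sketch (the arrays below are just for illustration): an in-place float update on an integer array either raises an error or silently truncates, depending on your numpy version.
In [ ]:
theta_flt = np.array([-6., 0.5, 1.])  # float dtype -- what we want
theta_int = np.array([-6, 0, 1])      # integer dtype

theta_flt += 0.5                      # fine: [-5.5, 1.0, 1.5]
try:
    theta_int += 0.5                  # in-place float update of an int array
except TypeError as e:
    # Recent numpy versions raise here; older ones silently truncate to ints.
    print('integer theta fails:', e)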
$\theta$ has three components: one corresponding to the constant (also known as the 'bias' or 'intercept'), and two for the two features of X. So first we will add a constant column to the X data.
Do not use fpoly to do that; the behavior of that function is unexpected when there is more than one feature.
In [5]:
def add_const(X):
    """Prepends a constant (all-ones) column to X."""
    return np.hstack([np.ones([X.shape[0], 1]), X])
Xconst = add_const(X)
x_j, y_j = Xconst[90], Y[90]
In [6]:
x_theta = 0
for i in range(x_j.shape[0]):
    x_theta += x_j[i] * theta[i]
print(x_theta)
This is a VERY inefficient way to compute it. Luckily for us, numpy has the answer in the form of np.dot().
In [7]:
print(np.dot(x_j, theta))
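If you want to see the difference for yourself, here is a small, illustrative timing comparison on a larger random vector (the sizes are arbitrary):
In [ ]:
big_x, big_theta = np.random.random(10000), np.random.random(10000)

%timeit sum(big_x[i] * big_theta[i] for i in range(big_x.shape[0]))  # Python loop
%timeit np.dot(big_x, big_theta)                                     # vectorized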
Now let's compute the classification decision $T\left[\theta x^j\right]$. One option is to use the np.sign method, but on its own this is not a good solution because np.sign(0) = 0, which is not a valid class.
One way of solving it is to add a tiny epsilon before taking the sign, so that 0 is classified as +1.
In [8]:
eps = 1e-200
def sign(vals):
    """Returns +1 if val >= 0, else -1."""
    return np.sign(vals + eps)
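A quick sanity check of the edge case:
In [ ]:
print(np.sign(0))  # 0   -- not a valid class
print(sign(0))     # 1.0 -- the epsilon breaks the tie toward +1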
In [9]:
def predict(x_j, theta):
    """Returns the class prediction of a single point x_j."""
    return sign(np.dot(x_j, theta))
In [10]:
print(predict(x_j, theta))
In [11]:
def predict_with_np_sum(X, theta):
    """Predicts the class value for multiple points (or a single point) at once."""
    X = np.atleast_2d(X)
    return sign(np.sum(theta * X, axis=1))
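A quick check that it behaves the same on a single point and on the full data matrix:
In [ ]:
print(predict_with_np_sum(x_j, theta))         # single point
print(predict_with_np_sum(Xconst, theta)[:5])  # first few points of the full data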
In [12]:
def pred_err(X, Y, theta):
    """Predicts the class for X and returns the error rate."""
    Yhat = predict(X, theta)
    return np.mean(Yhat != Y)
In [13]:
print(pred_err(x_j, y_j, theta))
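We can also measure the error rate over the whole (constant-augmented) dataset with our initial $\theta$:
In [ ]:
print(pred_err(Xconst, Y, theta))
The Perceptron learns by nudging $\theta$ whenever it misclassifies a point. For each point $x^j$ it computes the prediction $\hat{y}^j$ and updates with learning rate $a$: $$\theta \leftarrow \theta + a\left(y^j - \hat{y}^j\right)x^j$$ When the prediction is correct, $y^j - \hat{y}^j = 0$ and $\theta$ is left unchanged. Let's apply a single update step to our point: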
In [14]:
a = 0.1
y_hat_j = predict(x_j, theta)
print(theta + a * (y_j - y_hat_j) * x_j)
In [15]:
def train(X, Y, a=0.01, stop_tol=1e-8, max_iter=1000):
    # Start by adding a const
    Xconst = add_const(X)
    m, n = Xconst.shape

    # Initializing theta
    theta = np.array([-6., 0.5, 1.])

    # The update loops
    J_err = [np.inf]
    for i in range(1, max_iter + 1):
        # One pass over the data, updating theta on every point
        for j in range(m):
            x_j, y_j = Xconst[j], Y[j]
            y_hat_j = predict(x_j, theta)
            theta += a * (y_j - y_hat_j) * x_j

        # Track the error rate after each pass and stop once it levels off
        curr_err = pred_err(Xconst, Y, theta)
        J_err.append(curr_err)

        if np.abs(J_err[-2] - J_err[-1]) < stop_tol:
            print('Reached convergence after %d iterations. Prediction error is: %.3f' % (i, J_err[-1]))
            break

    return theta
In [16]:
theta_trained = train(X, Y)
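As a quick sanity check (a minimal sketch reusing the functions above), we can compare the error rates of the initial and trained parameters:
In [ ]:
print('initial theta error: %.3f' % pred_err(add_const(X), Y, np.array([-6., 0.5, 1.])))
print('trained theta error: %.3f' % pred_err(add_const(X), Y, theta_trained))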
Now let's use all the code that we wrote and create a Python class Perceptron that can plug into the mltools package.
In order to do that, the Perceptron class has to inherit from mltools.base.classifier.
In case you haven't looked at the actual code in mltools, now is probably the right time.
In [17]:
from mltools.base import classifier
In order to create an object, we'll have to add self to all the methods.
In [18]:
class Perceptron(classifier):
    def __init__(self, theta=None):
        self.theta = theta

    def predict(self, X):
        """Returns class predictions for either a single point or multiple points."""
        # Using atleast_2d so this also works with the plotClassify2D method.
        Xconst = np.atleast_2d(X)

        # Making sure it has the const feature; if not, adding it.
        if Xconst.shape[1] == self.theta.shape[0] - 1:
            Xconst = add_const(Xconst)

        return self.sign(np.dot(Xconst, self.theta))

    def sign(self, vals):
        """A sign version that breaks ties at 0 toward +1."""
        return np.sign(vals + 1e-200)

    def pred_err(self, X, Y):
        Yhat = self.predict(X)
        return np.mean(Yhat != Y)

    def train(self, X, Y, a=0.02, stop_tol=1e-8, max_iter=1000):
        # Start by adding a const
        Xconst = add_const(X)
        m, n = Xconst.shape

        # Making sure theta is initialized
        if self.theta is None:
            self.theta = np.random.random(n)

        # The update loops
        J_err = [np.inf]
        for i in range(1, max_iter + 1):
            for j in range(m):
                x_j, y_j = Xconst[j], Y[j]
                y_hat_j = self.predict(x_j)
                self.theta += a * (y_j - y_hat_j) * x_j

            curr_err = self.pred_err(Xconst, Y)
            J_err.append(curr_err)

            if np.abs(J_err[-2] - J_err[-1]) < stop_tol:
                print('Reached convergence after %d iterations. Prediction error is: %.3f' % (i, J_err[-1]))
                break
First let's create the model with an initialized theta and plot the decision boundaries. For the plotting we can use the mltools plotClassify2D!
In [19]:
model = Perceptron()
model.theta = np.array([-6., 0.5, 1])
ml.plotClassify2D(model, X, Y)
Next, let's actually train the model and plot the new decision boundary.
In [20]:
model.train(X, Y)
ml.plotClassify2D(model, X, Y)
And there we have it: a trained Perceptron classifier!