In [1]:
%matplotlib inline

In [2]:
# Using multiclass classification algorithms either give you
# multiclass classification or you have to define a scheme for
# comparison.

In [3]:
# We will use OneVsRestClassifier to create a classifier for each
# class.

In [4]:
# This will walk through a cursory example of a Decision Tree
# fitting a multiclass dataset.

In [5]:
from sklearn import datasets

In [6]:
X, y = datasets.make_classification(n_samples=10000,
                                    n_classes=3,
                                    n_informative=3)

In [7]:
from sklearn.tree import DecisionTreeClassifier

In [8]:
dt = DecisionTreeClassifier()

In [9]:
dt.fit(X, y)
dt.predict(X)


Out[9]:
array([0, 2, 1, ..., 2, 2, 0])

In [10]:
# this shows how easy it is to create a classifier.
# Now create a OneVsRestClassifier

In [11]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression

In [12]:
# Since we want to train multiple, separate LogisticRegression
# models, we can pass a LogisticRegression model to OneVsRest...
mlr = OneVsRestClassifier(LogisticRegression(), n_jobs=2)
mlr.fit(X, y)
mlr.predict(X)


Out[12]:
array([0, 0, 1, ..., 1, 2, 0])

In [14]:
# Look at how to create our own OneVsRestClassifier:
# First, construct a way to iterate through the classes and
# train a classifier for each classifier. Then, predict each
# class first.

In [15]:
import numpy as np

In [16]:
def train_one_vs_rest(y, class_label):
    y_train = (y == class_label).astype(int)
    return y_train

classifiers = []
for class_i in sorted(np.unique(y)):
    l = LogisticRegression()
    y_train = train_one_vs_rest(y, class_i)
    l.fit(X, y_train)
    classifiers.append(l)

In [17]:
# We now have one vs rest scheme set up. Next, we need to evaluate
# the data point's likelihood for each classifier. Then, assign
# the classifier to the data point with the largest likelihood.

In [18]:
# predict X[0]:

In [26]:
for classifier in classifiers:
    print classifier.predict_proba(X[0])
    
mlr.predict(X[0])


[[ 0.13245944  0.86754056]]
[[ 0.69794087  0.30205913]]
[[ 0.98654934  0.01345066]]
Out[26]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [24]:



Out[24]:
array([0, 1, 2])

In [ ]: