In [1]:
%matplotlib inline
In [2]:
# Some classification algorithms handle multiclass problems natively;
# with the others, you have to define a scheme that reduces the
# problem to a set of binary comparisons.
In [3]:
# We will use OneVsRestClassifier to create a separate binary
# classifier for each class.
In [4]:
# This will walk through a cursory example of a Decision Tree
# fitting a multiclass dataset.
In [5]:
from sklearn import datasets
In [6]:
X, y = datasets.make_classification(n_samples=10000,
                                    n_classes=3,
                                    n_informative=3)
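In [ ]:
# A quick sanity check (a sketch, not part of the original recipe):
# make_classification defaults to 20 features, and we asked for
# three classes.
X.shape, set(y)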
In [7]:
from sklearn.tree import DecisionTreeClassifier
In [8]:
dt = DecisionTreeClassifier()
In [9]:
dt.fit(X, y)
dt.predict(X)
Out[9]:
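In [ ]:
# A minimal sketch, not in the original recipe: score the tree's
# predictions on the training set with accuracy_score.
from sklearn.metrics import accuracy_score
accuracy_score(y, dt.predict(X))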
In [10]:
# This shows how easy it is to create a classifier.
# Now, create a OneVsRestClassifier:
In [11]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
In [12]:
# Since we want to train multiple, separate LogisticRegression
# models, we pass a LogisticRegression instance to OneVsRestClassifier.
mlr = OneVsRestClassifier(LogisticRegression(), n_jobs=2)
mlr.fit(X, y)
mlr.predict(X)
Out[12]:
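In [ ]:
# OneVsRestClassifier stores one fitted estimator per class in its
# estimators_ attribute; a quick check (sketch) that there are three:
len(mlr.estimators_)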
In [14]:
# Look at how to create our own OneVsRestClassifier:
# First, construct a way to iterate through the classes and
# train a binary classifier for each class. Then, score each
# class's classifier on a given data point.
In [15]:
import numpy as np
In [16]:
def train_one_vs_rest(y, class_label):
    # Relabel the target: 1 for the chosen class, 0 for the rest.
    y_train = (y == class_label).astype(int)
    return y_train

classifiers = []
for class_i in sorted(np.unique(y)):
    clf = LogisticRegression()
    y_train = train_one_vs_rest(y, class_i)
    clf.fit(X, y_train)
    classifiers.append(clf)
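In [ ]:
# A quick check (sketch): the relabeled target is binary, with 1 for
# the chosen class and 0 for everything else.
np.bincount(train_one_vs_rest(y, 0))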
In [17]:
# We now have a one-vs-rest scheme set up. Next, we need to evaluate
# each classifier's likelihood for a data point, then assign the
# data point the class whose classifier gives the largest likelihood.
In [18]:
# Predict the class of X[0]:
In [26]:
for classifier in classifiers:
    print(classifier.predict_proba(X[0].reshape(1, -1)))

mlr.predict(X[0].reshape(1, -1))
Out[26]:
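In [ ]:
# A minimal sketch of the "largest likelihood wins" rule described
# above: take the positive-class probability from each classifier
# and pick the class with the largest one. Because the classifiers
# were built in sorted class order, the argmax index is the label.
probs = [clf.predict_proba(X[0].reshape(1, -1))[0, 1]
         for clf in classifiers]
np.argmax(probs)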