notebook.community

Edit and run



In [1]:

    
%matplotlib inline



In [2]:

    
# Using multiclass classification algorithms either give you
# multiclass classification or you have to define a scheme for
# comparison.



In [3]:

    
# We will use OneVsRestClassifier to create a classifier for each
# class.



In [4]:

    
# This will walk through a cursory example of a Decision Tree
# fitting a multiclass dataset.



In [5]:

    
from sklearn import datasets



In [6]:

    
X, y = datasets.make_classification(n_samples=10000,
                                    n_classes=3,
                                    n_informative=3)



In [7]:

    
from sklearn.tree import DecisionTreeClassifier



In [8]:

    
dt = DecisionTreeClassifier()



In [9]:

    
dt.fit(X, y)
dt.predict(X)









    Out[9]:





array([0, 2, 1, ..., 2, 2, 0])



In [10]:

    
# this shows how easy it is to create a classifier.
# Now create a OneVsRestClassifier



In [11]:

    
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression



In [12]:

    
# Since we want to train multiple, separate LogisticRegression
# models, we can pass a LogisticRegression model to OneVsRest...
mlr = OneVsRestClassifier(LogisticRegression(), n_jobs=2)
mlr.fit(X, y)
mlr.predict(X)









    Out[12]:





array([0, 0, 1, ..., 1, 2, 0])



In [14]:

    
# Look at how to create our own OneVsRestClassifier:
# First, construct a way to iterate through the classes and
# train a classifier for each classifier. Then, predict each
# class first.



In [15]:

    
import numpy as np



In [16]:

    
def train_one_vs_rest(y, class_label):
    y_train = (y == class_label).astype(int)
    return y_train

classifiers = []
for class_i in sorted(np.unique(y)):
    l = LogisticRegression()
    y_train = train_one_vs_rest(y, class_i)
    l.fit(X, y_train)
    classifiers.append(l)



In [17]:

    
# We now have one vs rest scheme set up. Next, we need to evaluate
# the data point's likelihood for each classifier. Then, assign
# the classifier to the data point with the largest likelihood.



In [18]:

    
# predict X[0]:



In [26]:

    
for classifier in classifiers:
    print classifier.predict_proba(X[0])
    
mlr.predict(X[0])









    



[[ 0.13245944  0.86754056]]
[[ 0.69794087  0.30205913]]
[[ 0.98654934  0.01345066]]






    Out[26]:





array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])



In [24]:









    Out[24]:





array([0, 1, 2])



In [ ]: