How to pass class names to ConfusionMatrix

This is a follow-up to issue #244 and PR #253, documenting how to pass class names to `ConfusionMatrix`.


In [1]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split as tts

from yellowbrick.classifier import ConfusionMatrix

In [2]:
iris = load_iris()

# Feature matrix and target vector (class indices) from the iris dataset
X = iris.data
y = iris.target

# Hold out 20% of the samples for scoring. The seed is fixed so that the split,
# and therefore the confusion matrix below, is the same on every
# Restart & Run All.
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=42)

In [3]:
# target_names is a NumPy array of strings, one name per class
classes = iris.target_names
classes


Out[3]:
array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [4]:
model = LogisticRegression()
# NOTE: this cell demonstrates the failure; passing only `classes` (string names) raises in score().
cm = ConfusionMatrix(model, classes=classes)
cm.fit(X_train, y_train)
cm.score(X_test, y_test)  # ValueError: the string names are used as `labels`, none of which occur in y_test
cm.show()  # not reached because score() raises


/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/sklearn/metrics/classification.py:258: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if np.all([l not in y_true for l in labels]):
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-4-974003cb3a2a> in <module>()
      3 cm = ConfusionMatrix(model, classes=classes)
      4 cm.fit(X_train, y_train)
----> 5 cm.score(X_test, y_test)
      6 cm.show()

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/yellowbrick/classifier/confusion_matrix.py in score(self, X, y, **kwargs)
    175         # Compute the confusion matrix and class counts
    176         self.confusion_matrix_ = confusion_matrix_metric(
--> 177             y, y_pred, labels=self.classes_, sample_weight=self.sample_weight
    178         )
    179         self.class_counts_ = self.class_counts(y)

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/sklearn/metrics/classification.py in confusion_matrix(y_true, y_pred, labels, sample_weight)
    257         labels = np.asarray(labels)
    258         if np.all([l not in y_true for l in labels]):
--> 259             raise ValueError("At least one label specified must be in y_true")
    260 
    261     if sample_weight is None:

ValueError: At least one label specified must be in y_true

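:( The call fails because the string class names are forwarded to scikit-learn's `confusion_matrix` as `labels`, while `y_test` holds the integer-encoded targets, so none of the labels are found in `y_true`.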

Workaround: also pass a `label_encoder` that maps each integer target to its class name:


In [5]:
# Workaround: besides `classes`, pass a mapping from each integer target to its
# class name. The mapping is built from `classes` so the two cannot drift apart;
# for iris it is {0: 'setosa', 1: 'versicolor', 2: 'virginica'}.
cm = ConfusionMatrix(
    model,
    classes=classes,
    label_encoder=dict(enumerate(classes)),
)
cm.fit(X_train, y_train)
cm.score(X_test, y_test)
cm.show()



In [ ]: