In [1]:
import numpy as np

from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import train_test_split
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn import metrics

import matplotlib.pyplot as plt

In [2]:
np.random.seed(42)

In [3]:
X, y = make_classification(n_samples=9000, n_features=20, class_sep=0.5)  # class_sep=0.5 gives overlapping classes

In [4]:
# Hold out 80% for evaluation; only 20% (1,800 samples) is used to fit the SVM and the calibrators.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.80)

In [5]:
clf = LinearSVC()
clf.fit(X_train, y_train)


Out[5]:
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)
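A quick aside on why calibration is needed at all: LinearSVC exposes a signed decision_function margin but no predict_proba, so its raw scores are not probabilities. A minimal check, assuming the fitted clf from above:

# LinearSVC optimises (squared) hinge loss and returns only a signed margin.
print(hasattr(clf, 'decision_function'))  # True
print(hasattr(clf, 'predict_proba'))      # False -- hence CalibratedClassifierCV below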

In [33]:
fig, axes = plt.subplots(1, 3, sharey=True)

# Raw decision-function scores (signed margins), kept as a column vector for later reuse.
y_preds = clf.decision_function(X_test)
clf_preds = y_preds.reshape(-1, 1)

fpr, tpr, _ = metrics.roc_curve(y_test, clf_preds)

auc_score = metrics.auc(fpr, tpr)

axes[0].set_title('ROC Curve - original classifier')
axes[0].plot(fpr, tpr, label='AUC = {:.5f}'.format(auc_score))

axes[0].plot([0,1],[0,1],'k:')

axes[0].set_xlim([-0.1,1.1])
axes[0].set_ylim([-0.1,1.1])
axes[0].set_ylabel('True Positive Rate')
axes[0].set_xlabel('False Positive Rate')

axes[0].legend(loc='lower right')


## CCV sigmoid
# cv='prefit' reuses the already-fitted clf and fits only the sigmoid (Platt) mapping
# on the data passed to fit(); ideally this would be a held-out calibration set.
ccv_sig = CalibratedClassifierCV(clf, method='sigmoid', cv='prefit')
ccv_sig.fit(X_train, y_train)

y_preds = ccv_sig.predict_proba(X_test)

ccv_preds_sig = y_preds[:,1]

fpr, tpr, _ = metrics.roc_curve(y_test, ccv_preds_sig)

auc_score = metrics.auc(fpr, tpr)

axes[1].set_title('ROC Curve - Sigmoid Calibration')
axes[1].plot(fpr, tpr, label='AUC = {:.5f}'.format(auc_score))

axes[1].plot([0,1],[0,1],'k:')

axes[1].set_xlim([-0.1,1.1])
axes[1].set_ylim([-0.1,1.1])
axes[1].set_ylabel('True Positive Rate')
axes[1].set_xlabel('False Positive Rate')

axes[1].legend(loc='lower right')


## CCV isotonic
# Isotonic regression fits a non-parametric, monotonically non-decreasing mapping
# from decision scores to probabilities.
ccv_iso = CalibratedClassifierCV(clf, method='isotonic', cv='prefit')
ccv_iso.fit(X_train, y_train)

y_preds = ccv_iso.predict_proba(X_test)

ccv_preds_iso = y_preds[:,1]

fpr, tpr, _ = metrics.roc_curve(y_test, ccv_preds_iso)

auc_score = metrics.auc(fpr, tpr)

axes[2].set_title('ROC Curve - Isotonic Calibration')
axes[2].plot(fpr, tpr, label='AUC = {:.5f}'.format(auc_score))

axes[2].plot([0,1],[0,1],'k:')

axes[2].set_xlim([-0.1,1.1])
axes[2].set_ylim([-0.1,1.1])
axes[2].set_ylabel('True Positive Rate')
axes[2].set_xlabel('False Positive Rate')

axes[2].legend(loc='lower right')


fig.set_size_inches(15, 5)
plt.show()


[Figure: three ROC curves - original LinearSVC, sigmoid calibration, isotonic calibration - each with its AUC in the legend]
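Because Platt (sigmoid) scaling and isotonic regression are monotone transformations of the decision scores, they preserve the ranking of the test points, so the three AUCs should be essentially identical (isotonic regression can introduce ties, so it may differ marginally). A quick check, assuming the variables from the cell above are still in scope:

# Ranking quality is unchanged by monotone calibration; only the scale of the scores changes.
for name, scores in [('raw', clf_preds.ravel()),
                     ('sigmoid', ccv_preds_sig),
                     ('isotonic', ccv_preds_iso)]:
    print(name, metrics.roc_auc_score(y_test, scores))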

In [17]:
clf_preds


Out[17]:
array([[-0.33145523],
       [-0.36691291],
       [-0.17986786],
       ...,
       [ 0.22549232],
       [ 0.33746451],
       [ 0.68203208]])

In [18]:
clf_preds.max()


Out[18]:
2.596224474271525

In [19]:
clf_preds.min()


Out[19]:
-2.285882731004038

In [28]:
# Min-max scale the raw decision scores to [0, 1] so they can be passed to
# brier_score_loss / calibration_curve; this rescaling does NOT calibrate them.
clf_preds_normalized = clf_preds - clf_preds.min()
clf_preds_normalized = clf_preds_normalized / clf_preds_normalized.max()

In [29]:
clf_preds_normalized.max()


Out[29]:
1.0
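The same rescaling can be done with scikit-learn's MinMaxScaler; either way the result is only a score squeezed into [0, 1], not a calibrated probability, which is exactly what the reliability plots below probe. A minimal equivalent sketch:

from sklearn.preprocessing import MinMaxScaler

# (x - min) / (max - min), identical to the manual normalization above.
clf_preds_scaled = MinMaxScaler().fit_transform(clf_preds)  # clf_preds is already a column vector
print(clf_preds_scaled.min(), clf_preds_scaled.max())       # 0.0 1.0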

In [36]:
fig, axes = plt.subplots(1, 2)

ax = axes[0]

ax.set_xlim([-0.1,1.1])
ax.set_ylim([-0.1,1.1])

ax.plot([0, 1], [0, 1], "k:", label="Perfect calibration")

clf_score = brier_score_loss(y_test, clf_preds_normalized, pos_label=1)
fraction_of_positives, mean_predicted_value = calibration_curve(y_test, clf_preds_normalized, n_bins=30)
ax.plot(mean_predicted_value, fraction_of_positives, "r-", label="No Calibration (Brier loss={:.3f})".format(clf_score))

clf_score = brier_score_loss(y_test, ccv_preds_sig, pos_label=1)
fraction_of_positives, mean_predicted_value = calibration_curve(y_test, ccv_preds_sig, n_bins=30)
ax.plot(mean_predicted_value, fraction_of_positives, "b-", label="Sigmoid Calibration (Brier loss={:.3f})".format(clf_score))

ax.legend(loc='lower right')
ax.set_title('Original vs Sigmoid Calibration - \n Linear SVM Classifier', size=16)
plt.subplots_adjust(top=0.85)


## Isotonic panel

ax = axes[1]

ax.set_xlim([-0.1,1.1])
ax.set_ylim([-0.1,1.1])

ax.plot([0, 1], [0, 1], "k:", label="Perfect calibration")

clf_score = brier_score_loss(y_test, clf_preds_normalized, pos_label=1)
fraction_of_positives, mean_predicted_value = calibration_curve(y_test, clf_preds_normalized, n_bins=30)
ax.plot(mean_predicted_value, fraction_of_positives, "r-", label="No Calibration (Brier loss={:.3f})".format(clf_score))

clf_score = brier_score_loss(y_test, ccv_preds_iso, pos_label=1)
fraction_of_positives, mean_predicted_value = calibration_curve(y_test, ccv_preds_iso, n_bins=30)
ax.plot(mean_predicted_value, fraction_of_positives, "b-", label="Isotonic Calibration (Brier loss={:.3f})".format(clf_score))

ax.legend(loc='lower right')
ax.set_title('Original vs Isotonic Calibration - \n Linear SVM Classifier', size=16)
plt.subplots_adjust(top=0.85)

fig.set_size_inches(12, 6)
plt.show()


[Figure: two reliability diagrams - min-max scaled scores vs sigmoid calibration, and min-max scaled scores vs isotonic calibration, with Brier losses in the legends]
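To put the legend numbers side by side (lower Brier loss is better), a small summary assuming the earlier variables are still defined:

# Brier score compares predicted probabilities to outcomes; calibration typically
# lowers it even though the ROC/AUC above was unchanged.
for name, probs in [('min-max scaled', clf_preds_normalized.ravel()),
                    ('sigmoid', ccv_preds_sig),
                    ('isotonic', ccv_preds_iso)]:
    print(name, brier_score_loss(y_test, probs, pos_label=1))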
