In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import train_test_split
from sklearn.calibration import CalibratedClassifierCV,calibration_curve
from sklearn import metrics
import matplotlib.pyplot as plt
In [2]:
# Seed NumPy's legacy global random generator so a fresh
# "Restart Kernel -> Run All" reproduces the same numbers.
# Any later call that is not handed its own random_state falls back to
# this global generator, so this cell must run before them.
np.random.seed(seed=42)
In [3]:
# Synthetic classification problem: 9000 samples, 20 features.
# class_sep=0.5 is lower than scikit-learn's default of 1.0, so the classes
# overlap more and the task is harder.
# random_state is pinned so the data no longer depends on how far the global
# NumPy generator has advanced: re-running this cell, or running cells out of
# order, yields the same X and y.
X, y = make_classification(
    n_samples=9000,
    n_features=20,
    class_sep=0.5,
    random_state=42,
)
In [4]:
# Hold out 80% of the samples for evaluation; only 20% are used for training.
# NOTE(review): such a small training share looks deliberate (large test set
# for smooth calibration curves) -- confirm it is not a swapped ratio.
# random_state is pinned so the split is identical on every (re-)run instead
# of depending on the current state of the global NumPy generator.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.80,
    random_state=42,
)
In [5]:
# Baseline model: a linear SVM. It produces signed decision scores rather
# than probabilities, which is why later cells calibrate it.
# random_state is pinned so the fit does not depend on the global NumPy
# generator when the solver uses randomness.
clf = LinearSVC(random_state=42)
clf.fit(X_train, y_train)
Out[5]:
In [33]:
def plot_roc_panel(ax, y_true, scores, title):
    """Draw one ROC curve plus the chance diagonal on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    y_true : true binary labels.
    scores : 1-D scores or probabilities for the positive class.
    title : panel title.

    Returns
    -------
    float
        Area under the plotted ROC curve.
    """
    fpr, tpr, _ = metrics.roc_curve(y_true, scores)
    auc_score = metrics.auc(fpr, tpr)
    ax.set_title(title)
    ax.plot(fpr, tpr, label='AUC = {:.5f}'.format(auc_score))
    ax.plot([0, 1], [0, 1], 'k:')  # chance-level reference line
    ax.set_xlim([-0.1, 1.1])
    ax.set_ylim([-0.1, 1.1])
    ax.set_ylabel('True Positive Rate')
    ax.set_xlabel('False Positive Rate')
    ax.legend(loc='lower right')
    return auc_score


# Raw decision scores of the uncalibrated SVM, kept as a column vector
# because later cells inspect and rescale `clf_preds` in that shape.
clf_preds = clf.decision_function(X_test).reshape(-1, 1)

# Calibrated variants.
# The calibrators used to be fitted with cv='prefit' on X_train, i.e. on the
# very samples `clf` was trained on. scikit-learn requires the calibration
# data to be disjoint from the training data in that mode, otherwise the
# calibration map is biased. With cv=5 the estimator is cloned and refitted
# per fold and each calibrator is learned on that fold's held-out part, so
# the training set can be reused safely; `clf` itself is left untouched.
ccv_sig = CalibratedClassifierCV(clf, method='sigmoid', cv=5)
ccv_sig.fit(X_train, y_train)
ccv_preds_sig = ccv_sig.predict_proba(X_test)[:, 1]  # P(class == 1)

ccv_iso = CalibratedClassifierCV(clf, method='isotonic', cv=5)
ccv_iso.fit(X_train, y_train)
ccv_preds_iso = ccv_iso.predict_proba(X_test)[:, 1]  # P(class == 1)

# One panel per model. The figure is created directly at its final size;
# the former leading plt.clf() only produced a stray empty figure.
fig, axes = plt.subplots(1, 3, sharey=True, figsize=(15, 5))
roc_panels = [
    ('ROC Curve - original classifier', clf_preds.ravel()),
    ('ROC Curve - Sigmoid Calibration', ccv_preds_sig),
    ('ROC Curve - Isotonic Calibration', ccv_preds_iso),
]
for ax, (panel_title, panel_scores) in zip(axes, roc_panels):
    auc_score = plot_roc_panel(ax, y_test, panel_scores, panel_title)
plt.show()
In [17]:
# Display the raw decision-function scores of the uncalibrated classifier
# (stored as a column vector via reshape(-1, 1)).
clf_preds
Out[17]:
In [18]:
# Largest raw decision score; these scores are not confined to [0, 1].
np.max(clf_preds)
Out[18]:
In [19]:
# Smallest raw decision score.
np.min(clf_preds)
Out[19]:
In [28]:
# Min-max scale the raw decision scores into [0, 1] so they can be compared
# against calibrated probabilities: shift so the minimum becomes 0, then
# divide by the resulting span so the maximum becomes 1.
# This is a plain linear rescaling, not a probability calibration.
score_floor = clf_preds.min()
score_span = clf_preds.max() - score_floor
clf_preds_normalized = (clf_preds - score_floor) / score_span
In [29]:
# Sanity check: after min-max scaling the largest value should be 1.
np.max(clf_preds_normalized)
Out[29]:
In [36]:
def plot_reliability_panel(ax, y_true, raw_scores, calibrated_probs, method_name, n_bins=30):
    """Compare uncalibrated and calibrated reliability curves on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    y_true : true binary labels.
    raw_scores : uncalibrated scores already rescaled into [0, 1].
    calibrated_probs : calibrated probabilities of the positive class.
    method_name : calibration method shown in the legend and title
        (e.g. ``'Sigmoid'``).
    n_bins : number of bins passed to ``calibration_curve``.
    """
    ax.set_xlim([-0.1, 1.1])
    ax.set_ylim([-0.1, 1.1])
    ax.plot([0, 1], [0, 1], "k:", label="Perfect calibration")

    curves = (
        (raw_scores, "r-", "No Calibration"),
        (calibrated_probs, "b-", "{} Calibration".format(method_name)),
    )
    for probs, line_style, curve_name in curves:
        # Flatten so a column vector of shape (n, 1) is passed as the 1-D
        # array the metric functions document.
        probs = np.ravel(probs)
        clf_score = brier_score_loss(y_true, probs, pos_label=1)
        fraction_of_positives, mean_predicted_value = calibration_curve(
            y_true, probs, n_bins=n_bins
        )
        ax.plot(
            mean_predicted_value,
            fraction_of_positives,
            line_style,
            label="{} (Brier loss={:.3f})".format(curve_name, clf_score),
        )

    ax.set_xlabel('Mean predicted value')
    ax.set_ylabel('Fraction of positives')
    ax.legend(loc='lower right')
    ax.set_title(
        'Original vs {} Calibration - \n Linear SVM Classifier'.format(method_name),
        size=16,
    )


# One panel per calibration method, each against the same uncalibrated
# baseline. The figure is created directly at its final size; the former
# leading plt.clf() only produced a stray empty figure.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
calibrated_by_method = [
    ('Sigmoid', ccv_preds_sig),
    ('Isotonic', ccv_preds_iso),
]
for ax, (method_name, calibrated_probs) in zip(axes, calibrated_by_method):
    plot_reliability_panel(ax, y_test, clf_preds_normalized, calibrated_probs, method_name)

# Leave headroom for the two-line panel titles.
fig.subplots_adjust(top=0.85)
plt.show()
In [ ]: