In [47]:
import matplotlib.pylab as plt
import matplotlib as mpl
import pandas as pd
import numpy as np

%matplotlib inline

In [13]:
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
# Synthetic binary problem described by a single informative feature;
# random_state pins the draw so the numbers below are repeatable.
X, y = make_classification(
    n_features=1,
    n_informative=1,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=4,
)
# Fit on the full sample (no train/test split is made here).
model = LogisticRegression()
model.fit(X, y)

In [12]:
# Confusion matrix of the model's predictions on the data it was fitted on.
print(confusion_matrix(y_true=y, y_pred=model.predict(X)))


[[47  2]
 [ 3 48]]

In [14]:
# Per-class precision / recall / F1 for the same training-set predictions.
print(classification_report(y_true=y, y_pred=model.predict(X)))


             precision    recall  f1-score   support

          0       0.94      0.96      0.95        49
          1       0.96      0.94      0.95        51

avg / total       0.95      0.95      0.95       100


In [15]:
from sklearn.metrics import roc_curve
# ROC points obtained by sweeping a threshold over the model's decision values.
fpr, tpr, thresholds = roc_curve(y_true=y, y_score=model.decision_function(X))

In [16]:
# ROC curve of the fitted model drawn against the chance diagonal.
plt.plot(fpr, tpr, label="Logistic Regression")
plt.plot([0, 1], [0, 1], 'k--', label="random guess")
plt.xlabel('False Positive Rate (Fall-Out)')
plt.ylabel('True Positive Rate (Recall)')
plt.title('Receiver operating characteristic example')
# The label= arguments are only rendered once a legend is requested.
plt.legend(loc="lower right")
plt.show()



In [26]:
# Display the false-positive-rate values returned by roc_curve.
fpr


Out[26]:
array([ 0.        ,  0.        ,  0.02040816,  0.02040816,  0.04081633,
        0.04081633,  0.12244898,  0.12244898,  0.7755102 ,  0.7755102 ,  1.        ])

In [33]:
# Decision-function value of the fitted model for every sample in X.
dfv = model.decision_function(X)

In [48]:
# Sorted copy of the decision values (np.sort returns a new array; dfv is left as is).
dfv2 = np.sort(dfv)

In [40]:
# Plot the sorted decision values against their position in the array.
plt.plot(dfv2)


Out[40]:
[<matplotlib.lines.Line2D at 0x7f59753e4a20>]

In [50]:
# Count of negative decision values. dfv2 is sorted ascending, so this is
# also the position of the first non-negative entry.
idx = np.sum(dfv2 < 0)

In [52]:
# The two sorted decision values on either side of 0: last negative, first non-negative.
dfv2[idx-1], dfv2[idx]


Out[52]:
(-0.12815748448981312, 0.35807011034117164)

In [53]:
# Display the thresholds returned by roc_curve.
thresholds


Out[53]:
array([ 7.24206317,  1.14148507,  0.56194883,  0.38045956,  0.35807011,
       -0.12815748, -0.97494752, -1.04809826, -3.76360833, -4.2628357 ,
       -6.33200169])

In [54]:
# Number of thresholds above 0. The thresholds are listed in decreasing
# order, so this is the index of the first threshold that is <= 0.
ids = (thresholds > 0).sum()

In [57]:
# True positive rate at the ROC points just above and just below threshold 0.
tpr[ids-1], tpr[ids]


Out[57]:
(0.94117647058823528, 0.96078431372549022)

In [60]:
fpr[ids-1], tpr[ids]


Out[60]:
(0.040816326530612242, 0.96078431372549022)

In [61]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
# Re-create the one-feature synthetic dataset and refit the classifier so
# this part of the notebook does not rely on the cells further up.
X, y = make_classification(
    n_features=1,
    n_informative=1,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=4,
)
model = LogisticRegression()
model.fit(X, y)

In [62]:
from sklearn.metrics import roc_curve
# ROC points for the refitted model, scored with its decision function.
fpr, tpr, thresholds = roc_curve(y_true=y, y_score=model.decision_function(X))

In [79]:
# Locate the point on the ROC curve at which model.predict() operates.
# predict() labels a sample positive exactly when its decision value is > 0,
# so the two rates are measured directly from that rule. Averaging the two
# ROC vertices around threshold 0 instead lands between vertices, and with
# roc_curve's default drop_intermediate=True the exact vertex may be absent.
idx = (thresholds > 0).sum()  # where the thresholds cross 0 in the ROC arrays (kept for inspection)
predicted_positive = model.decision_function(X) > 0
tpr0 = predicted_positive[y == 1].mean()  # share of actual positives flagged positive
fpr0 = predicted_positive[y == 0].mean()  # share of actual negatives flagged positive

In [82]:
# ROC curve, chance diagonal, and a marker at the threshold-0 point (fpr0, tpr0).
plt.plot(fpr, tpr, label="Logistic Regression")
plt.plot([0, 1], [0, 1], 'k--', label="random guess")
plt.plot(fpr0, tpr0, 'ro', ms=15, alpha=0.5, label="threshold 0")
plt.xlabel('False Positive Rate (Fall-Out)')
plt.ylabel('True Positive Rate (Recall)')
plt.title('Receiver operating characteristic example')
# The label= arguments are only rendered once a legend is requested.
plt.legend(loc="lower right")
plt.show()



In [64]:
# NOTE(review): bare reference to the plot function - nothing is called or drawn here; looks like leftover scratch input.
plt.plot

In [83]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
# Iris data; the classifier is fitted on every sample and every feature.
# Note that this rebinds `model`, replacing the earlier binary classifier.
iris = load_iris()
model = LogisticRegression()
model.fit(iris.data, iris.target)

In [84]:
from sklearn.metrics import roc_curve
# One ROC curve per iris class: column k of the decision function is the
# score and class k is treated as the positive label (pos_label=k).
# The decision function is evaluated once and reused for all three curves.
iris_scores = model.decision_function(iris.data)
fpr0, tpr0, thresholds0 = roc_curve(iris.target, iris_scores[:, 0], pos_label=0)
fpr1, tpr1, thresholds1 = roc_curve(iris.target, iris_scores[:, 1], pos_label=1)
fpr2, tpr2, thresholds2 = roc_curve(iris.target, iris_scores[:, 2], pos_label=2)

In [100]:
def _rates_at_zero(scores, is_positive):
    """Return (FPR, TPR) of the rule "flag a sample when its score is > 0".

    scores      -- 1-D array of decision values, one per sample.
    is_positive -- boolean array of the same length, True where the sample
                   truly belongs to the positive class.
    """
    flagged = scores > 0
    return flagged[~is_positive].mean(), flagged[is_positive].mean()


# Positions where each class's thresholds cross 0 (kept for inspection).
idx0 = (thresholds0 > 0).sum()
idx1 = (thresholds1 > 0).sum()
idx2 = (thresholds2 > 0).sum()

# Threshold-0 point for each one-vs-rest curve, measured directly from the
# decision values. Averaging the two neighbouring ROC vertices instead lands
# between vertices and is fragile under roc_curve's drop_intermediate=True.
_iris_scores = model.decision_function(iris.data)
fpr00, tpr00 = _rates_at_zero(_iris_scores[:, 0], iris.target == 0)
fpr11, tpr11 = _rates_at_zero(_iris_scores[:, 1], iris.target == 1)
fpr22, tpr22 = _rates_at_zero(_iris_scores[:, 2], iris.target == 2)

In [101]:
# Per class: ROC curve arrays, threshold-0 marker coordinates, colour code, legend text.
class_traces = [
    (fpr0, tpr0, fpr00, tpr00, "r", "class 0 "),
    (fpr1, tpr1, fpr11, tpr11, "g", "class 1"),
    (fpr2, tpr2, fpr22, tpr22, "b", "class 2"),
]

# Curves first, then the chance diagonal, then the markers on top.
for curve_x, curve_y, mark_x, mark_y, colour, legend_text in class_traces:
    plt.plot(curve_x, curve_y, colour + "-", label=legend_text)
plt.plot([0, 1], [0, 1], 'k--', label="random guess")
for curve_x, curve_y, mark_x, mark_y, colour, legend_text in class_traces:
    plt.plot(mark_x, mark_y, colour + "o", ms=10, alpha=0.5)

# Small margins so points lying on the axes stay visible.
plt.xlim(-0.05, 1.0)
plt.ylim(0, 1.05)
plt.xlabel('False Positive Rate (Fall-Out)')
plt.ylabel('True Positive Rate (Recall)')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()



In [ ]: