In [47]:
import matplotlib.pylab as plt
import matplotlib as mpl
import pandas as pd
import numpy as np
%matplotlib inline
In [13]:
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
# Single-feature synthetic classification dataset; random_state pins the sample.
X, y = make_classification(
    n_features=1,
    n_informative=1,
    n_redundant=0,
    n_clusters_per_class=1,
    random_state=4,
)
# Fit a default-configured logistic regression on the whole dataset
# (no train/test split is made here).
model = LogisticRegression()
model.fit(X, y)
In [12]:
# Confusion matrix of the model evaluated on the same data it was fitted on.
y_pred = model.predict(X)
print(confusion_matrix(y, y_pred))
In [14]:
# Per-class precision/recall summary, again on the training data itself.
y_pred = model.predict(X)
print(classification_report(y, y_pred))
In [15]:
from sklearn.metrics import roc_curve
# ROC points for the fitted model, using its decision-function values as scores.
binary_scores = model.decision_function(X)
fpr, tpr, thresholds = roc_curve(y, binary_scores)
In [16]:
# ROC curve of the fitted model against the chance diagonal.
plt.plot(fpr, tpr, label="ROC curve")
plt.plot([0, 1], [0, 1], 'k--', label="random guess")
plt.xlabel('False Positive Rate (Fall-Out)')
plt.ylabel('True Positive Rate (Recall)')
plt.title('Receiver operating characteristic example')
# Line labels are only rendered once a legend is drawn; without this call
# the "random guess" label was silently ignored.
plt.legend(loc="lower right")
plt.show()
In [26]:
# Display the false-positive-rate values obtained from roc_curve.
fpr
Out[26]:
In [33]:
# Decision-function value of the fitted model for every sample in X.
dfv = model.decision_function(X)
In [48]:
# Ascending-sorted copy of the decision-function values; dfv itself is untouched.
dfv2 = dfv.copy()
dfv2.sort()
In [40]:
# Plot the sorted decision-function values against their position in the sort.
plt.plot(dfv2)
Out[40]:
In [50]:
# Number of negative scores; because dfv2 is sorted ascending this is also
# the position of the first non-negative score.
idx = np.sum(dfv2 < 0)
In [52]:
# The two sorted scores that straddle zero: the largest negative value
# and the smallest non-negative value.
dfv2[idx-1], dfv2[idx]
Out[52]:
In [53]:
# Display the score thresholds returned by roc_curve alongside fpr/tpr.
thresholds
Out[53]:
In [54]:
# How many ROC thresholds are strictly positive.
ids = (thresholds > 0).sum()
In [57]:
# TPR at positions ids-1 and ids, i.e. on either side of the zero threshold.
# NOTE(review): this relies on roc_curve returning thresholds in decreasing
# order, so that the first `ids` entries are exactly the positive ones.
tpr[ids-1], tpr[ids]
Out[57]:
In [60]:
# FPR at positions ids-1 and ids, i.e. on either side of the zero threshold.
# The original paired fpr[ids-1] with tpr[ids]; the cell before this one
# already shows the TPR pair, so both entries here are taken from fpr.
fpr[ids-1], fpr[ids]
Out[60]:
In [61]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
# Rebuilds the same dataset (identical arguments and random_state as the
# earlier cell) and refits the model, so the cells below start from known state.
dataset_options = dict(
    n_features=1,
    n_redundant=0,
    n_informative=1,
    n_clusters_per_class=1,
    random_state=4,
)
X, y = make_classification(**dataset_options)
model = LogisticRegression()
model.fit(X, y)
In [62]:
from sklearn.metrics import roc_curve
# Recompute the ROC points from the refitted model's decision-function values.
binary_scores = model.decision_function(X)
fpr, tpr, thresholds = roc_curve(y, binary_scores)
In [79]:
# Estimate the ROC point for a decision threshold of 0 by averaging the two
# neighbouring ROC points: the last with threshold > 0 and the one after it.
# NOTE(review): relies on thresholds being in decreasing order.
idx = np.sum(thresholds > 0)
around_zero = slice(idx - 1, idx + 1)
tpr0 = tpr[around_zero].mean()
fpr0 = fpr[around_zero].mean()
In [82]:
# ROC curve with the chance diagonal, plus a marker at the averaged
# (fpr0, tpr0) point computed around the zero threshold.
plt.plot(fpr, tpr, label="ROC curve")
plt.plot([0, 1], [0, 1], 'k--', label="random guess")
plt.plot(fpr0, tpr0, 'ro', ms=15, alpha=0.5, label="threshold = 0")
plt.xlabel('False Positive Rate (Fall-Out)')
plt.ylabel('True Positive Rate (Recall)')
plt.title('Receiver operating characteristic example')
# Labels are only rendered once a legend is drawn; without this call
# the "random guess" label was silently ignored.
plt.legend(loc="lower right")
plt.show()
In [64]:
# NOTE(review): bare reference to plt.plot with no call. It only echoes the
# function object; looks like a leftover from interactive exploration.
plt.plot
In [83]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
# Switch to the iris dataset. This rebinds `model`, replacing the classifier
# fitted on the synthetic data above.
iris = load_iris()
model = LogisticRegression()
model.fit(iris.data, iris.target)
In [84]:
from sklearn.metrics import roc_curve
# One ROC curve per iris class: column k of the decision function is the
# score and class k is the positive label. The scores are computed once
# and sliced per class.
# Note: fpr0/tpr0 held scalar values earlier in the notebook and are
# rebound here to the roc_curve outputs for class 0.
iris_scores = model.decision_function(iris.data)
fpr0, tpr0, thresholds0 = roc_curve(iris.target, iris_scores[:, 0], pos_label=0)
fpr1, tpr1, thresholds1 = roc_curve(iris.target, iris_scores[:, 1], pos_label=1)
fpr2, tpr2, thresholds2 = roc_curve(iris.target, iris_scores[:, 2], pos_label=2)
In [100]:
def _zero_threshold_point(fpr, tpr, thresholds):
    """Return (count, mean_fpr, mean_tpr) around the zero threshold.

    `count` is the number of thresholds strictly greater than 0. The two
    means are taken over ROC positions count-1 and count, i.e. the pair of
    points on either side of the zero threshold.
    NOTE(review): relies on thresholds being in decreasing order.
    """
    count = (thresholds > 0).sum()
    window = slice(count - 1, count + 1)
    return count, fpr[window].mean(), tpr[window].mean()


# Same computation for each of the three classes.
idx0, fpr00, tpr00 = _zero_threshold_point(fpr0, tpr0, thresholds0)
idx1, fpr11, tpr11 = _zero_threshold_point(fpr1, tpr1, thresholds1)
idx2, fpr22, tpr22 = _zero_threshold_point(fpr2, tpr2, thresholds2)
In [101]:
# Per-class ROC curves, drawn in class order so the legend lists them that way.
class_curves = [
    (fpr0, tpr0, "r-", "class 0 "),
    (fpr1, tpr1, "g-", "class 1"),
    (fpr2, tpr2, "b-", "class 2"),
]
for curve_fpr, curve_tpr, line_fmt, curve_label in class_curves:
    plt.plot(curve_fpr, curve_tpr, line_fmt, label=curve_label)

# Chance diagonal for reference.
plt.plot([0, 1], [0, 1], 'k--', label="random guess")

# Unlabelled markers at each class's averaged point around the zero threshold,
# coloured to match the corresponding curve.
zero_points = [
    (fpr00, tpr00, "ro"),
    (fpr11, tpr11, "go"),
    (fpr22, tpr22, "bo"),
]
for point_fpr, point_tpr, marker_fmt in zero_points:
    plt.plot(point_fpr, point_tpr, marker_fmt, ms=10, alpha=0.5)

# Small margins so curves hugging the left and top edges stay visible.
plt.xlim(-0.05, 1.0)
plt.ylim(0, 1.05)
plt.xlabel('False Positive Rate (Fall-Out)')
plt.ylabel('True Positive Rate (Recall)')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()
In [ ]: