In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import roc_curve, recall_score, confusion_matrix
In [2]:
# Toy binary-classification data: four negatives followed by four positives.
# The scores overlap between the classes (some negatives outscore some positives).
y_true = np.repeat([0, 1], 4)
y_score = np.array([
    0.2, 0.3, 0.6, 0.8,  # scores of the four negative samples
    0.4, 0.5, 0.7, 0.9,  # scores of the four positive samples
])
In [3]:
# Boolean mask: True where a score reaches the 0.5 threshold.
print(np.greater_equal(y_score, 0.5))
In [4]:
# Same mask as 0/1 integers, i.e. the predicted labels at threshold 0.5.
print(np.greater_equal(y_score, 0.5).astype(int))
In [5]:
def fpr_score(y_true, y_pred):
    """Return the false positive rate, FP / (FP + TN), of binary predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels.
    y_pred : array-like
        Predicted binary labels (for example ``y_score >= threshold``).

    Returns
    -------
    float
        Share of the actual negatives that were predicted positive, or
        ``nan`` when ``y_true`` contains no negatives at all.

    Raises
    ------
    ValueError
        If the confusion matrix does not have exactly four cells, i.e. the
        inputs do not describe a binary problem.
    """
    cm = confusion_matrix(y_true, y_pred)
    if cm.shape == (1, 1):
        # Only one label value occurs across both inputs, so sklearn collapses
        # the matrix to 1x1 and the 4-way unpack below would fail. Request the
        # full 0/1 layout explicitly for that degenerate case.
        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    # Row-major order of the 2x2 matrix is tn, fp, fn, tp.
    tn, fp, _fn, _tp = cm.ravel()
    negatives = tn + fp
    if negatives == 0:
        # No actual negatives: the rate is undefined rather than a 0/0 warning.
        return float('nan')
    return fp / negatives
In [6]:
# False positive rate when scores of at least 0.5 are predicted positive.
print(fpr_score(y_true, np.greater_equal(y_score, 0.5)))
In [7]:
# True positive rate (recall) at the same 0.5 threshold.
print(recall_score(y_true, np.greater_equal(y_score, 0.5)))
In [8]:
# Smallest score in the data: with `>=` every sample passes this threshold.
th_min = y_score.min()
print(th_min)
In [9]:
# Predicted labels at the minimum threshold.
print(np.greater_equal(y_score, th_min).astype(int))
In [10]:
# False positive rate at the minimum threshold.
print(fpr_score(y_true, np.greater_equal(y_score, th_min)))
In [11]:
# True positive rate at the minimum threshold.
print(recall_score(y_true, np.greater_equal(y_score, th_min)))
In [12]:
# A threshold above the largest score: no sample can reach it.
th_max = y_score.max() + 1
print(th_max)
In [13]:
# Predicted labels at the above-maximum threshold.
print(np.greater_equal(y_score, th_max).astype(int))
In [14]:
# False positive rate at the above-maximum threshold.
print(fpr_score(y_true, np.greater_equal(y_score, th_max)))
In [15]:
# True positive rate at the above-maximum threshold.
print(recall_score(y_true, np.greater_equal(y_score, th_max)))
In [16]:
# Put labels and scores side by side, one row per sample.
df = pd.DataFrame(dict(true=y_true, score=y_score))
In [17]:
# Use each sample's own score as the decision threshold and record the
# TPR and FPR obtained over the whole data set at that threshold.
df['TPR'] = df['score'].map(lambda th: recall_score(y_true, y_score >= th))
df['FPR'] = df['score'].map(lambda th: fpr_score(y_true, y_score >= th))
In [18]:
# Show each sample with the TPR/FPR obtained when its score is the threshold.
print(df)
In [19]:
# Same table ordered from the highest threshold to the lowest.
print(
    df.sort_values(by='score', ascending=False)
)
In [20]:
# ROC curve from sklearn, keeping every threshold (drop_intermediate=False).
fpr_all, tpr_all, th_all = roc_curve(
    y_true=y_true,
    y_score=y_score,
    drop_intermediate=False,
)
In [21]:
# Collect the three arrays returned by roc_curve into one table.
df_roc = pd.DataFrame(dict(th_all=th_all, tpr_all=tpr_all, fpr_all=fpr_all))
In [22]:
# Show the thresholds with the TPR/FPR that roc_curve returned for each.
print(df_roc)
In [23]:
# Perfectly separable case: every positive scores higher than every negative.
y_true_perfect = np.repeat([0, 1], 4)
y_score_perfect = np.arange(1, 9) / 10  # 0.1, 0.2, ..., 0.8
In [24]:
# Show the true labels, for comparison with the predicted labels printed next.
print(y_true_perfect)
In [25]:
# Predicted labels for the separable scores at threshold 0.5.
print(np.greater_equal(y_score_perfect, 0.5).astype(int))
In [26]:
# False positive rate of the separable data at threshold 0.5.
print(fpr_score(y_true_perfect, np.greater_equal(y_score_perfect, 0.5)))
In [27]:
# True positive rate of the separable data at threshold 0.5.
print(recall_score(y_true_perfect, np.greater_equal(y_score_perfect, 0.5)))
In [28]:
# (fpr, tpr, thresholds) tuple for the perfectly separable data.
roc_p = roc_curve(
    y_true=y_true_perfect,
    y_score=y_score_perfect,
    drop_intermediate=False,
)
In [29]:
# Plot the ROC curve of the perfectly separable data and save it as a PNG.
fig, ax = plt.subplots()
ax.plot(roc_p[0], roc_p[1], marker='o')  # roc_p[0] is FPR, roc_p[1] is TPR
ax.set_xlabel('FPR: False positive rate')
ax.set_ylabel('TPR: True positive rate')
ax.grid()
# savefig does not create missing directories; make sure the target exists
# so the notebook also runs on a fresh checkout.
os.makedirs('data/dst', exist_ok=True)
fig.savefig('data/dst/sklearn_roc_curve_perfect.png')
plt.close(fig)
In [30]:
# Variant 1: reuse the evenly spaced scores, but with one positive and one
# negative label swapped relative to the perfectly separable labels.
y_score_1 = y_score_perfect
y_true_1 = np.asarray([0, 0, 0, 1, 0, 1, 1, 1])
In [31]:
# (fpr, tpr, thresholds) tuple for variant 1.
roc_1 = roc_curve(y_true=y_true_1, y_score=y_score_1, drop_intermediate=False)
In [32]:
# Variant 2: same scores again, labels alternating in pairs (0, 0, 1, 1, ...).
y_score_2 = y_score_perfect
y_true_2 = np.tile([0, 0, 1, 1], 2)
In [33]:
# (fpr, tpr, thresholds) tuple for variant 2.
roc_2 = roc_curve(y_true=y_true_2, y_score=y_score_2, drop_intermediate=False)
In [34]:
# Overlay the three ROC curves (perfect separation and the two variants)
# and save the comparison as a PNG.
fig, ax = plt.subplots()
# Index 0 of each roc_* tuple is FPR, index 1 is TPR. Labels feed the legend
# so the curves can be told apart without reading the code.
ax.plot(roc_p[0], roc_p[1], marker='s', label='roc_p (perfect)')
ax.plot(roc_1[0], roc_1[1], marker='o', label='roc_1')
ax.plot(roc_2[0], roc_2[1], marker='x', label='roc_2')
ax.set_xlabel('FPR: False positive rate')
ax.set_ylabel('TPR: True positive rate')
ax.grid()
ax.legend()
# savefig does not create missing directories; make sure the target exists
# so the notebook also runs on a fresh checkout.
os.makedirs('data/dst', exist_ok=True)
fig.savefig('data/dst/sklearn_roc_curve_compare.png')
plt.close(fig)
In [35]:
# Baseline data for the rescaling experiment: labels alternate in pairs and
# the scores are evenly spaced from 0.1 to 0.8.
y_true_org = np.tile([0, 0, 1, 1], 2)
y_score_org = np.arange(1, 9) / 10
In [36]:
# (fpr, tpr, thresholds) tuple for the baseline scores.
roc_org = roc_curve(y_true=y_true_org, y_score=y_score_org, drop_intermediate=False)
In [37]:
# Halve every score: the values change but their ordering does not.
y_score_scale = y_score_org * 0.5
print(y_score_scale)
In [38]:
# (fpr, tpr, thresholds) tuple for the halved scores.
roc_scale = roc_curve(y_true=y_true_org, y_score=y_score_scale, drop_intermediate=False)
In [39]:
# Scores with very uneven gaps but still strictly increasing, i.e. in the
# same order as the baseline scores.
y_score_interval = np.asarray([
    0.01, 0.02,
    0.91, 0.92, 0.93, 0.94, 0.95, 0.96,
])
In [40]:
# (fpr, tpr, thresholds) tuple for the unevenly spaced scores.
roc_interval = roc_curve(
    y_true=y_true_org,
    y_score=y_score_interval,
    drop_intermediate=False,
)
In [41]:
# Overlay the ROC curves for the baseline, halved and unevenly spaced scores
# (all computed against y_true_org) and save the figure as a PNG.
fig, ax = plt.subplots()
# Index 0 of each roc_* tuple is FPR, index 1 is TPR. Distinct line styles
# keep overlapping curves visible; labels feed the legend.
ax.plot(roc_org[0], roc_org[1], marker='s', label='roc_org')
ax.plot(roc_scale[0], roc_scale[1], marker='o', linestyle='-.', label='roc_scale')
ax.plot(roc_interval[0], roc_interval[1], marker='x', linestyle=':', label='roc_interval')
ax.set_xlabel('FPR: False positive rate')
ax.set_ylabel('TPR: True positive rate')
ax.grid()
ax.legend()
# savefig does not create missing directories; make sure the target exists
# so the notebook also runs on a fresh checkout.
os.makedirs('data/dst', exist_ok=True)
fig.savefig('data/dst/sklearn_roc_curve_same.png')
plt.close(fig)
In [42]:
# Wrap the unevenly spaced scores in a Series to use pandas' rank().
s = pd.Series(data=y_score_interval)
In [43]:
# Show the raw, unevenly spaced scores.
print(s)
In [44]:
# Rank of each score in ascending order (pandas' default 'average' method).
print(s.rank(method='average', ascending=True))
In [45]:
# Random-classifier case: 5000 negatives then 5000 positives, with scores
# drawn uniformly from [0, 1) independently of the label.
np.random.seed(0)  # fixed seed so the scores are reproducible
y_true_random = np.repeat([0, 1], 5000)
y_score_random = np.random.random_sample(size=10000)
In [46]:
# (fpr, tpr, thresholds) tuple for the random scores, with roc_curve's
# default settings.
roc_random = roc_curve(y_true=y_true_random, y_score=y_score_random)
In [47]:
# Plot the ROC curve of the random scores and save it as a PNG.
fig, ax = plt.subplots()
ax.plot(roc_random[0], roc_random[1])  # roc_random[0] is FPR, roc_random[1] is TPR
ax.set_xlabel('FPR: False positive rate')
ax.set_ylabel('TPR: True positive rate')
ax.grid()
# savefig does not create missing directories; make sure the target exists
# so the notebook also runs on a fresh checkout.
os.makedirs('data/dst', exist_ok=True)
fig.savefig('data/dst/sklearn_roc_curve_random.png')
plt.close(fig)