notebook.community

Edit and run



In [1]:

    
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import roc_curve, recall_score, confusion_matrix



In [2]:

    
# Toy binary problem: the first four samples are negatives (0), the last four
# are positives (1). The scores are deliberately not perfectly ordered by class.
y_true = np.repeat([0, 1], 4)
y_score = np.array(
    [0.2, 0.3, 0.6, 0.8,   # scores of the negative samples
     0.4, 0.5, 0.7, 0.9]   # scores of the positive samples
)



In [3]:

    
# Element-wise comparison: True wherever the score is at least 0.5.
print(y_score >= 0.5)









    



[False False  True  True False  True  True  True]



In [4]:

    
# Cast the boolean mask to 0/1 so it reads as predicted class labels.
print((y_score >= 0.5).astype(int))









    



[0 0 1 1 0 1 1 1]



In [5]:

    
def fpr_score(y_true, y_pred):
    """Return the false positive rate, FP / (TN + FP).

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels (a boolean mask such as ``y_score >= th`` is what
        this notebook passes).

    Returns
    -------
    The share of actual negatives that were predicted positive.

    Notes
    -----
    The confusion matrix is unpacked into exactly four cells, so the inputs
    must produce a 2x2 matrix (i.e. a binary problem).
    """
    matrix = confusion_matrix(y_true, y_pred)
    # Row-major order of the 2x2 matrix: TN, FP, FN, TP.
    true_neg, false_pos, _false_neg, _true_pos = matrix.ravel()
    actual_negatives = true_neg + false_pos
    return false_pos / actual_negatives



In [6]:

    
# False positive rate when predictions come from thresholding the scores at 0.5.
print(fpr_score(y_true, y_score >= 0.5))

0.5



In [7]:

    
# Recall (used as the true positive rate, TPR, in this notebook) at the same 0.5 threshold.
print(recall_score(y_true, y_score >= 0.5))



In [8]:

    
# Lowest threshold of interest: the smallest score in the data.
th_min = y_score.min()
print(th_min)

0.2



In [9]:

    
# With the threshold at the minimum score, every sample satisfies score >= threshold.
print((y_score >= th_min).astype(int))









    



[1 1 1 1 1 1 1 1]



In [10]:

    
# FPR when the threshold is the minimum score.
print(fpr_score(y_true, y_score >= th_min))

1.0



In [11]:

    
# Recall (TPR) when the threshold is the minimum score.
print(recall_score(y_true, y_score >= th_min))

1.0



In [12]:

    
# A threshold strictly above every score: the largest score plus one.
th_max = y_score.max() + 1
print(th_max)

1.9



In [13]:

    
# No score reaches th_max, so no sample is predicted positive.
print((y_score >= th_max).astype(int))









    



[0 0 0 0 0 0 0 0]



In [14]:

    
# FPR when the threshold lies above every score.
print(fpr_score(y_true, y_score >= th_max))

0.0



In [15]:

    
# Recall (TPR) when the threshold lies above every score.
print(recall_score(y_true, y_score >= th_max))

0.0



In [16]:

    
# Collect labels and scores in one frame so per-threshold metrics can be added as columns.
df = pd.DataFrame({'true': y_true, 'score': y_score})



In [17]:

    
# Use each sample's own score as the decision threshold and record the
# resulting recall (TPR) and false positive rate (FPR) next to it.
df['TPR'] = df['score'].map(lambda th: recall_score(y_true, y_score >= th))
df['FPR'] = df['score'].map(lambda th: fpr_score(y_true, y_score >= th))



In [18]:

    
# Show labels, scores and the TPR/FPR obtained when each score is used as the threshold.
print(df)









    



   true  score   TPR   FPR
0     0    0.2  1.00  1.00
1     0    0.3  1.00  0.75
2     0    0.6  0.50  0.50
3     0    0.8  0.25  0.25
4     1    0.4  1.00  0.50
5     1    0.5  0.75  0.50
6     1    0.7  0.50  0.25
7     1    0.9  0.25  0.00



In [19]:

    
# Same table ordered from the highest score to the lowest.
print(df.sort_values('score', ascending=False))









    



   true  score   TPR   FPR
7     1    0.9  0.25  0.00
3     0    0.8  0.25  0.25
6     1    0.7  0.50  0.25
2     0    0.6  0.50  0.50
5     1    0.5  0.75  0.50
4     1    0.4  1.00  0.50
1     0    0.3  1.00  0.75
0     0    0.2  1.00  1.00



In [20]:

    
# roc_curve returns three arrays, unpacked here as FPR, TPR and thresholds.
# drop_intermediate=False is passed so that no threshold is left out of the result.
fpr_all, tpr_all, th_all = roc_curve(y_true, y_score,
                                     drop_intermediate=False)



In [21]:

    
# Put the three roc_curve arrays side by side, one row per threshold.
df_roc = pd.DataFrame({'th_all': th_all, 'tpr_all': tpr_all, 'fpr_all': fpr_all})



In [22]:

    
# Show the thresholds returned by roc_curve together with their TPR and FPR.
print(df_roc)









    



   th_all  tpr_all  fpr_all
0     1.9     0.00     0.00
1     0.9     0.25     0.00
2     0.8     0.25     0.25
3     0.7     0.50     0.25
4     0.6     0.50     0.50
5     0.5     0.75     0.50
6     0.4     1.00     0.50
7     0.3     1.00     0.75
8     0.2     1.00     1.00



In [23]:

    
# Perfectly separable example: four negatives followed by four positives,
# with scores 0.1, 0.2, ..., 0.8 so every positive outranks every negative.
y_true_perfect = np.repeat([0, 1], 4)
y_score_perfect = np.arange(1, 9) / 10



In [24]:

    
# Ground-truth labels of the perfectly separable example.
print(y_true_perfect)









    



[0 0 0 0 1 1 1 1]



In [25]:

    
# Predicted labels at threshold 0.5, for comparison with y_true_perfect.
print((y_score_perfect >= 0.5).astype(int))









    



[0 0 0 0 1 1 1 1]



In [26]:

    
# FPR of the perfectly separable example at threshold 0.5.
print(fpr_score(y_true_perfect, y_score_perfect >= 0.5))

0.0



In [27]:

    
# Recall (TPR) of the perfectly separable example at threshold 0.5.
print(recall_score(y_true_perfect, y_score_perfect >= 0.5))

1.0



In [28]:

    
# Keep the whole roc_curve result as one tuple; index 0 is FPR and index 1 is TPR.
roc_p = roc_curve(y_true_perfect, y_score_perfect, drop_intermediate=False)



In [29]:

    
# savefig does not create missing directories, so make sure the output
# directory exists before writing the figure.
os.makedirs('data/dst', exist_ok=True)

# ROC curve of the perfectly separable example: FPR on x, TPR on y.
fig, ax = plt.subplots()
ax.plot(roc_p[0], roc_p[1], marker='o')
ax.set_xlabel('FPR: False positive rate')
ax.set_ylabel('TPR: True positive rate')
ax.grid()
fig.savefig('data/dst/sklearn_roc_curve_perfect.png')
# Close this figure explicitly so it does not stay open after saving.
plt.close(fig)



In [30]:

    
# Same scores as the perfect example, but one negative/positive pair
# (positions 3 and 4) is swapped in the labels, so the ranking is no longer perfect.
y_true_1 = np.array([0, 0, 0, 1, 0, 1, 1, 1])
y_score_1 = y_score_perfect



In [31]:

    
# ROC result (FPR, TPR, thresholds) for the first imperfect labelling.
roc_1 = roc_curve(y_true_1, y_score_1, drop_intermediate=False)



In [32]:

    
# Same scores again, with labels that disagree more with the score order:
# two positives sit below two negatives.
y_true_2 = np.array([0, 0, 1, 1, 0, 0, 1, 1])
y_score_2 = y_score_perfect



In [33]:

    
# ROC result (FPR, TPR, thresholds) for the second imperfect labelling.
roc_2 = roc_curve(y_true_2, y_score_2, drop_intermediate=False)



In [34]:

    
# savefig does not create missing directories, so make sure the output
# directory exists before writing the figure.
os.makedirs('data/dst', exist_ok=True)

# Overlay the perfect curve and the two imperfect ones on a single axes;
# each curve gets its own marker so they can be told apart.
fig, ax = plt.subplots()
ax.plot(roc_p[0], roc_p[1], marker='s')
ax.plot(roc_1[0], roc_1[1], marker='o')
ax.plot(roc_2[0], roc_2[1], marker='x')
ax.set_xlabel('FPR: False positive rate')
ax.set_ylabel('TPR: True positive rate')
ax.grid()
fig.savefig('data/dst/sklearn_roc_curve_compare.png')
# Close this figure explicitly so it does not stay open after saving.
plt.close(fig)



In [35]:

    
# Baseline for the score-transformation comparison: labels follow the
# pattern 0, 0, 1, 1 twice, and the scores rise from 0.1 to 0.8 in steps of 0.1.
y_true_org = np.tile([0, 0, 1, 1], 2)
y_score_org = np.arange(1, 9) / 10



In [36]:

    
# ROC result for the baseline scores.
roc_org = roc_curve(y_true_org, y_score_org, drop_intermediate=False)



In [37]:

    
# Halve every score: the values change but the relative order of the samples does not.
y_score_scale = y_score_org / 2
print(y_score_scale)









    



[0.05 0.1  0.15 0.2  0.25 0.3  0.35 0.4 ]



In [38]:

    
# ROC result for the halved scores, using the same labels as the baseline.
roc_scale = roc_curve(y_true_org, y_score_scale, drop_intermediate=False)



In [39]:

    
# Scores with very uneven gaps (two near 0, six near 1) that are still in ascending
# order, i.e. the samples are ranked the same way as by y_score_org.
y_score_interval = np.array([0.01, 0.02, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96])



In [40]:

    
# ROC result for the unevenly spaced scores, using the same labels as the baseline.
roc_interval = roc_curve(y_true_org, y_score_interval, drop_intermediate=False)



In [41]:

    
# savefig does not create missing directories, so make sure the output
# directory exists before writing the figure.
os.makedirs('data/dst', exist_ok=True)

# Draw the baseline, halved and unevenly spaced score variants on one axes.
# Different markers and line styles keep coinciding curves distinguishable.
fig, ax = plt.subplots()
ax.plot(roc_org[0], roc_org[1], marker='s')
ax.plot(roc_scale[0], roc_scale[1], marker='o', linestyle='-.')
ax.plot(roc_interval[0], roc_interval[1], marker='x', linestyle=':')
ax.set_xlabel('FPR: False positive rate')
ax.set_ylabel('TPR: True positive rate')
ax.grid()
fig.savefig('data/dst/sklearn_roc_curve_same.png')
# Close this figure explicitly so it does not stay open after saving.
plt.close(fig)



In [42]:

    
# Wrap the unevenly spaced scores in a Series so pandas ranking can be applied.
s = pd.Series(y_score_interval)



In [43]:

    
# The raw score values.
print(s)









    



0    0.01
1    0.02
2    0.91
3    0.92
4    0.93
5    0.94
6    0.95
7    0.96
dtype: float64



In [44]:

    
# Replace each score by its rank; the scores are already ascending, so the ranks run 1.0 to 8.0.
print(s.rank())









    



0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
6    7.0
7    8.0
dtype: float64



In [45]:

    
# Fix the global NumPy seed so the random scores are reproducible.
np.random.seed(0)
# 5000 negatives followed by 5000 positives ...
y_true_random = np.repeat([0, 1], 5000)
# ... scored with uniform random numbers that ignore the labels entirely.
y_score_random = np.random.rand(10000)



In [46]:

    
# ROC result for the random scores; unlike the earlier calls, drop_intermediate
# is left at its default here.
roc_random = roc_curve(y_true_random, y_score_random)



In [47]:

    
# savefig does not create missing directories, so make sure the output
# directory exists before writing the figure.
os.makedirs('data/dst', exist_ok=True)

# ROC curve for scores drawn at random, independently of the labels.
# No markers here: the curve has far more points than the toy examples.
fig, ax = plt.subplots()
ax.plot(roc_random[0], roc_random[1])
ax.set_xlabel('FPR: False positive rate')
ax.set_ylabel('TPR: True positive rate')
ax.grid()
fig.savefig('data/dst/sklearn_roc_curve_random.png')
# Close this figure explicitly so it does not stay open after saving.
plt.close(fig)