In [1]:
import pandas as pd
In [75]:
# Load the classification table; later cells read its 'true' and 'pred' columns.
c = pd.read_csv(filepath_or_buffer='resources/classification.csv')
In [83]:
# Confusion-matrix counts, obtained by summing boolean masks over `c`.
# Each count is the number of rows with the given ('true', 'pred') pair;
# int() keeps the results as plain Python ints, as len() would.
tp = int(((c['true'] == 1) & (c['pred'] == 1)).sum())  # true = 1, pred = 1
fp = int(((c['true'] == 0) & (c['pred'] == 1)).sum())  # true = 0, pred = 1
fn = int(((c['true'] == 1) & (c['pred'] == 0)).sum())  # true = 1, pred = 0
tn = int(((c['true'] == 0) & (c['pred'] == 0)).sum())  # true = 0, pred = 0
In [84]:
# Sanity check: the row count of `c` (first line) next to the sum of the four
# confusion-matrix counts (second line), followed by the counts themselves.
print(
    "Total: {}".format(len(c)),
    "Total: {}".format((tp + fp + fn + tn)),
    "{} {} {} {}".format(tp, fp, fn, tn),
    sep="\n",
)
In [31]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
# Evaluate the four sklearn metrics on the same (true, pred) columns, in the
# order accuracy, precision, recall, F1, and print them on one line.
ac, ps, rs, f1 = (
    metric(c['true'], c['pred'])
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
print("{} {} {} {}".format(ac, ps, rs, f1))
In [33]:
# Load the scores table (later cells read 'true' and the score_* columns)
# and preview its first five rows as the cell output.
s = pd.read_csv(filepath_or_buffer='resources/scores.csv')
s.head(n=5)
Out[33]:
In [39]:
from sklearn.metrics import roc_auc_score
# ROC AUC of each score column against the 'true' labels, in the order
# logistic regression, SVM, kNN, decision tree.
lr, svm, knn, tree = (
    roc_auc_score(s['true'], s[score_col])
    for score_col in ('score_logreg', 'score_svm', 'score_knn', 'score_tree')
)
print("{} {} {} {}".format(lr, svm, knn, tree))
In [73]:
from sklearn.metrics import precision_recall_curve
# Recall floor applied when comparing the classifiers' best precision.
MIN_RECALL = 0.70


def _pr_curve_frame(precision, recall, thresholds):
    """Collect the outputs of ``precision_recall_curve`` into one DataFrame.

    Parameters
    ----------
    precision, recall, thresholds : array-like
        The three arrays returned by ``precision_recall_curve``. Per the
        scikit-learn documentation ``thresholds`` is one element shorter than
        the other two (the final precision/recall point has no threshold).

    Returns
    -------
    pandas.DataFrame
        Columns ``precision``, ``recall`` and ``thresholds``; rows without a
        matching threshold hold NaN in ``thresholds``.
    """
    # Wrapping each array in a Series lets pandas align them on position and
    # pad the shorter one with NaN explicitly, instead of relying on how the
    # DataFrame constructor treats a ragged list of arrays.
    return pd.DataFrame({
        'precision': pd.Series(precision),
        'recall': pd.Series(recall),
        'thresholds': pd.Series(thresholds),
    })


def _max_precision_at_recall(pr_frame, min_recall=MIN_RECALL):
    """Return the largest precision among rows with recall >= ``min_recall``.

    Yields NaN when no row satisfies the recall floor.
    """
    return pr_frame.loc[pr_frame['recall'] >= min_recall, 'precision'].max()


# Precision-recall curve per classifier; the raw arrays and the frames keep
# their original names so any later cell that uses them still works.
lr_prec, lr_recall, lr_thres = precision_recall_curve(s['true'], s['score_logreg'])
lr_df = _pr_curve_frame(lr_prec, lr_recall, lr_thres)

svm_prec, svm_recall, svm_thres = precision_recall_curve(s['true'], s['score_svm'])
svm_df = _pr_curve_frame(svm_prec, svm_recall, svm_thres)

knn_prec, knn_recall, knn_thres = precision_recall_curve(s['true'], s['score_knn'])
knn_df = _pr_curve_frame(knn_prec, knn_recall, knn_thres)

t_prec, t_recall, t_thres = precision_recall_curve(s['true'], s['score_tree'])
t_df = _pr_curve_frame(t_prec, t_recall, t_thres)

# Best precision each classifier reaches while keeping recall >= MIN_RECALL.
print("LR max precision: {}".format(_max_precision_at_recall(lr_df)))
print("Svm max precision: {}".format(_max_precision_at_recall(svm_df)))
print("Knn max precision: {}".format(_max_precision_at_recall(knn_df)))
print("Tree max precision: {}".format(_max_precision_at_recall(t_df)))