In [1]:
import pandas as pd

In [75]:
# classification.csv holds the true labels ('true') and the classifier's predictions ('pred')
c = pd.read_csv('resources/classification.csv')

In [83]:
tp = len(c[(c['true'] == 1) & (c['pred'] == 1)])  # true positives
fp = len(c[(c['true'] == 0) & (c['pred'] == 1)])  # false positives
fn = len(c[(c['true'] == 1) & (c['pred'] == 0)])  # false negatives
tn = len(c[(c['true'] == 0) & (c['pred'] == 0)])  # true negatives

In [84]:
print("Total: {}".format(len(c)))
print("Total: {}".format((tp + fp + fn + tn)))
print("{} {} {} {}".format(tp, fp, fn, tn))


Total: 200
Total: 200
43 34 59 64
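
As a cross-check (not part of the original run), sklearn's confusion_matrix should reproduce the same four counts from the same DataFrame c:

from sklearn.metrics import confusion_matrix

# ravel() on a binary confusion matrix yields the counts in (tn, fp, fn, tp) order
tn2, fp2, fn2, tp2 = confusion_matrix(c['true'], c['pred']).ravel()
print("{} {} {} {}".format(tn2, fp2, fn2, tp2))  # expected: 64 34 59 43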

In [31]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# The same quality metrics computed by scikit-learn
ac = accuracy_score(c['true'], c['pred'])
ps = precision_score(c['true'], c['pred'])
rs = recall_score(c['true'], c['pred'])
f1 = f1_score(c['true'], c['pred'])

print("{} {} {} {}".format(ac, ps, rs, f1))


0.535 0.558441558442 0.421568627451 0.480446927374
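
These values agree with the confusion-matrix counts above; a quick added sanity check (reusing tp, fp, fn and tn from the earlier cell):

# Metrics derived directly from the counts 43, 34, 59, 64
accuracy  = (tp + tn) / float(len(c))                       # 107 / 200 = 0.535
precision = tp / float(tp + fp)                             # 43 / 77  ≈ 0.5584
recall    = tp / float(tp + fn)                             # 43 / 102 ≈ 0.4216
f1_manual = 2 * precision * recall / (precision + recall)   # ≈ 0.4804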

In [33]:
s = pd.read_csv('resources/scores.csv')
s.head()


Out[33]:
true score_logreg score_svm score_knn score_tree
0 0 0.683832 0.145976 0.787063 0.500000
1 1 0.801966 0.239511 1.000000 0.833333
2 0 0.382315 -0.245701 0.000000 0.000000
3 1 0.506797 -0.137058 0.000000 0.105263
4 1 0.488781 -0.154148 0.000000 0.105263

In [39]:
from sklearn.metrics import roc_auc_score

# Area under the ROC curve for each classifier's score column
lr = roc_auc_score(s['true'], s['score_logreg'])
svm = roc_auc_score(s['true'], s['score_svm'])
knn = roc_auc_score(s['true'], s['score_knn'])
tree = roc_auc_score(s['true'], s['score_tree'])

print("{} {} {} {}".format(lr, svm, knn, tree))


0.71918767507 0.708683473389 0.635154061625 0.691926770708
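
An added sketch (reusing s and roc_auc_score from above) that picks the score column with the largest AUC programmatically; logistic regression comes out on top here:

# Compute the AUC for every score column and keep the best one
aucs = {col: roc_auc_score(s['true'], s[col]) for col in s.columns if col != 'true'}
best = max(aucs, key=aucs.get)  # 'score_logreg' (AUC ≈ 0.719)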

In [73]:
from sklearn.metrics import precision_recall_curve

# For each classifier, build a precision/recall table over all thresholds.
# precision_recall_curve returns one fewer threshold than precision/recall
# values, so the 'thresholds' column ends with a NaN after the transpose.
lr_prec, lr_recall, lr_thres = precision_recall_curve(s['true'], s['score_logreg'])
lr_df = pd.DataFrame([lr_prec, lr_recall, lr_thres]).transpose()
lr_df.columns = ['precision', 'recall', 'thresholds']

svm_prec, svm_recall, svm_thres = precision_recall_curve(s['true'], s['score_svm'])
svm_df = pd.DataFrame([svm_prec, svm_recall, svm_thres]).transpose()
svm_df.columns = ['precision', 'recall', 'thresholds']

knn_prec, knn_recall, knn_thres = precision_recall_curve(s['true'], s['score_knn'])
knn_df = pd.DataFrame([knn_prec, knn_recall, knn_thres]).transpose()
knn_df.columns = ['precision', 'recall', 'thresholds']

t_prec, t_recall, t_thres = precision_recall_curve(s['true'], s['score_tree'])
t_df = pd.DataFrame([t_prec, t_recall, t_thres]).transpose()
t_df.columns = ['precision', 'recall', 'thresholds']

# Maximum precision achievable while keeping recall at or above 0.70
print("LR   max precision: {}".format(lr_df[lr_df['recall'] >= 0.70]['precision'].max()))
print("Svm  max precision: {}".format(svm_df[svm_df['recall'] >= 0.70]['precision'].max()))
print("Knn  max precision: {}".format(knn_df[knn_df['recall'] >= 0.70]['precision'].max()))
print("Tree max precision: {}".format(t_df[t_df['recall'] >= 0.70]['precision'].max()))


LR   max precision: 0.63025210084
Svm  max precision: 0.622807017544
Knn  max precision: 0.606557377049
Tree max precision: 0.651785714286
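
The same maxima can be read straight from the arrays returned by precision_recall_curve, without the intermediate DataFrames (an added sketch using numpy boolean indexing on the tree scores):

# Maximum precision among all thresholds where recall stays at or above 0.70
prec, rec, _ = precision_recall_curve(s['true'], s['score_tree'])
print(prec[rec >= 0.70].max())  # ≈ 0.6518, matching the value printed above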