In [3]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random
In [4]:
import sys
# Make the parent directory importable so project-local modules can be found.
sys.path.append('..')
In [5]:
def prb2logit(x):
    """Map probabilities in (0, 1) to log-odds (the logit transform)."""
    odds = x/(1.-x)
    return np.log(odds)

def logit2prb(x):
    """Map log-odds back to probabilities (the logistic sigmoid)."""
    return 1./(1+np.exp(-x))
Load the cross-validation results generated by notebook 140907-CV.
In [6]:
# Pickled CV results: a nested mapping target -> iteration -> list of (y_true, y_pred) pairs
# (structure inferred from the consuming loop below).
# NOTE(review): pickle.load executes arbitrary code on untrusted files; this is a
# locally generated cache, so it is assumed trusted.
fnamecv = '../data-cache/140907-CV.n3.gen8_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.pkl'
with open(fnamecv, 'rb') as fp:
    target2iter2ys = pickle.load(fp)
In [74]:
from sklearn.metrics import roc_auc_score
def p(a,b):
    """Format a pair of AUC-like scores as truncated per-mille integers, e.g. '712 E705'."""
    pooled_mil = 1000*a
    mean_mil = 1000*b
    return '%d E%d' % (pooled_mil, mean_mil)
# Aggregate per-(target, segment, shuffle) CV predictions into pooled arrays and
# report ROC AUC at three levels: per segment, per target, and over all data.
# Stores per-target (labels, logits) in target2ys for the weight optimization below.
target2ys = {}
all_ytest = all_y_proba =None
all_aucs = []
# iterate over all targets (patients/dogs)
for target, iter2ys in target2iter2ys.iteritems():
    target_ytest = target_y_proba =None # accumulate all results for that target
    target_aucs = []
    print target,
    # iterate over all segments of that patient/dog, each time a different single segment
    # is used for testing
    for ys in iter2ys.itervalues():
        ytest = y_proba =None # accumulate all results for that segment
        aucs = []
        # iterate over 3 different shuffles to generate random training samples
        for yt, yp in ys: # real/estimated
            ytest = yt if ytest is None else np.concatenate((ytest,yt))
            y_proba = yp if y_proba is None else np.concatenate((y_proba,yp))
            aucs.append(roc_auc_score(yt, yp))
        # pooled-AUC vs mean-of-shuffle-AUCs for this segment
        print p(roc_auc_score(ytest, y_proba), np.mean(aucs)), # segment results
        target_aucs += aucs
        target_ytest = ytest if target_ytest is None else np.concatenate((target_ytest,ytest))
        target_y_proba = y_proba if target_y_proba is None else np.concatenate((target_y_proba,y_proba))
    print target,p(roc_auc_score(target_ytest, target_y_proba),np.mean(target_aucs)) # target results
    all_aucs += target_aucs
    all_ytest = target_ytest if all_ytest is None else np.concatenate((all_ytest,target_ytest))
    all_y_proba = target_y_proba if all_y_proba is None else np.concatenate((all_y_proba,target_y_proba))
#     if target == 'Dog_3':
#         pl.hist(target_aucs,alpha=0.5)
    # keep logits (not probabilities) so they can be linearly re-weighted later
    target2ys[target] = (target_ytest, prb2logit(target_y_proba))
print p(roc_auc_score(all_ytest, all_y_proba),np.mean(all_aucs)) # all data results
print
Use hyperopt to optimize the combined ROC AUC, which is a complex (non-differentiable) function of the per-target weights.
In [95]:
# WEIGHTS = ['bias', 'Dog_1','Dog_2','Dog_3','Dog_4','Dog_5','Patient_1','Patient_2',]
WEIGHTS = ['Dog_1','Dog_2','Dog_3','Dog_4','Dog_5','Patient_1','Patient_2',]
def objective(args):
    """Return 1 - ROC AUC of the pooled predictions after scaling each
    target's logits by its weight (args ordered as in WEIGHTS)."""
    pooled_true = None
    pooled_logit = None
    for target, (target_ytest, target_y_proba) in target2ys.iteritems():
        weight = args[WEIGHTS.index(target)]
        pooled_true = target_ytest if pooled_true is None else np.concatenate((pooled_true, target_ytest))
        scaled = weight * target_y_proba
        pooled_logit = scaled if pooled_logit is None else np.concatenate((pooled_logit, scaled))
    # pooled_logit = logit2prb(pooled_logit + args[WEIGHTS.index('bias')])
    pooled_prb = logit2prb(pooled_logit)
    return 1. - roc_auc_score(pooled_true, pooled_prb)
Define a search space with one multiplicative weight per target.
In [96]:
from hyperopt import hp
# One multiplicative weight per target, searched on a log scale over [0.3, 3].
# Tuple order must match WEIGHTS, since objective() indexes args by WEIGHTS.index(target).
space = (
#     hp.normal('bias',0,1),
    hp.loguniform('Dog_1', np.log(0.3), np.log(3.)),
    hp.loguniform('Dog_2', np.log(0.3), np.log(3.)),
    hp.loguniform('Dog_3', np.log(0.3), np.log(3.)),
    hp.loguniform('Dog_4', np.log(0.3), np.log(3.)),
    hp.loguniform('Dog_5', np.log(0.3), np.log(3.)),
    hp.loguniform('Patient_1', np.log(0.3), np.log(3.)),
    hp.loguniform('Patient_2', np.log(0.3), np.log(3.)),
)
Minimize the objective over the search space.
In [97]:
from hyperopt import fmin, tpe
# TPE search over the weight space; 1000 evaluations of the CV objective.
# NOTE(review): no random seed is fixed, so the result is not exactly reproducible.
best = fmin(objective, space, algo=tpe.suggest, max_evals=1000)
In [98]:
# Inspect the optimized per-target weights found by hyperopt.
best
Out[98]:
In [99]:
# AUC achieved with the optimized weights (objective returns 1 - AUC).
1.-objective([best[w] for w in WEIGHTS])
Out[99]:
In [100]:
# Baseline AUC with all weights equal to 1. The 'bias' branch is dead here,
# since 'bias' was removed from WEIGHTS above.
1.-objective([0. if w=='bias' else 1. for w in WEIGHTS])
Out[100]:
In [101]:
# Submission file holding the raw (uncalibrated) test predictions.
fname = '../submissions/140906-predict-direct.2.csv'
In [102]:
# Quick sanity check of the file: row count and header.
!wc -l {fname}
!head {fname}
In [103]:
submission = pd.read_csv(fname)
In [104]:
# bias = best['bias']
bias = 0.
def calibrate(r):
    """Rescale one submission row's preictal probability by its target's learned weight.

    The clip name begins with the target id (first two '_'-separated tokens,
    e.g. 'Dog_1'), which selects the weight from `best`; scaling happens in
    logit space, then maps back to a probability.
    """
    clip_parts = r['clip'].split('_')
    target = '_'.join(clip_parts[:2])
    weight = best[target]
    return logit2prb(prb2logit(r.preictal) * weight + bias)
In [105]:
# Apply the per-target calibration to every row of the submission.
submission.preictal = submission.apply(calibrate, axis=1)
In [107]:
submission.to_csv('../submissions/140912-rank-calibrate-1.1.csv', index=False)
In [109]:
# Sanity check: row count and first rows of the calibrated submission.
!wc -l ../submissions/140912-rank-calibrate-1.1.csv
!head ../submissions/140912-rank-calibrate-1.1.csv
In [ ]: