In [3]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random

In [4]:
import sys
sys.path.append('..')

In [5]:
def prb2logit(x):
    """Convert a probability (scalar or numpy array) to log-odds: log(x / (1 - x))."""
    odds = x/(1.-x)
    return np.log(odds)
def logit2prb(x):
    """Convert log-odds (scalar or numpy array) to a probability: 1 / (1 + exp(-x))."""
    denominator = 1+np.exp(-x)
    return 1./denominator

Read CV for best features/model

From 140907-CV


In [6]:
# Cached cross-validation results. The cells below iterate it as
# target -> iteration -> sequence of (true labels, predicted probabilities).
# NOTE: unpickling executes code from the file; only load caches produced locally.
fnamecv = '../data-cache/140907-CV.n3.gen8_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.pkl'
with open(fnamecv, 'rb') as cv_cache:
    target2iter2ys = pickle.load(cv_cache)

In [74]:
from sklearn.metrics import roc_auc_score
def p(a,b):
    """Format two scores in per-mille, e.g. (0.5, 0.25) -> '500 E250'.

    `a` is printed bare and `b` after an 'E' prefix; %d truncates toward zero.
    """
    per_mille = (1000*a, 1000*b)
    return '%d E%d' % per_mille

target2ys = {}

all_ytest = all_y_proba =None
all_aucs = []
# iterate over all targets (patients/dogs)
for target, iter2ys in target2iter2ys.iteritems():
    target_ytest = target_y_proba =None # accumulate all results for that segment
    target_aucs = []
    print target,
    # iterate over all segments of that patient/dog, each time a different single segment
    # is used for testing
    for ys in iter2ys.itervalues():
        ytest = y_proba =None # accumulate all results for that segment
        aucs = []
        # iterate over 3 different shuffles to generate random training samples
        for yt, yp in ys: # real/estimated
            ytest = yt if ytest is None else np.concatenate((ytest,yt))
            y_proba = yp if y_proba is None else np.concatenate((y_proba,yp))
            aucs.append(roc_auc_score(yt, yp))
        print p(roc_auc_score(ytest, y_proba), np.mean(aucs)), # segment results
        target_aucs += aucs
        target_ytest = ytest if target_ytest is None else np.concatenate((target_ytest,ytest))
        target_y_proba = y_proba if target_y_proba is None else np.concatenate((target_y_proba,y_proba))
    print target,p(roc_auc_score(target_ytest, target_y_proba),np.mean(target_aucs)) # target results
    all_aucs += target_aucs        
    all_ytest = target_ytest if all_ytest is None else np.concatenate((all_ytest,target_ytest))
    all_y_proba = target_y_proba if all_y_proba is None else np.concatenate((all_y_proba,target_y_proba))
#     if target == 'Dog_3':
#         pl.hist(target_aucs,alpha=0.5)
    target2ys[target] = (target_ytest, prb2logit(target_y_proba))
print p(roc_auc_score(all_ytest, all_y_proba),np.mean(all_aucs)) # all data results
print


Dog_2 884 E916 908 E925 901 E927 Dog_2 898 E923
Dog_3 700 E707 705 E713 706 E702 Dog_3 703 E707
Dog_1 553 E551 541 E534 607 E610 Dog_1 568 E565
Dog_4 652 E697 657 E685 663 E680 Dog_4 656 E688
Dog_5 947 E954 930 E927 936 E934 Dog_5 938 E938
Patient_2 953 E970 971 E976 966 E998 Patient_2 964 E981
Patient_1 975 E998 950 E985 987 E981 Patient_1 970 E988
775 E774

Optimize target weights

Use hyperopt to search for the per-target weights that maximize the pooled ROC AUC, a rank-based metric that is awkward to optimize directly.


In [95]:
# Position of each target's weight in the argument vector passed to objective().
# (A variant with a leading 'bias' entry was tried and is currently disabled.)
WEIGHTS = [
    'Dog_1',
    'Dog_2',
    'Dog_3',
    'Dog_4',
    'Dog_5',
    'Patient_1',
    'Patient_2',
]
def objective(args):
    """Loss minimized by hyperopt: 1 - ROC AUC over all targets pooled together.

    args -- sequence of weights ordered like WEIGHTS; the logit scores of each
            target in target2ys are multiplied by that target's weight before
            the targets are pooled and mapped back to probabilities.
    """
    labels = scores = None
    for name, (y_true, y_logit) in target2ys.iteritems():
        weight = args[WEIGHTS.index(name)]
        scaled = weight * y_logit
        if labels is None:
            labels, scores = y_true, scaled
        else:
            labels = np.concatenate((labels, y_true))
            scores = np.concatenate((scores, scaled))
    # disabled variant: add args[WEIGHTS.index('bias')] to the logits before converting
    pooled_proba = logit2prb(scores)
    return 1.-roc_auc_score(labels, pooled_proba)

Define the search space: one weight per target.


In [96]:
from hyperopt import hp
# Bounds of the log-uniform prior shared by every per-target weight.
WEIGHT_LOW, WEIGHT_HIGH = 0.3, 3.
# One dimension per entry of WEIGHTS, generated in the same order, so position i
# of the sampled arguments always corresponds to WEIGHTS[i] (objective() looks
# weights up by that position).
# NOTE: if a 'bias' entry is ever re-enabled in WEIGHTS it needs its own prior
# (previously hp.normal('bias',0,1)) rather than this log-uniform one.
space = tuple(
    hp.loguniform(name, np.log(WEIGHT_LOW), np.log(WEIGHT_HIGH))
    for name in WEIGHTS
)

Minimize the objective over the search space.


In [97]:
from hyperopt import fmin, tpe
# Tree-structured Parzen Estimator search with a budget of 1000 objective evaluations.
# No seed is set here, so the exact weights found can differ between runs.
best = fmin(
    objective,
    space,
    algo=tpe.suggest,
    max_evals=1000,
)

In [98]:
# show the result of the search: hyperopt label (target name) -> chosen weight
best


Out[98]:
{'Dog_1': 1.1549584811248075,
 'Dog_2': 1.8207281523097631,
 'Dog_3': 1.6682958471066418,
 'Dog_4': 0.4463345797781237,
 'Dog_5': 1.881232899511874,
 'Patient_1': 1.5508701193427903,
 'Patient_2': 1.2209716284227234}

In [99]:
# pooled ROC AUC with the optimized weights (objective returns 1 - AUC)
1.-objective([best[w] for w in WEIGHTS])


Out[99]:
0.79770657603448825

In [100]:
# baseline for comparison: every weight set to 1 (and 0 for a 'bias' entry,
# should one be present in WEIGHTS), i.e. the scores are left unscaled
1.-objective([0. if w=='bias' else 1. for w in WEIGHTS])


Out[100]:
0.77511414525936573

Read best submission


In [101]:
# existing submission file whose predictions are recalibrated below
fname = '../submissions/140906-predict-direct.2.csv'

In [102]:
!wc -l {fname}
!head {fname}


    3936 ../submissions/140906-predict-direct.2.csv
clip,preictal
Dog_1_test_segment_0001.mat,0.528314974292854
Dog_1_test_segment_0002.mat,0.095106974579729
Dog_1_test_segment_0003.mat,0.086835065708654
Dog_1_test_segment_0004.mat,0.205226974175210
Dog_1_test_segment_0005.mat,0.159707544705429
Dog_1_test_segment_0006.mat,0.234699807847022
Dog_1_test_segment_0007.mat,0.143023108985720
Dog_1_test_segment_0008.mat,0.167520311762236
Dog_1_test_segment_0009.mat,0.099032089496216

In [103]:
# columns, per the file header shown above: clip (file name), preictal (probability)
submission = pd.read_csv(fname)

In [104]:
# bias = best['bias']
bias = 0.
def calibrate(r):
    """Rescale the probability of one submission row in logit space.

    r -- row with a 'clip' file name such as 'Dog_1_test_segment_0001.mat'
         and a 'preictal' probability.

    The target is the first two '_'-separated tokens of 'clip' (e.g. 'Dog_1').
    The row's logit is multiplied by best[target], `bias` is added, and the
    result is mapped back to a probability.
    """
    name_tokens = r['clip'].split('_')
    target = '_'.join(name_tokens[:2])
    weight = best[target]
    scaled_logit = prb2logit(r.preictal) * weight + bias
    return logit2prb(scaled_logit)

In [105]:
# Reload the original predictions first so that this cell is idempotent:
# overwriting the column of an already-calibrated frame would apply the
# per-target scaling a second time whenever the cell is re-run.
submission = pd.read_csv(fname)
submission['preictal'] = submission.apply(calibrate, axis=1)

In [107]:
# Save the recalibrated predictions as a new submission file (row index not written).
calibrated_fname = '../submissions/140912-rank-calibrate-1.1.csv'
submission.to_csv(calibrated_fname, index=False)

In [109]:
!wc -l ../submissions/140912-rank-calibrate-1.1.csv
!head ../submissions/140912-rank-calibrate-1.1.csv


    3936 ../submissions/140912-rank-calibrate-1.1.csv
clip,preictal
Dog_1_test_segment_0001.mat,0.5326909435276488
Dog_1_test_segment_0002.mat,0.06901578162344549
Dog_1_test_segment_0003.mat,0.06194807728839789
Dog_1_test_segment_0004.mat,0.17310999177881967
Dog_1_test_segment_0005.mat,0.1281183060242578
Dog_1_test_segment_0006.mat,0.20341011781729287
Dog_1_test_segment_0007.mat,0.11226181027109657
Dog_1_test_segment_0008.mat,0.13566758122461256
Dog_1_test_segment_0009.mat,0.07241452689454828

In [ ]: