In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random
from collections import defaultdict

In [2]:
import sys
# Add the parent directory to the module search path.
# NOTE(review): presumably this is what makes the `common` package (imported
# in a later cell) importable — confirm the package location.
sys.path.append('..')

Read precomputed features

Uncomment the relevant pipeline in ../seizure_detection.py and run:

cd ..
./doall data

In [32]:
# Name of the feature pipeline. It is embedded in the cache keys built by
# read_data() and in the file name of the pickled cross-validation results.
FEATURES = 'gen-8_medianwindow-bandstimecorr-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9'

In [33]:
from common.data import CachedDataLoader
# Loader bound to the ../data-cache directory; read_data() fetches every
# feature set through this object.
cached_data_loader = CachedDataLoader('../data-cache')

In [34]:
def read_data(target, data_type):
    """Fetch one cached feature set through ``cached_data_loader``.

    The cache entry is named ``data_<data_type>_<target>_<FEATURES>``.

    target    -- subject name, e.g. 'Dog_1' or 'Patient_2'
    data_type -- kind of recording, e.g. 'preictal' or 'interictal'

    Returns whatever ``cached_data_loader.load`` returns for that entry.
    """
    key_parts = (data_type, target, FEATURES)
    cache_key = 'data_%s_%s_%s' % key_parts
    # The second argument is passed as None.
    # NOTE(review): presumably "no function to compute the entry on a cache
    # miss" — confirm against CachedDataLoader.load.
    return cached_data_loader.load(cache_key, None)

Predict


In [35]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression as LR

# Classifier shared by every cross-validation fold: it is re-fit from scratch
# on each fold's training set in the CV loop.
# NOTE(review): recent scikit-learn releases reject min_samples_split=1
# (they require >= 2) — confirm against the installed version.
clf = RandomForestClassifier(n_estimators=3000, min_samples_split=1, bootstrap=False,max_depth=20,
                             n_jobs=-1) # n_jobs=-1: run on all available cores

In [36]:
# When True the classifier is fit with per-sample weights
# (the latencies for positive examples, 1 for negative examples).
with_weights = False
# Extra tag inserted in front of FEATURES in the name of the pickle file
# that the cross-validation results are written to.
suffix = ''

Compute AUC for each target separately


In [37]:
target2iter2ys = {}
for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    # positive examples
    pdata = read_data(target, 'preictal')
    Np, NF = pdata.X.shape
    assert 3*(NF//3) == NF
    
    # split the positive examples into segments, each from the same event
    # in each CV-split we will take all examples from the same segment to either train or validate
    segments = []
    start = 0
    last_l = 0
    for i,l in enumerate(pdata.latencies):
        if l<last_l:
            segments.append(np.arange(start,i))
            start = i
        last_l = l
    segments.append(np.arange(start,i+1))
    Ns = len(segments)

    # negative examples
    ndata = read_data(target, 'interictal')
    Nn = ndata.X.shape[0]
    
    npratio = float(Nn)/Np
    print target,1/(1+npratio),Ns,Np,Nn

    iter2ys = defaultdict(list) # {niter: Ns *[(ytest,y_proba)]
    for s in segments:
        for niter in range(3):
            # each time, take one segment for testing and randomly pick negative examples
            Xtestp = pdata.X[s,:]
            weightstest = pdata.latencies[s] # latency for first segment is 1

            Ntrainp = len(s)
            Ntrainn = int(Ntrainp*npratio)
            n = np.array(random.sample(xrange(Nn),Ntrainn))
            Xtestn = ndata.X[n,:]

            Xtrainp = pdata.X[-s,:]
            weightsp = pdata.latencies[-s] # latency for first segment is 1
            Xtrainn = ndata.X[-n,:]
            weightsn = np.ones(Xtrainn.shape[0]) 

            Xtrain = np.concatenate((Xtrainp,Xtrainn))
            weights = np.concatenate((weightsp,weightsn))
            ytrain = np.concatenate((np.ones(Ntrainp),np.zeros(Ntrainn)))
            perm = np.random.permutation(len(ytrain))
            ytrain = ytrain[perm]
            Xtrain = Xtrain[perm,:]
            weights = weights[perm]

            Xtest = np.concatenate((Xtestp,Xtestn))
            ytest = np.concatenate((np.ones(Xtestp.shape[0]),np.zeros(Xtestn.shape[0])))

            if with_weights:
                clf.fit(Xtrain, ytrain, sample_weight=weights)
            else:
                clf.fit(Xtrain, ytrain)

            y_proba = clf.predict_proba(Xtest)[:,1]
            iter2ys[niter].append((ytest, y_proba))
            
            auc = roc_auc_score(ytest, y_proba)
            print '%.3f'%auc,Ntrainp,np.mean(weightstest)
    target2iter2ys[target] = iter2ys
    print


Dog_1 0.277108433735 4 184 480
0.722 46 3.5
0.803 46 3.5
0.739 46 3.5
0.543 46 3.5
0.509 46 3.5
0.494 46 3.5
0.515 46 3.5
0.516 46 3.5
0.553 46 3.5
0.544 46 3.5
0.540 46 3.5
0.413 46 3.5

Dog_2 0.391727493917 7 322 500
0.967 46 3.5
0.965 46 3.5
0.991 46 3.5
0.914 46 3.5
0.899 46 3.5
0.887 46 3.5
0.993 46 3.5
0.999 46 3.5
0.992 46 3.5
1.000 46 3.5
1.000 46 3.5
1.000 46 3.5
0.984 46 3.5
1.000 46 3.5
1.000 46 3.5
0.999 46 3.5
0.999 46 3.5
0.996 46 3.5
0.843 46 3.5
0.896 46 3.5
0.863 46 3.5

Dog_3 0.277108433735 12 552 1440
0.966 46 3.5
0.909 46 3.5
0.961 46 3.5
0.287 46 3.5
0.387 46 3.5
0.433 46 3.5
0.761 46 3.5
0.756 46 3.5
0.745 46 3.5
0.571 46 3.5
0.700 46 3.5
0.666 46 3.5
0.715 46 3.5
0.875 46 3.5
0.755 46 3.5
1.000 46 3.5
1.000 46 3.5
1.000 46 3.5
1.000 46 3.5
1.000 46 3.5
1.000 46 3.5
0.882 46 3.5
0.898 46 3.5
0.914 46 3.5
0.734 46 3.5
0.604 46 3.5
0.537 46 3.5
0.502 46 3.5
0.709 46 3.5
0.620 46 3.5
0.590 46 3.5
0.488 46 3.5
0.533 46 3.5
0.222 46 3.5
0.236 46 3.5
0.246 46 3.5

Dog_4 0.478260869565 17 737 804
0.997 46 3.5
0.936 46 3.5
0.994 46 3.5
0.570 46 3.5
0.792 46 3.5
0.654 46 3.5
0.845 46 3.5
0.882 46 3.5
0.765 46 3.5
0.397 37 4.0
0.571 37 4.0
0.566 37 4.0
0.639 19 5.0
0.401 19 5.0
0.458 19 5.0
0.766 46 3.5
0.564 46 3.5
0.570 46 3.5
0.654 46 3.5
0.609 46 3.5
0.621 46 3.5
0.919 37 4.0
0.924 37 4.0
0.918 37 4.0
0.351 46 3.5
0.275 46 3.5
0.392 46 3.5
0.403 46 3.5
0.535 46 3.5
0.584 46 3.5
0.783 46 3.5
0.742 46 3.5
0.831 46 3.5
0.771 46 3.5
0.756 46 3.5
0.648 46 3.5
0.626 46 3.5
0.583 46 3.5
0.513 46 3.5
0.333 46 3.5
0.383 46 3.5
0.222 46 3.5
0.467 46 3.5
0.706 46 3.5
0.596 46 3.5
0.405 46 3.5
0.574 46 3.5
0.574 46 3.5
0.507 46 3.5
0.401 46 3.5
0.705 46 3.5

Dog_5 0.338235294118 5 230 450
0.927 46 3.5
0.949 46 3.5
0.941 46 3.5
0.997 46 3.5
0.998 46 3.5
0.999 46 3.5
1.000 46 3.5
1.000 46 3.5
1.000 46 3.5
0.968 46 3.5
0.995 46 3.5
0.986 46 3.5
0.918 46 3.5
0.843 46 3.5
0.903 46 3.5

Patient_1 0.734042553191 3 138 50
0.829 46 3.5
0.779 46 3.5
0.997 46 3.5
1.000 46 3.5
1.000 46 3.5
1.000 46 3.5
0.971 46 3.5
0.967 46 3.5
0.936 46 3.5

Patient_2 0.766666666667 3 138 42
0.792 46 3.5
0.957 46 3.5
0.932 46 3.5
0.998 46 3.5
1.000 46 3.5
1.000 46 3.5
0.998 46 3.5
0.767 46 3.5
0.783 46 3.5


In [38]:
# Number of feature columns; NF is reassigned for every target in the CV loop,
# so this shows the value for the last target processed.
NF


Out[38]:
4500

In [39]:
# Persist the per-target cross-validation predictions so that the summary
# cell can reload them by feature name.
fname = '../data-cache/140907-CV.%s%s.pkl' % (suffix, FEATURES)
with open(fname, 'wb') as fp:
    # highest available pickle protocol (what a negative protocol number selects)
    pickle.dump(target2iter2ys, fp, pickle.HIGHEST_PROTOCOL)

In [40]:
# Show the path the cross-validation results were written to.
fname


Out[40]:
'../data-cache/140907-CV.gen-8_medianwindow-bandstimecorr-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9.pkl'

Generate a single AUC score


In [41]:
def p(a,b):
    """Format two scores as integers scaled by 1000: ``'<a> E<b>'``.

    Both values are multiplied by 1000 and rendered with ``%d``, which drops
    the fractional part rather than rounding. In this notebook ``a`` is the
    AUC of the pooled predictions and ``b`` (prefixed with ``E``) is the mean
    of the per-fold AUCs.
    """
    first, second = 1000*a, 1000*b
    return '%d E%d' % (first, second)

for f in [
            'gen-8_medianwindow-bandstimecorr-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9',
            'maxdepth5.gen-8_medianwindow-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9',
            'max_depth5.gen-8_medianwindow-bandscorr-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9',
            'gen-8_medianwindow-bandscorr-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9',
            'max_depth2.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9',
            'max_depth5.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9',
            'max_depth5.p15.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9',
            'max_depth5.p5.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9',
            'max_depth5.p59.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9',
            'max_depth10.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9',
            'gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9',
            'n3.gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600',
#             'n3.1.gen8_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600',
#             'n3.w.gen8_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600',
#             'n3.w.gen16_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600',
        ]:
    all_ytest = all_y_proba =None
    all_aucs = []
    with open('../data-cache/140907-CV.%s.pkl'%f,'rb') as fp:
        target2iter2ys = pickle.load(fp)
    for target, iter2ys in target2iter2ys.iteritems():
        target_ytest = target_y_proba =None
        target_aucs = []
        print target,
        for ys in iter2ys.itervalues():
            ytest = y_proba =None
            aucs = []
            for y in ys:
                yt, yp = y
                ytest = yt if ytest is None else np.concatenate((ytest,yt))
                y_proba = yp if y_proba is None else np.concatenate((y_proba,yp))
                aucs.append(roc_auc_score(yt, yp))
            print p(roc_auc_score(ytest, y_proba), np.mean(aucs)),
            target_aucs += aucs
            target_ytest = ytest if target_ytest is None else np.concatenate((target_ytest,ytest))
            target_y_proba = y_proba if target_y_proba is None else np.concatenate((target_y_proba,y_proba))
        print target,p(roc_auc_score(target_ytest, target_y_proba),np.mean(target_aucs))
        all_aucs += target_aucs        
        all_ytest = target_ytest if all_ytest is None else np.concatenate((all_ytest,target_ytest))
        all_y_proba = target_y_proba if all_y_proba is None else np.concatenate((all_y_proba,target_y_proba))
#         if target == 'Dog_3':
#             pl.hist(target_aucs,alpha=0.5)
    print f,p(roc_auc_score(all_ytest, all_y_proba),np.mean(all_aucs))
    print


Dog_2 941 E957 959 E965 935 E961 Dog_2 945 E961
Dog_3 666 E685 700 E713 675 E700 Dog_3 680 E700
Dog_1 586 E580 601 E591 571 E549 Dog_1 586 E574
Dog_4 585 E613 600 E625 594 E624 Dog_4 593 E621
Dog_5 935 E962 936 E957 943 E965 Dog_5 938 E961
Patient_2 889 E929 855 E907 888 E904 Patient_2 879 E914
Patient_1 938 E933 892 E915 974 E977 Patient_1 935 E942
gen-8_medianwindow-bandstimecorr-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9 748 E752

Dog_2 882 E934 900 E930 858 E912 Dog_2 881 E925
Dog_3 769 E783 787 E802 756 E777 Dog_3 770 E787
Dog_1 606 E603 614 E638 562 E554 Dog_1 594 E598
Dog_4 614 E621 668 E684 611 E635 Dog_4 631 E647
Dog_5 931 E931 941 E937 943 E940 Dog_5 938 E936
Patient_2 911 E884 953 E977 959 E979 Patient_2 939 E947
Patient_1 759 E756 914 E921 933 E891 Patient_1 873 E856
maxdepth5.gen-8_medianwindow-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9 777 E773

Dog_2 882 E915 875 E918 869 E912 Dog_2 876 E915
Dog_3 752 E761 734 E728 742 E752 Dog_3 743 E747
Dog_1 584 E583 588 E586 587 E586 Dog_1 586 E585
Dog_4 611 E595 627 E631 640 E627 Dog_4 626 E618
Dog_5 912 E939 920 E936 912 E926 Dog_5 914 E934
Patient_2 936 E968 939 E975 935 E940 Patient_2 937 E961
Patient_1 927 E902 905 E892 939 E939 Patient_1 922 E911
max_depth5.gen-8_medianwindow-bandscorr-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9 767 E755

Dog_2 872 E927 856 E897 873 E909 Dog_2 867 E911
Dog_3 731 E729 730 E731 732 E740 Dog_3 731 E733
Dog_1 620 E609 600 E593 637 E626 Dog_1 619 E609
Dog_4 591 E566 651 E656 623 E615 Dog_4 622 E612
Dog_5 941 E949 906 E910 935 E952 Dog_5 927 E937
Patient_2 902 E923 940 E966 898 E963 Patient_2 910 E951
Patient_1 962 E974 961 E978 914 E891 Patient_1 945 E948
gen-8_medianwindow-bandscorr-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9 764 E753

Dog_2 848 E895 826 E854 850 E875 Dog_2 842 E875
Dog_3 733 E758 710 E736 736 E776 Dog_3 727 E757
Dog_1 489 E495 468 E462 501 E503 Dog_1 486 E487
Dog_4 574 E609 577 E574 562 E592 Dog_4 571 E591
Dog_5 826 E873 823 E859 837 E889 Dog_5 830 E874
Patient_2 869 E920 923 E939 908 E920 Patient_2 903 E927
Patient_1 823 E764 854 E808 769 E772 Patient_1 813 E781
max_depth2.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9 709 E720

Dog_2 894 E925 892 E925 913 E945 Dog_2 899 E931
Dog_3 775 E786 787 E799 777 E780 Dog_3 780 E788
Dog_1 613 E615 585 E578 629 E624 Dog_1 609 E606
Dog_4 640 E653 578 E587 599 E613 Dog_4 605 E618
Dog_5 937 E932 943 E942 958 E962 Dog_5 946 E945
Patient_2 923 E954 916 E973 925 E901 Patient_2 918 E942
Patient_1 915 E881 833 E764 863 E860 Patient_1 871 E835
max_depth5.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9 776 E764

Dog_2 877 E909 888 E926 913 E938 Dog_2 892 E924
Dog_3 775 E775 774 E775 788 E804 Dog_3 779 E784
Dog_1 562 E561 575 E567 568 E558 Dog_1 568 E562
Dog_4 578 E588 574 E580 578 E567 Dog_4 577 E578
Dog_5 930 E933 936 E925 937 E944 Dog_5 933 E934
Patient_2 956 E996 765 E763 848 E814 Patient_2 860 E858
Patient_1 855 E821 889 E872 840 E796 Patient_1 858 E829
max_depth5.p15.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9 759 E739

Dog_2 890 E930 898 E937 900 E924 Dog_2 896 E930
Dog_3 782 E799 773 E782 793 E811 Dog_3 783 E797
Dog_1 585 E584 549 E546 611 E627 Dog_1 581 E586
Dog_4 580 E566 596 E595 606 E632 Dog_4 595 E598
Dog_5 920 E935 906 E917 933 E944 Dog_5 920 E932
Patient_2 892 E866 951 E944 857 E900 Patient_2 901 E903
Patient_1 889 E904 890 E860 833 E778 Patient_1 873 E847
max_depth5.p5.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9 765 E755

Dog_2 916 E942 895 E919 893 E912 Dog_2 901 E924
Dog_3 787 E806 770 E771 769 E781 Dog_3 776 E786
Dog_1 633 E647 578 E582 629 E638 Dog_1 613 E622
Dog_4 565 E590 576 E571 607 E615 Dog_4 582 E592
Dog_5 942 E943 921 E923 944 E954 Dog_5 936 E940
Patient_2 850 E870 867 E922 937 E943 Patient_2 887 E911
Patient_1 837 E781 894 E857 871 E881 Patient_1 866 E840
max_depth5.p59.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9 768 E753

Dog_2 894 E956 903 E928 875 E916 Dog_2 890 E934
Dog_3 792 E801 772 E790 784 E795 Dog_3 784 E795
Dog_1 606 E601 624 E610 624 E605 Dog_1 618 E605
Dog_4 600 E619 572 E574 576 E583 Dog_4 583 E592
Dog_5 948 E947 945 E959 939 E938 Dog_5 944 E948
Patient_2 881 E849 888 E842 894 E899 Patient_2 886 E864
Patient_1 759 E650 921 E938 887 E896 Patient_1 861 E828
max_depth10.gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9 770 E752

Dog_2 906 E940 875 E913 901 E929 Dog_2 894 E927
Dog_3 774 E788 752 E768 777 E790 Dog_3 768 E782
Dog_1 608 E591 637 E638 632 E631 Dog_1 626 E620
Dog_4 629 E648 586 E587 578 E583 Dog_4 599 E606
Dog_5 961 E962 947 E947 958 E962 Dog_5 955 E957
Patient_2 878 E893 835 E922 876 E919 Patient_2 865 E911
Patient_1 783 E805 715 E750 912 E911 Patient_1 810 E822
gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9 772 E758

Dog_2 912 E964 875 E924 880 E925 Dog_2 889 E937
Dog_3 715 E724 731 E735 750 E751 Dog_3 732 E737
Dog_1 638 E626 626 E633 643 E635 Dog_1 636 E632
Dog_4 620 E658 586 E590 591 E608 Dog_4 599 E619
Dog_5 940 E951 953 E960 934 E958 Dog_5 942 E956
Patient_2 949 E977 874 E822 923 E952 Patient_2 916 E917
Patient_1 881 E896 902 E901 915 E923 Patient_1 899 E907
n3.gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600 763 E759


In [ ]: