In [14]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random
from collections import defaultdict

In [15]:
import sys 
# Put the parent directory on the module search path; the `common` package imported
# further down is presumably located there.
sys.path.append('..')

Read precomputed features

Uncomment the relevant pipeline in ../seizure_detection.py and run

cd ..
./doall data

or

./doall td
./doall tt

In [16]:
# Name of the primary cached feature set (the default `features` argument of read_data).
# The '-'-separated tokens of this name are parsed in a later cell: a 'w<N>' token sets
# the window count and every token starting with 'b' is counted (presumably band edges,
# since nbands ends up as that count minus one).
FEATURES = 'gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70'

In [17]:
# Name of a second cached feature set. It is loaded alongside FEATURES and handed to
# process() as X1, where its values are compared against `mask_level`.
FEATURES1 = 'gen-8_maxdiff-60'

In [18]:
# Derive the feature layout from the FEATURES name: a 'w<N>' token gives the number of
# windows, and nbands is one less than the number of tokens that start with 'b'.
band_tokens = 0
nwindows = 0
for token in FEATURES.split('-'):
    kind, value = token[0], token[1:]
    if kind == 'w':
        nwindows = int(value)
    elif kind == 'b':
        band_tokens += 1

nbands = band_tokens - 1
nbands, nwindows


Out[18]:
(5, 60)

In [19]:
# Default `nunits` for process(): each sample's windows are reshaped into this many
# groups of nwindows // nunits windows before the percentile features are taken.
NUNITS = 2

In [20]:
# Project-local import; relies on the sys.path entry for '..' added in an earlier cell.
from common.data import CachedDataLoader
# Shared loader used by read_data(); constructed with the '../data-cache' path
# (presumably the directory holding the precomputed feature files).
cached_data_loader = CachedDataLoader('../data-cache')

In [21]:
def read_data(target, data_type, features=FEATURES):
    """Load one precomputed feature set through the shared `cached_data_loader`.

    The cache entry is named 'data_<data_type>_<target>_<features>'; that name is
    echoed to stdout before the load so the cell output records what was read.
    Returns whatever `cached_data_loader.load(name, None)` returns.
    """
    cache_name = 'data_%s_%s_%s' % (data_type, target, features)
    print(cache_name)
    loaded = cached_data_loader.load(cache_name, None)
    return loaded

Predict


In [22]:
from sklearn.ensemble import GradientBoostingClassifier 
# Single gradient-boosting classifier instance; the prediction loop below calls
# clf.fit() on it once per target.
# NOTE(review): no random_state is passed, so (with max_features=18 selecting a subset
# of features per split) repeated runs are presumably not bit-for-bit reproducible —
# confirm whether a fixed seed is wanted before regenerating a submission.
clf = GradientBoostingClassifier(n_estimators=1000,learning_rate=0.04,max_depth=10,max_features=18)

In [23]:
# Open the submission CSV for writing and emit its header row. The handle stays open
# across the following cells and is closed explicitly once all targets are written.
fpout = open('../submissions/141107-predict.4.csv', 'w')
fpout.write('clip,preictal' + '\n')

In [24]:
def process(X, X1, percentile=(0.05, 0.95), nunits=NUNITS, mask_level=7000):
    """Reduce per-window features to per-unit percentile features, patching outliers.

    Every row of `X` is reshaped to (nunits, nwindows // nunits, -1); the last axis is
    treated as `nchannels` consecutive groups of `nbands` values. The matching row of
    `X1` is reshaped the same way and its last axis is indexed by channel. For each
    (unit, channel), windows whose `X1` value exceeds `mask_level` have that channel's
    band values replaced by the mean over the remaining windows of the same unit,
    provided at least one window is below the threshold. The windows are then sorted
    independently per feature and the entries at the requested percentiles are
    concatenated and flattened.

    Parameters
    ----------
    X : 2-D array, shape (N, Nf)
        Per-window features. It is not modified: each row is copied before patching.
    X1 : 2-D array with N rows
        Values thresholded against `mask_level` (original note: max value for each
        channel).
    percentile : sequence of float
        Fractions of the sorted window axis to keep; the index is int(p * nw), clamped
        to the last window so p == 1.0 is valid.
    nunits : int
        Number of groups the windows of one sample are split into.
    mask_level : number
        Threshold above which a window is considered too large for a channel.

    Returns
    -------
    numpy array with one row per sample, each of length
    nunits * len(percentile) * (features per window).

    Relies on the module-level `nbands` and `nwindows`. Prints the sample/feature
    counts, the channel count, and a sorted histogram of how many windows were flagged
    per (sample, unit, channel).
    """
    N, Nf = X.shape
    print('# samples %s # power points %s' % (N, Nf))
    # Floor division keeps the channel count an int under true division as well.
    nchannels = Nf // (nbands * nwindows)
    print('# channels %s' % nchannels)

    nw = nwindows // nunits  # windows per unit; identical for every sample
    # Positions along the sorted window axis, clamped so p == 1.0 picks the maximum
    # instead of indexing one past the end.
    picks = [min(int(p * nw), nw - 1) for p in percentile]

    fix = defaultdict(int)
    newX = []
    for i in range(N):
        # Copy the row first: reshaping a contiguous row yields a view, and the
        # in-place patching below would otherwise overwrite the caller's array.
        windows = X[i, :].copy().reshape((nunits, nw, -1))
        mask = X1[i, :].reshape((nunits, nw, -1))  # max value for each channel
        for j in range(nunits):
            for k in range(nchannels):
                m = mask[j, :, k] > mask_level  # find large windows
                nbad = int(m.sum())
                if nbad == 0:
                    continue
                fix[nbad] += 1
                # Patch only when at least one good window exists to borrow from.
                if nbad < nw:
                    bands = slice(k * nbands, (k + 1) * nbands)
                    # Replace the bands of the large windows with the mean of the
                    # bands in all other windows of this unit.
                    windows[j, m, bands] = np.mean(windows[j, ~m, bands], axis=0)
        sorted_windows = np.sort(windows, axis=1)
        features = np.concatenate([sorted_windows[:, q, :] for q in picks], axis=-1)
        newX.append(features.ravel())
    newX = np.array(newX)
    print(sorted(fix.items()))
    return newX

In [25]:
for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    # Training inputs: preictal clips are the positive class, interictal the negative.
    pos_main = read_data(target, 'preictal')
    neg_main = read_data(target, 'interictal')
    pos_aux = read_data(target, 'preictal', FEATURES1)
    neg_aux = read_data(target, 'interictal', FEATURES1)

    n_pos = pos_main.X.shape[0]
    train_X = process(
        np.concatenate((pos_main.X, neg_main.X)),
        np.concatenate((pos_aux.X, neg_aux.X)),
    )

    # Positives were concatenated first, so the leading n_pos rows get label 1.
    train_y = np.zeros(train_X.shape[0])
    train_y[:n_pos] = 1

    # Apply one random permutation to the rows and their labels together.
    order = list(range(len(train_y)))
    random.shuffle(order)
    train_X, train_y = train_X[order, :], train_y[order]

    # Test inputs go through the same processing as the training inputs.
    test_main = read_data(target, 'test')
    test_aux = read_data(target, 'test', FEATURES1)
    test_X = process(test_main.X, test_aux.X)

    # Re-fit the shared classifier for this target and keep the class-1 column.
    clf.fit(train_X, train_y)
    preictal_proba = clf.predict_proba(test_X)[:, 1]

    # One CSV row per test row; the segment number is the 1-based row position.
    for seg_no, proba in enumerate(preictal_proba, 1):
        fpout.write('%s_test_segment_%04d.mat,%.15f' % (target, seg_no, proba) + '\n')


data_preictal_Dog_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Dog_1_gen-8_maxdiff-60
data_interictal_Dog_1_gen-8_maxdiff-60
# samples 664 # power points 4800
# channels 16
[]
data_test_Dog_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Dog_1_gen-8_maxdiff-60
# samples 502 # power points 4800
# channels 16
[]
data_preictal_Dog_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Dog_2_gen-8_maxdiff-60
data_interictal_Dog_2_gen-8_maxdiff-60
# samples 822 # power points 4800
# channels 16
[]
data_test_Dog_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Dog_2_gen-8_maxdiff-60
# samples 1000 # power points 4800
# channels 16
[]
data_preictal_Dog_3_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_3_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Dog_3_gen-8_maxdiff-60
data_interictal_Dog_3_gen-8_maxdiff-60
# samples 1992 # power points 4800
# channels 16
[]
data_test_Dog_3_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Dog_3_gen-8_maxdiff-60
# samples 907 # power points 4800
# channels 16
[]
data_preictal_Dog_4_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_4_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Dog_4_gen-8_maxdiff-60
data_interictal_Dog_4_gen-8_maxdiff-60
# samples 1541 # power points 4800
# channels 16
[]
data_test_Dog_4_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Dog_4_gen-8_maxdiff-60
# samples 990 # power points 4800
# channels 16
[]
data_preictal_Dog_5_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_5_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Dog_5_gen-8_maxdiff-60
data_interictal_Dog_5_gen-8_maxdiff-60
# samples 680 # power points 4500
# channels 15
[]
data_test_Dog_5_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Dog_5_gen-8_maxdiff-60
# samples 191 # power points 4500
# channels 15
[]
data_preictal_Patient_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Patient_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Patient_1_gen-8_maxdiff-60
data_interictal_Patient_1_gen-8_maxdiff-60
# samples 188 # power points 4500
# channels 15
[(2, 45), (3, 6), (5, 8), (7, 5), (8, 2), (11, 15), (14, 17), (15, 42), (16, 2), (17, 15), (18, 58), (19, 62), (20, 39), (21, 50), (22, 41), (23, 109), (24, 54), (25, 96), (26, 165), (27, 150), (28, 30), (29, 135), (30, 240)]
data_test_Patient_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Patient_1_gen-8_maxdiff-60
# samples 195 # power points 4500
# channels 15
[(1, 50), (3, 30)]
data_preictal_Patient_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Patient_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Patient_2_gen-8_maxdiff-60
data_interictal_Patient_2_gen-8_maxdiff-60
# samples 180 # power points 7200
# channels 24
[(1, 189)]
data_test_Patient_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Patient_2_gen-8_maxdiff-60
# samples 150 # power points 7200
# channels 24
[(1, 3), (2, 1), (3, 5), (4, 2), (5, 4), (6, 4), (7, 6), (8, 9), (9, 4), (10, 5), (12, 3), (13, 3), (14, 12), (16, 4)]

In [26]:
# Close the submission file opened in an earlier cell so all buffered rows reach disk.
fpout.close()

In [27]:
# Show the first lines of the submission file as a quick sanity check.
!head ../submissions/141107-predict.4.csv


clip,preictal
Dog_1_test_segment_0001.mat,0.039796705155399
Dog_1_test_segment_0002.mat,0.001189008479951
Dog_1_test_segment_0003.mat,0.001138364966418
Dog_1_test_segment_0004.mat,0.000980343777225
Dog_1_test_segment_0005.mat,0.001764961905016
Dog_1_test_segment_0006.mat,0.020611940105987
Dog_1_test_segment_0007.mat,0.000465648400312
Dog_1_test_segment_0008.mat,0.055968551521437
Dog_1_test_segment_0009.mat,0.000410524376910

In [ ]: