In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random

In [2]:
import sys 
# Add the parent directory to the module search path (presumably so the
# project-local `common` package imported further down resolves).
sys.path.append('..')

Read precomputed features

Uncomment the relevant pipeline in ../seizure_detection.py and run

cd ..
./doall data

or

./doall td
./doall tt

In [3]:
# Names of the cached feature sets to combine, one entry per set.
# Each name becomes part of the cache key built by read_data(); the first
# entry is also parsed for its 'b…'/'w…' tokens to get band/window counts.
FEATURES = [
    'gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70',
    'gen-8_alltimecorr-usf-w60',
]

In [4]:
# Mask data set for each entry of FEATURES (same order).
# None means the corresponding feature set is processed without a mask.
MASKS = [
    'gen-8_maxdiff-60',
    None,
]

In [5]:
# Percentile fractions for each entry of FEATURES (same order); the three
# lists are zipped together in the training/prediction loop.
PERCENTILES = [
    [0.05, 0.95],
    [0.5, 0.8, 0.95],
]

In [6]:
# Derive the band and window counts from the '-'-separated tokens of the
# first feature-set name.
tokens = FEATURES[0].split('-')

# Count the tokens starting with 'b' (presumably band edges, hence one
# fewer band than tokens).
nbands = sum(1 for tok in tokens if tok[0] == 'b') - 1

# A token of the form 'w<N>' carries the window count; the last one wins.
nwindows = 0
for tok in tokens:
    if tok[0] == 'w':
        nwindows = int(tok[1:])

nbands, nwindows


Out[6]:
(5, 60)

In [7]:
# Default for process()'s `nunits` argument: the number of equal groups the
# windows of each sample are split into before percentiles are taken.
NUNITS = 2

In [8]:
from common.data import CachedDataLoader
# Loader used by read_data(); constructed with '../data-cache'
# (presumably the directory holding the precomputed feature files).
cached_data_loader = CachedDataLoader('../data-cache')

In [9]:
def read_data(target, data_type, features=FEATURES):
    """Load one precomputed data set through ``cached_data_loader``.

    The cache key is ``data_<data_type>_<target>_<features>`` and is printed
    before loading so the notebook output records what was read.

    target    -- subject name, e.g. 'Dog_1'
    data_type -- 'preictal', 'interictal' or 'test' in this notebook
    features  -- feature-set (or mask) name. Note that the default is the
                 whole FEATURES list, which would be formatted with its list
                 repr; callers in this notebook always pass a single name.

    Returns whatever ``cached_data_loader.load(key, None)`` returns.
    """
    cache_key = 'data_%s_%s_%s' % (data_type, target, features)
    print(cache_key)
    loaded = cached_data_loader.load(cache_key, None)
    return loaded

Predict


In [10]:
from sklearn.ensemble import RandomForestClassifier

# One classifier instance, refit from scratch for every target in the loop
# below.
# min_samples_split is 2 rather than 1: a node holding a single sample can
# never be split, so the trees are the same, and current scikit-learn
# releases reject an integer value below 2.
clf = RandomForestClassifier(
    n_estimators=3000,
    min_samples_split=2,
    bootstrap=False,
    max_depth=10,
    n_jobs=-1,
)

In [11]:
# Submission CSV. The handle stays open across cells: the prediction loop
# appends one row per test segment and a later cell calls fpout.close().
fpout = open('../submissions/141111-predict.13.csv','w')
# Header row of the submission file.
print >>fpout,'clip,preictal'

In [12]:
def process(X, X1, percentile=[0.05, 0.95], nunits=NUNITS):
    """Reduce the per-window features of every sample to per-unit statistics.

    Each row of ``X`` is reshaped to ``(nunits, nwindows // nunits, -1)``
    using the notebook-level ``nwindows``; ``nunits`` must therefore divide
    ``nwindows`` and ``nwindows`` must divide the row length.

    Parameters
    ----------
    X : 2-D array, one row per sample, windows laid out consecutively.
    X1 : 2-D array or None
        Optional mask values, reshaped like ``X`` so that the last axis
        indexes channels. A value above 5000 marks that window as bad for
        that channel. For every unit/channel with some (but not all) bad
        windows, the channel's features in the bad windows are replaced by
        their mean over the good windows of the same unit.
    percentile : sequence of fractions in [0, 1], or None
        For each fraction ``p`` the windows of a unit are sorted feature by
        feature and the value at index ``int(p * nw)`` is kept (clamped to
        the last window so ``p == 1.0`` is valid). ``None`` keeps all
        windows unreduced.
    nunits : int
        Number of equal groups the windows of a sample are split into.

    Returns
    -------
    2-D numpy array with one flattened feature row per sample.

    Notes
    -----
    The reshaped row may be a view of ``X``, in which case repaired values
    are written back into the caller's array.
    """
    N, Nf = X.shape
    print('# samples %s # power points %s' % (N, Nf))

    nb = Nf // nwindows  # features per window, all channels together
    if X1 is not None:
        nchannels = X1.shape[1] // nwindows
        print('# channels %s' % nchannels)
        # Width of one channel's slice inside a window. The original code
        # used `nb` here, so channel 0 covered the whole window and every
        # other channel addressed an empty slice.
        # Assumes features are laid out channel-major within a window
        # (channel 0's features first) -- TODO confirm against the pipeline.
        nbc = nb // nchannels
    print('# features per window %s' % nb)

    nw = nwindows // nunits  # windows per unit
    if percentile is not None:
        # Clamp so that a fraction of exactly 1.0 selects the last window
        # instead of indexing one past the end.
        picks = [min(int(p * nw), nw - 1) for p in percentile]

    newX = []
    for i in range(N):
        windows = X[i, :].reshape((nunits, nw, -1))
        if X1 is not None:
            mask = X1[i, :].reshape((nunits, nw, -1))  # one value per window and channel
            for j in range(nunits):
                for k in range(nchannels):
                    bad = mask[j, :, k] > 5000  # windows flagged as too large
                    # Repair only when there is at least one bad window and at
                    # least one good window to take replacement values from.
                    if np.any(bad) and not np.all(bad):
                        cols = slice(k * nbc, (k + 1) * nbc)
                        windows[j, bad, cols] = np.mean(windows[j, ~bad, cols], axis=0)
        if percentile is not None:
            # Sort every feature independently along the window axis and keep
            # the requested order statistics.
            sorted_windows = np.sort(windows, axis=1)
            features = np.concatenate([sorted_windows[:, idx, :] for idx in picks], axis=-1)
        else:
            features = windows
        newX.append(features.ravel())

    return np.array(newX)

In [14]:
def _stacked_features(target, data_types):
    """Load, post-process and column-stack every configured feature set.

    For each (features, mask, percentile) triple from FEATURES, MASKS and
    PERCENTILES, the data sets named in ``data_types`` are read with
    read_data(), concatenated row-wise in the given order, and passed through
    process() together with the matching mask data (if any) and the matching
    percentile list.

    Returns ``(X, sizes)`` where ``X`` is the horizontally stacked result and
    ``sizes`` lists the number of rows contributed by each data type.
    """
    blocks = []
    sizes = None
    for features, mask, percentile in zip(FEATURES, MASKS, PERCENTILES):
        parts = [read_data(target, data_type, features).X for data_type in data_types]
        sizes = [part.shape[0] for part in parts]
        X = np.concatenate(parts)

        if mask is not None:
            Xm = np.concatenate([read_data(target, data_type, mask).X
                                 for data_type in data_types])
        else:
            Xm = None

        # Pass the per-feature-set percentiles; previously the loop variable
        # was ignored and process() always fell back to its default.
        blocks.append(process(X, Xm, percentile))
    return np.hstack(blocks), sizes


# Train one model per subject and append its test predictions to the
# submission file.
for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    # Training matrix: preictal (positive) rows first, then interictal rows.
    X, sizes = _stacked_features(target, ['preictal', 'interictal'])

    y = np.zeros(X.shape[0])
    y[:sizes[0]] = 1  # the leading rows are the preictal examples

    # shuffle rows and labels with the same permutation
    idxs = list(range(len(y)))
    random.shuffle(idxs)
    X = X[idxs, :]
    y = y[idxs]

    clf.fit(X, y)

    Xt = _stacked_features(target, ['test'])[0]

    # Probability of label 1 (preictal); labels are 0/1 so it is column 1.
    y_proba = clf.predict_proba(Xt)[:, 1]

    # write results; segment numbers are 1-based and follow the row order of
    # the loaded test data
    for i, p in enumerate(y_proba):
        fpout.write('%s_test_segment_%04d.mat,%.15f\n' % (target, i + 1, p))


data_preictal_Dog_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Dog_1_gen-8_maxdiff-60
data_interictal_Dog_1_gen-8_maxdiff-60
# samples 664 # power points 4800
# channels 16
# features per window 80
data_preictal_Dog_1_gen-8_alltimecorr-usf-w60
data_interictal_Dog_1_gen-8_alltimecorr-usf-w60
# samples 664 # power points 8160
# features per window 136
data_test_Dog_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Dog_1_gen-8_maxdiff-60
# samples 502 # power points 4800
# channels 16
# features per window 80
data_test_Dog_1_gen-8_alltimecorr-usf-w60
# samples 502 # power points 8160
# features per window 136
data_preictal_Dog_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Dog_2_gen-8_maxdiff-60
data_interictal_Dog_2_gen-8_maxdiff-60
# samples 822 # power points 4800
# channels 16
# features per window 80
data_preictal_Dog_2_gen-8_alltimecorr-usf-w60
data_interictal_Dog_2_gen-8_alltimecorr-usf-w60
# samples 822 # power points 8160
# features per window 136
data_test_Dog_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Dog_2_gen-8_maxdiff-60
# samples 1000 # power points 4800
# channels 16
# features per window 80
data_test_Dog_2_gen-8_alltimecorr-usf-w60
# samples 1000 # power points 8160
# features per window 136
data_preictal_Dog_3_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_3_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Dog_3_gen-8_maxdiff-60
data_interictal_Dog_3_gen-8_maxdiff-60
# samples 1992 # power points 4800
# channels 16
# features per window 80
data_preictal_Dog_3_gen-8_alltimecorr-usf-w60
data_interictal_Dog_3_gen-8_alltimecorr-usf-w60
# samples 1992 # power points 8160
# features per window 136
data_test_Dog_3_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Dog_3_gen-8_maxdiff-60
# samples 907 # power points 4800
# channels 16
# features per window 80
data_test_Dog_3_gen-8_alltimecorr-usf-w60
# samples 907 # power points 8160
# features per window 136
data_preictal_Dog_4_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_4_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Dog_4_gen-8_maxdiff-60
data_interictal_Dog_4_gen-8_maxdiff-60
# samples 1541 # power points 4800
# channels 16
# features per window 80
data_preictal_Dog_4_gen-8_alltimecorr-usf-w60
data_interictal_Dog_4_gen-8_alltimecorr-usf-w60
# samples 1541 # power points 8160
# features per window 136
data_test_Dog_4_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Dog_4_gen-8_maxdiff-60
# samples 990 # power points 4800
# channels 16
# features per window 80
data_test_Dog_4_gen-8_alltimecorr-usf-w60
# samples 990 # power points 8160
# features per window 136
data_preictal_Dog_5_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_5_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Dog_5_gen-8_maxdiff-60
data_interictal_Dog_5_gen-8_maxdiff-60
# samples 680 # power points 4500
# channels 15
# features per window 75
data_preictal_Dog_5_gen-8_alltimecorr-usf-w60
data_interictal_Dog_5_gen-8_alltimecorr-usf-w60
# samples 680 # power points 7200
# features per window 120
data_test_Dog_5_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Dog_5_gen-8_maxdiff-60
# samples 191 # power points 4500
# channels 15
# features per window 75
data_test_Dog_5_gen-8_alltimecorr-usf-w60
# samples 191 # power points 7200
# features per window 120
data_preictal_Patient_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Patient_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Patient_1_gen-8_maxdiff-60
data_interictal_Patient_1_gen-8_maxdiff-60
# samples 188 # power points 4500
# channels 15
# features per window 75
data_preictal_Patient_1_gen-8_alltimecorr-usf-w60
data_interictal_Patient_1_gen-8_alltimecorr-usf-w60
# samples 188 # power points 7200
# features per window 120
data_test_Patient_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Patient_1_gen-8_maxdiff-60
# samples 195 # power points 4500
# channels 15
# features per window 75
data_test_Patient_1_gen-8_alltimecorr-usf-w60
# samples 195 # power points 7200
# features per window 120
data_preictal_Patient_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Patient_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_preictal_Patient_2_gen-8_maxdiff-60
data_interictal_Patient_2_gen-8_maxdiff-60
# samples 180 # power points 7200
# channels 24
# features per window 120
data_preictal_Patient_2_gen-8_alltimecorr-usf-w60
data_interictal_Patient_2_gen-8_alltimecorr-usf-w60
# samples 180 # power points 18000
# features per window 300
data_test_Patient_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_test_Patient_2_gen-8_maxdiff-60
# samples 150 # power points 7200
# channels 24
# features per window 120
data_test_Patient_2_gen-8_alltimecorr-usf-w60
# samples 150 # power points 18000
# features per window 300

In [15]:
# Close the submission file opened before the prediction loop.
fpout.close()

In [41]: