In [25]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random

In [26]:
import sys 
# Put the parent directory on the module search path. The notebook later imports
# `common.data`; presumably that package lives one level up (TODO confirm).
sys.path.append('..')

Read precomputed features

Uncomment the relevant pipeline in ../seizure_detection.py and run

cd ..
./doall data

or

./doall td
./doall tt

In [27]:
# Name of the precomputed feature set. It is embedded in the cache entry names
# built by read_data(), and its '-'-separated segments that start with 'b' or 'w'
# are parsed in the next cell to derive nbands and nwindows.
FEATURES = 'gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70'

In [28]:
# Derive the feature layout from the FEATURES name.
# Every '-'-separated segment is a one-letter tag followed by a value:
#   'b<x>' segments are counted (presumably band edges, hence one fewer band
#          than segments -- which is why the counter starts at -1),
#   'w<n>' gives the number of windows (the last such segment wins).
nbands = -1
nwindows = 0
for segment in FEATURES.split('-'):
    tag, value = segment[0], segment[1:]
    if tag == 'b':
        nbands += 1
    elif tag == 'w':
        nwindows = int(value)

nbands, nwindows


Out[28]:
(5, 60)

In [29]:
# Number of equal-sized units each sample's feature vector is split into.
# When > 1, the prediction loop reshapes X so that every sample becomes NUNITS
# rows, repeats the labels accordingly, and scores a test clip by the maximum
# predicted probability over its units. With 1 the samples are used as they are.
NUNITS = 1

In [30]:
from common.data import CachedDataLoader
# Loader used by read_data(), constructed with the cache directory path.
# NOTE(review): CachedDataLoader is project code not shown here -- presumably it
# reads the precomputed feature files from this directory; confirm in common/data.py.
cached_data_loader = CachedDataLoader('../data-cache')

In [31]:
def read_data(target, data_type):
    """Fetch one cached feature set for a subject.

    Builds the cache entry name ``data_<data_type>_<target>_<FEATURES>``,
    echoes it to stdout and returns whatever ``cached_data_loader.load``
    yields for that name.

    target    -- subject identifier, e.g. 'Dog_1'
    data_type -- which set to load; this notebook uses 'preictal',
                 'interictal' and 'test'

    NOTE(review): the second argument of ``load`` is passed as None; its
    meaning is defined by CachedDataLoader, which is not shown here.
    """
    entry_name = 'data_%s_%s_%s' % (data_type, target, FEATURES)
    print(entry_name)
    return cached_data_loader.load(entry_name, None)

Predict


In [32]:
from sklearn.ensemble import RandomForestClassifier
# Random forest that the prediction loop re-fits for every subject.
# min_samples_split is 2, the smallest meaningful value (a split needs two
# samples). The previous value of 1 is rejected by current scikit-learn
# releases; older releases are believed to have treated it as 2, so results
# should be unaffected there.
# NOTE: no random_state is passed, so the fitted forests differ between runs.
clf = RandomForestClassifier(n_estimators=3000, min_samples_split=2, bootstrap=False, max_depth=10,
                             n_jobs=-1)

In [33]:
# Open the submission CSV for writing and emit its header row. The handle stays
# open across cells: the prediction loop appends rows and a later cell closes it.
fpout = open('../submissions/141104-predict.5.csv', 'w')
fpout.write('clip,preictal\n')

In [34]:
def process(X, percentile=(0.05, 0.95), n_windows=None, n_bands=None):
    """Summarise each sample's windows by per-feature order statistics.

    Every row of ``X`` is viewed as ``n_windows`` consecutive windows of equal
    length. Within each sample the values of every feature are sorted across
    the windows, and for each fraction ``p`` in ``percentile`` the sorted row
    at index ``int(p * n_windows)`` is kept. The kept rows are concatenated in
    the order given by ``percentile``.

    Parameters
    ----------
    X : 2-D numpy array of shape (N, Nf)
        One sample per row; ``Nf`` must be a multiple of ``n_windows``.
    percentile : sequence of float
        Fractions in [0, 1] selecting positions in the sorted windows.
        1.0 selects the largest value.
    n_windows : int, optional
        Number of windows per sample. Defaults to the notebook-level
        ``nwindows``.
    n_bands : int, optional
        Only used for the "# channels" diagnostic line. Defaults to the
        notebook-level ``nbands``.

    Returns
    -------
    numpy array of shape (N, len(percentile) * Nf // n_windows)

    Raises
    ------
    ValueError
        If ``Nf`` is not divisible by ``n_windows``.
    """
    # Fall back to the values parsed from FEATURES when not given explicitly.
    if n_windows is None:
        n_windows = nwindows
    if n_bands is None:
        n_bands = nbands

    N, Nf = X.shape
    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print('# samples %s # power points %s' % (N, Nf))
    print('# channels %s' % (Nf // (n_bands * n_windows)))

    per_window, leftover = divmod(Nf, n_windows)
    if leftover:
        raise ValueError('%d features per sample cannot be split into %d windows'
                         % (Nf, n_windows))

    # int(p * n_windows) equals n_windows for p == 1.0, which is one past the
    # last sorted window; clamp so that 1.0 means "maximum".
    rows = np.array([min(int(p * n_windows), n_windows - 1) for p in percentile],
                    dtype=int)

    # Sort all samples at once along the window axis instead of looping in Python.
    windows = X.reshape(N, n_windows, per_window)
    sorted_windows = np.sort(windows, axis=1)

    # (N, len(rows), per_window) -> (N, len(rows) * per_window): flattening in
    # C order puts the selected rows side by side, as concatenation would.
    return sorted_windows[:, rows, :].reshape(N, len(rows) * per_window)

In [35]:
# For every subject: build a labelled training set from the cached preictal and
# interictal features, fit the shared classifier, score the test clips and
# append one CSV row per clip to the submission file.
for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    # ---- training set ----
    pdata = read_data(target, 'preictal')    # positive examples
    ndata = read_data(target, 'interictal')  # negative examples
    X = process(np.concatenate((pdata.X, ndata.X)))

    # Preictal rows were concatenated first, so the leading rows get label 1
    # and the remaining (interictal) rows get label 0.
    y = (np.arange(X.shape[0]) < pdata.X.shape[0]).astype(float)

    # Apply one random permutation to samples and labels alike.
    idxs = list(range(len(y)))
    random.shuffle(idxs)
    X, y = X[idxs, :], y[idxs]

    if NUNITS > 1:
        # Cut every sample into NUNITS equal parts; each part inherits the label.
        NFu = X.shape[1] // NUNITS
        y = np.repeat(y, NUNITS)
        X = X.reshape(-1, NFu)
    clf.fit(X, y)

    # ---- test set ----
    tdata = read_data(target, 'test')
    Xt = process(tdata.X)
    if NUNITS > 1:
        Xt = Xt.reshape(-1, NFu)

    # Second column of predict_proba: probability of the label-1 (preictal) class.
    y_proba = clf.predict_proba(Xt)[:, 1]
    if NUNITS > 1:
        # A clip is scored by the highest probability among its units.
        y_proba = y_proba.reshape(-1, NUNITS).max(axis=-1)

    # ---- submission rows ----
    # Segment numbers start at 1 and follow the row order of tdata.X.
    for seg_no, proba in enumerate(y_proba, 1):
        fpout.write('%s_test_segment_%04d.mat,%.15f' % (target, seg_no, proba) + '\n')


data_preictal_Dog_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 664 # power points 4800
# channels 16
data_test_Dog_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 502 # power points 4800
# channels 16
data_preictal_Dog_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 822 # power points 4800
# channels 16
data_test_Dog_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 1000 # power points 4800
# channels 16
data_preictal_Dog_3_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_3_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 1992 # power points 4800
# channels 16
data_test_Dog_3_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 907 # power points 4800
# channels 16
data_preictal_Dog_4_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_4_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 1541 # power points 4800
# channels 16
data_test_Dog_4_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 990 # power points 4800
# channels 16
data_preictal_Dog_5_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Dog_5_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 680 # power points 4500
# channels 15
data_test_Dog_5_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 191 # power points 4500
# channels 15
data_preictal_Patient_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Patient_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 188 # power points 4500
# channels 15
data_test_Patient_1_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 195 # power points 4500
# channels 15
data_preictal_Patient_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
data_interictal_Patient_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 180 # power points 7200
# channels 24
data_test_Patient_2_gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70
# samples 150 # power points 7200
# channels 24

In [36]:
# Close the submission file opened before the prediction loop so that all
# buffered rows are flushed to disk.
fpout.close()

In [36]: