Run Random Forest after combining two feature sets


In [16]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random

In [17]:
import sys 
# Put the parent directory on the import path so the later
# `from common.data import ...` cell can resolve (presumably the `common`
# package lives one level above this notebook - confirm against the repo layout).
sys.path.append('..')

Read precomputed features

Uncomment the relevant pipeline in `../seizure_detection.py` and run:

cd ..
./doall data

or

./doall td
./doall tt

In [18]:
# Identifiers of the two precomputed feature sets that are combined column-wise
# in the prediction loop. They are spliced into the cache entry name by read_data().
# The two strings differ only in the `usf` token (present in FEATURES0, empty in
# FEATURES1); what that pipeline option does is defined outside this notebook.
FEATURES0 = 'gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9'
FEATURES1 = 'gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9'

In [19]:
from common.data import CachedDataLoader
# Shared loader pointed at ../data-cache; read_data() uses it to fetch the
# precomputed feature sets by name.
cached_data_loader = CachedDataLoader('../data-cache')

In [20]:
def read_data(target, data_type, features):
    """Load one precomputed feature set through the shared `cached_data_loader`.

    The cache entry name is built as ``data_<data_type>_<target>_<features>``
    and printed before loading, so the cell output records every entry read.

    target    -- subject name, e.g. 'Dog_1'
    data_type -- clip category as it appears in the cache name
                 ('preictal', 'interictal' or 'test' in this notebook)
    features  -- feature-pipeline identifier (see FEATURES0 / FEATURES1)

    Returns whatever ``cached_data_loader.load(name, None)`` returns; callers
    in this notebook read its ``.X`` attribute.
    NOTE(review): the meaning of the ``None`` second argument is defined by
    CachedDataLoader, which is not visible here - confirm before changing it.
    """
    cache_key = 'data_%s_%s_%s' % (data_type, target, features)
    print(cache_key)
    return cached_data_loader.load(cache_key, None)

Predict


In [21]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression as LR

# Random Forest shared by the prediction loop, which calls clf.fit() once per subject.
# min_samples_split is 2 rather than 1: a node needs at least two samples to be
# split at all, so the fitted trees are unaffected, while scikit-learn >= 0.18
# rejects a value of 1 with a ValueError.
# NOTE: no random_state is set, so predictions vary from run to run.
clf = RandomForestClassifier(n_estimators=3000, min_samples_split=2, bootstrap=False,
                             max_depth=10, n_jobs=-1, max_features=15)

In [22]:
# Open the submission CSV and write its header row. The handle stays open so
# the prediction loop can append rows; it is closed in a later cell.
submission_path = '../submissions/141026-predict.3.csv'
fpout = open(submission_path, 'w')
fpout.write('clip,preictal' + '\n')

In [23]:
# For every subject: build a training matrix from both feature sets, fit the
# shared classifier, then append the subject's test predictions to the CSV.
for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    train_blocks = []
    train_labels = []
    for features in (FEATURES0, FEATURES1):
        positives = read_data(target, 'preictal', features)    # label 1
        negatives = read_data(target, 'interictal', features)  # label 0
        stacked = np.concatenate((positives.X, negatives.X))
        labels = np.zeros(stacked.shape[0])
        # Preictal rows were stacked first, so they occupy the leading rows.
        labels[:positives.X.shape[0]] = 1
        train_blocks.append(stacked)
        train_labels.append(labels)

    # Place the two feature sets side by side; both must describe the same
    # clips in the same order, which the label comparison sanity-checks.
    X = np.hstack(train_blocks)
    assert np.all(train_labels[0] == train_labels[1])
    y = train_labels[0]

    # Apply one random permutation to rows and labels together.
    order = list(range(len(y)))
    random.shuffle(order)
    X, y = X[order, :], y[order]

    clf.fit(X, y)

    # Test features are combined the same way as the training features.
    test_blocks = [read_data(target, 'test', features).X
                   for features in (FEATURES0, FEATURES1)]
    Xt = np.hstack(test_blocks)

    # Column 1 of predict_proba is the score for label 1 (preictal) when both
    # labels were present during fit.
    y_proba = clf.predict_proba(Xt)[:, 1]

    # One CSV row per test clip; segment numbers are 1-based row positions
    # (assumes test rows are stored in segment order - TODO confirm).
    for segment_no, proba in enumerate(y_proba, 1):
        row = '%s_test_segment_%04d.mat,%.15f' % (target, segment_no, proba)
        fpout.write(row + '\n')


data_preictal_Dog_1_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Dog_1_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Dog_1_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Dog_1_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Dog_1_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Dog_1_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Dog_2_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Dog_2_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Dog_2_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Dog_2_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Dog_2_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Dog_2_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Dog_3_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Dog_3_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Dog_3_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Dog_3_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Dog_3_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Dog_3_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Dog_4_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Dog_4_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Dog_4_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Dog_4_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Dog_4_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Dog_4_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Dog_5_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Dog_5_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Dog_5_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Dog_5_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Dog_5_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Dog_5_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Patient_1_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Patient_1_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Patient_1_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Patient_1_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Patient_1_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Patient_1_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Patient_2_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Patient_2_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_preictal_Patient_2_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_interictal_Patient_2_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Patient_2_gen-8_medianwindow1-bands2-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9
data_test_Patient_2_gen-8_medianwindow1-bands2--w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9

In [24]:
# Close the submission file opened earlier so all written rows reach disk.
fpout.close()

In [24]: