In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random
In [2]:
import sys
# Add the parent directory to the module search path.
# NOTE(review): presumably this is what lets the later `from common.data import ...`
# resolve when the notebook is run from its own sub-directory — confirm the layout.
sys.path.append('..')
Uncomment the relevant pipeline in ../seizure_detection.py and run
cd ..
./doall data
or
./doall td
./doall tt
In [3]:
# Identifier of the first feature set. read_data() embeds this string in the
# name of the cache entry it loads, so it must match the name the feature
# pipeline used when writing the cache.
FEATURES1 = 'gen-8_medianwindow-bands-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9'
In [ ]:
# Identifier of the second feature set; same role as FEATURES1. The string
# differs from FEATURES1 only in the 'bandstimecorr' component.
# NOTE(review): presumably the same band features plus time-correlation
# features — confirm against the pipeline definition.
FEATURES2 = 'gen-8_medianwindow-bandstimecorr-usf-w60-b0.2-b4-b8-b12-b30-b70-0.1-0.5-0.9'
In [4]:
from common.data import CachedDataLoader
# Shared loader instance used by read_data() for every data set.
# NOTE(review): '../data-cache' is presumably the directory holding the cached
# feature files — confirm against common.data.CachedDataLoader.
cached_data_loader = CachedDataLoader('../data-cache')
In [5]:
def read_data(target, data_type, features):
    """Fetch one cached data set through the module-level ``cached_data_loader``.

    The cache entry name is built as ``data_<data_type>_<target>_<features>``
    and echoed to stdout before loading, so the notebook output shows which
    entry each call touched.

    Parameters
    ----------
    target : subject identifier inserted into the entry name.
    data_type : data-set kind inserted into the entry name.
    features : feature-set identifier inserted into the entry name.

    Returns
    -------
    Whatever ``cached_data_loader.load(name, None)`` returns for that entry.
    """
    name_parts = (data_type, target, features)
    cache_key = 'data_%s_%s_%s' % name_parts
    # A parenthesised single argument prints the same text under the Python 2
    # print statement that the rest of this notebook uses.
    print(cache_key)
    return cached_data_loader.load(cache_key, None)
In [6]:
# Extra weight given to positive examples when fitting: the training loop
# passes sample_weight = PWEIGHT*y + 1, so 0 weights every sample equally.
PWEIGHT=0
In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression as LR
# Classifier shared by every per-subject fit in the training loop.
# NOTE(review): no random_state is set, so results vary between runs.
# NOTE(review): newer scikit-learn releases require min_samples_split >= 2;
# the value 1 is only accepted by the old release this notebook targets — confirm.
clf = RandomForestClassifier(
    n_estimators=3000,
    min_samples_split=1,
    bootstrap=False,
    max_depth=5,
    n_jobs=-1,
)
In [8]:
# Open the submission CSV and emit its header row. The handle stays open on
# purpose: the prediction loop appends rows to it and a later cell closes it.
fpout = open('../submissions/140924-predict.1.csv', 'w')
fpout.write('clip,preictal' + '\n')
In [9]:
# Train one model per subject on both feature sets combined and append its
# test-segment predictions to the already-open submission file `fpout`.
for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    # Positive (preictal) and negative (interictal) training examples, loaded
    # once per feature set.
    pdata1 = read_data(target, 'preictal', FEATURES1)
    pdata2 = read_data(target, 'preictal', FEATURES2)
    ndata1 = read_data(target, 'interictal', FEATURES1)
    ndata2 = read_data(target, 'interictal', FEATURES2)

    # Join the two feature sets side by side for each class, then stack the
    # positive rows above the negative rows.
    X = np.concatenate((np.hstack((pdata1.X, pdata2.X)),
                        np.hstack((ndata1.X, ndata2.X))))
    y = np.zeros(X.shape[0])
    # The leading rows are the positives. The original indexed an undefined
    # name `pdata` here (NameError); pdata1 and pdata2 must have the same row
    # count for the hstack above to succeed, so pdata1 gives the right count.
    y[:pdata1.X.shape[0]] = 1

    # Shuffle rows and labels with the same permutation. An explicit list is
    # needed because random.shuffle works in place.
    idxs = list(range(len(y)))
    random.shuffle(idxs)
    X = X[idxs, :]
    y = y[idxs]

    # Fit; positives get weight PWEIGHT + 1, negatives weight 1.
    clf.fit(X, y, sample_weight=PWEIGHT * y + 1)

    # Predict on the test segments using the same two feature sets; column 1
    # of predict_proba is the probability of the class labelled 1.
    tdata1 = read_data(target, 'test', FEATURES1)
    tdata2 = read_data(target, 'test', FEATURES2)
    y_proba = clf.predict_proba(np.hstack((tdata1.X, tdata2.X)))[:, 1]

    # One CSV row per test segment; segment numbers are 1-based. Writes the
    # same bytes as the original `print >>fpout` form.
    for i, p in enumerate(y_proba):
        fpout.write('%s_test_segment_%04d.mat,%.15f\n' % (target, i + 1, p))
In [10]:
# Close the submission file so every buffered prediction row reaches disk.
fpout.close()
In [20]: