In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random

In [2]:
import sys
sys.path.append('..')

Read precomputed features

uncommoent the relevant pipeline in ../seizure_detection.py and run

cd ..
./doall data

or

./doall td
./doall tt

In [3]:
FEATURES = 'gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600'

In [4]:
from common.data import CachedDataLoader
cached_data_loader = CachedDataLoader('../data-cache')

In [5]:
def read_data(target, data_type):
    fname = 'data_%s_%s_%s'%(data_type,target,FEATURES)
    print fname
    return cached_data_loader.load(fname,None)

Predict


In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression as LR

clf = RandomForestClassifier(n_estimators=3000, min_samples_split=1, max_depth=10,bootstrap=False,
                             n_jobs=-1)

In [7]:
fpout = open('../submissions/140922-predict.1.csv','w')
print >>fpout,'clip,preictal'

In [18]:
Xall = None
yall = None
target_list = ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5']
for i, target in enumerate(target_list):
    pdata = read_data(target, 'preictal') # positive examples
    ndata = read_data(target, 'interictal') # negative examples
    X = np.concatenate((pdata.X, ndata.X))
    Xextra = np.zeros((X.shape[0], 1+len(target_list)))
    Xextra[:,0] = int(target.startswith('Dog'))
    Xextra[:,1+i] = 1.
    X = np.hstack((X,Xextra))
    Xall = np.concatenate((Xall, X)) if Xall is not None else X
    
    y = np.zeros(X.shape[0])
    y[:pdata.X.shape[0]] = 1
    yall = np.concatenate((yall, y)) if yall is not None else y


data_preictal_Dog_1_gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600
data_interictal_Dog_1_gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600
data_preictal_Dog_2_gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600
data_interictal_Dog_2_gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600
data_preictal_Dog_3_gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600
data_interictal_Dog_3_gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600
data_preictal_Dog_4_gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600
data_interictal_Dog_4_gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600
data_preictal_Dog_5_gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600
data_interictal_Dog_5_gen8_medianwindow1-fft-with-time-freq-corr-1-48-r400-usf-w600
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-18-c94745f29a03> in <module>()
     10     Xextra[:,1+i] = 1.
     11     X = np.hstack((X,Xextra))
---> 12     Xall = np.concatenate((Xall, X)) if Xall is not None else X
     13 
     14     y = np.zeros(X.shape[0])

ValueError: all the input array dimensions except for the concatenation axis must match exactly

In [14]:
X.shape


Out[14]:
(664, 3072)

In [13]:
np.hstack((X,np.zeros((664,4)))).shape


Out[13]:
(664, 3076)

In [63]:
# shuffle
    idxs=range(len(y))
    random.shuffle(idxs)
    X = X[idxs,:]
    y = y[idxs]
    # model
    clf.fit(X,y)
    # predict
    tdata = read_data(target, 'test') # test examples
    y_proba = clf.predict_proba(tdata.X)[:,1]
    # write results
    for i,p in enumerate(y_proba):
        print >>fpout,'%s_test_segment_%04d.mat,%.15f' % (target, i+1, p)


data_preictal_Dog_1_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_interictal_Dog_1_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_test_Dog_1_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_preictal_Dog_2_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_interictal_Dog_2_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_test_Dog_2_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_preictal_Dog_3_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_interictal_Dog_3_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_test_Dog_3_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_preictal_Dog_4_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_interictal_Dog_4_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_test_Dog_4_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_preictal_Dog_5_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_interictal_Dog_5_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_test_Dog_5_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_preictal_Patient_1_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_interictal_Patient_1_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_test_Patient_1_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_preictal_Patient_2_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_interictal_Patient_2_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600
data_test_Patient_2_gen8_medianwindow-fft-with-time-freq-cov2-1-48-r400-usf-w600

In [64]:
fpout.close()

In [64]: