In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random

In [2]:
import sys

# Put the parent directory on the import search path (presumably where the
# `common` package imported further down lives).  The membership check keeps
# the cell idempotent: re-running it does not stack duplicate '..' entries.
if '..' not in sys.path:
    sys.path.append('..')

Read precomputed features

Uncomment the relevant pipeline in ../seizure_detection.py and run:

cd ..
./doall predict

In [3]:
# Name of the precomputed feature set; read_data() embeds it in the name of
# every cache entry it loads.
FEATURES = 'gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600'

In [4]:
from common.data import CachedDataLoader
# Shared loader constructed with the ../data-cache path; read_data() calls its
# load() method to fetch the precomputed feature sets.
cached_data_loader = CachedDataLoader('../data-cache')

In [5]:
def read_data(target, data_type):
    """Fetch one cached feature set for a subject.

    The cache entry is named ``data_<data_type>_<target>_<FEATURES>``.

    target    -- subject name, e.g. 'Dog_1'
    data_type -- which set to fetch; this notebook uses 'preictal',
                 'interictal' and 'test'

    Returns whatever ``cached_data_loader.load`` returns for that entry
    (callers in this notebook read its ``.X`` attribute).
    """
    cache_key = 'data_%s_%s_%s' % (data_type, target, FEATURES)
    # Second argument is None -- presumably "no fallback computation";
    # confirm against CachedDataLoader.load.
    return cached_data_loader.load(cache_key, None)

Predict


In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression as LR

# Random forest shared by every subject: it is re-fitted from scratch for each
# CV fold and again on the full training set before predicting.
#
# * min_samples_split=2: a node needs at least two samples before it can be
#   split, so 2 expresses the same constraint as the previous value of 1 while
#   also being accepted by scikit-learn releases that validate the range.
# * random_state: fixes the bootstrap / feature sampling so the forest is
#   reproducible between runs (n_jobs only affects parallelism, not results).
clf = RandomForestClassifier(n_estimators=3000, min_samples_split=2, max_depth=10,
                             n_jobs=-1, random_state=0)

In [7]:
# Open the submission CSV for writing and emit its header row.  The handle is
# left open on purpose: later cells append the prediction rows and close it.
fpout = open('../submissions/140906-predict.csv', 'w')
fpout.write('clip,preictal' + '\n')

In [8]:
# Dedicated, seeded RNG for the per-subject shuffle.  With an unseeded shuffle
# the fold assignment -- and therefore the CV scores and the calibration fit --
# changed on every run.  A private Random instance leaves the global `random`
# state untouched.
SHUFFLE_SEED = 0
shuffle_rng = random.Random(SHUFFLE_SEED)

# For every subject: build the labelled training set, estimate out-of-fold
# probabilities with 3-fold stratified CV, fit a logistic-regression calibrator
# on them, refit the classifier on all training data, and append calibrated
# test predictions to the already-open submission file `fpout`.
for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    pdata = read_data(target, 'preictal')    # positive examples
    ndata = read_data(target, 'interictal')  # negative examples

    # Positives are stacked first, so the leading rows get label 1, the rest 0.
    X = np.concatenate((pdata.X, ndata.X))
    y = np.zeros(X.shape[0])
    y[:pdata.X.shape[0]] = 1

    # Shuffle rows and labels with one shared permutation.  list(...) makes
    # the index sequence mutable regardless of what range() returns.
    idxs = list(range(len(y)))
    shuffle_rng.shuffle(idxs)
    X = X[idxs, :]
    y = y[idxs]

    # Cross-validation: accumulate the positive-class probability of every row
    # while it is held out, and count how often each row was scored.
    skf = StratifiedKFold(y, n_folds=3)
    y_all_proba = np.zeros(y.shape)
    y_all_count = np.zeros(y.shape)

    for train, test in skf:
        clf.fit(X[train, :], y[train])
        y_proba = clf.predict_proba(X[test, :])[:, 1]
        auc = roc_auc_score(y[test], y_proba)
        y_all_proba[test] += y_proba
        y_all_count[test] += 1
        print(auc)  # per-fold AUC
    # Normalise by the number of times each row was scored.
    y_all_proba /= y_all_count
    # Overall AUC of the out-of-fold probabilities for this subject.
    print('%s %s' % (target, roc_auc_score(y, y_all_proba)))

    # Calibration model: maps a raw forest probability to a calibrated one,
    # learned from the out-of-fold probabilities and the true labels.
    lr = LR()
    lr.fit(y_all_proba.reshape(-1, 1), y)  # LR needs X to be 2-dimensional

    # Final model: refit the forest on the whole training set.
    clf.fit(X, y)

    # Predict the test examples and pass the raw probabilities through the
    # calibrator.
    tdata = read_data(target, 'test')  # test examples
    y_proba = clf.predict_proba(tdata.X)[:, 1]
    y_calibrated = lr.predict_proba(y_proba.reshape(-1, 1))[:, 1]

    # Write one CSV row per test prediction.
    # NOTE(review): the clip name is derived from the row position (1-based),
    # which assumes tdata.X holds exactly one row per test segment in segment
    # order -- confirm against the feature pipeline.
    for i, p in enumerate(y_calibrated):
        fpout.write('%s_test_segment_%04d.mat,%.15f' % (target, i + 1, p) + '\n')


Loaded ../data-cache/data_preictal_Dog_1_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_interictal_Dog_1_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
0.980714285714
0.955714285714
0.992095588235
Dog_1 0.974399038462
Loaded ../data-cache/data_test_Dog_1_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_preictal_Dog_2_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_interictal_Dog_2_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
0.999705507019
0.999901835673
1.0
Dog_2 0.999549450549
Loaded ../data-cache/data_test_Dog_2_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_preictal_Dog_3_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_interictal_Dog_3_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
0.989342948718
0.995813301282
0.981069711538
Dog_3 0.9888065349
Loaded ../data-cache/data_test_Dog_3_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_preictal_Dog_4_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_interictal_Dog_4_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
0.990550842908
0.984242456781
0.988537528186
Dog_4 0.987556223678
Loaded ../data-cache/data_test_Dog_4_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_preictal_Dog_5_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_interictal_Dog_5_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
0.998939393939
1.0
0.999069767442
Dog_5 0.999299145299
Loaded ../data-cache/data_test_Dog_5_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_preictal_Patient_1_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_interictal_Patient_1_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
1.0
1.0
1.0
Patient_1 0.999743589744
Loaded ../data-cache/data_test_Patient_1_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_preictal_Patient_2_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
Loaded ../data-cache/data_interictal_Patient_2_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s
1.0
1.0
0.989010989011
Patient_2 0.9942002442
Loaded ../data-cache/data_test_Patient_2_gen4_medianwindow-fft-with-time-freq-corr-1-48-r400-usf-w600.hkl in 0s

In [9]:
# Close the submission file so that all buffered rows are written to disk.
fpout.close()

In [ ]: