Use nolearn's DBN https://pythonhosted.org/nolearn/dbn.html
In [30]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random
In [31]:
import sys
sys.path.append('..')
Uncomment the relevant pipeline in ../seizure_detection.py and run:
cd ..
./doall data
or
./doall td
./doall tt
In [32]:
FEATURES = 'gen-8_allbands2-usf-w60-b0.2-b4-b8-b12-b30-b70' # w60: 60 time windows; the bX tokens are frequency-band edges in Hz
In [33]:
nbands = 0
nwindows = 0
for p in FEATURES.split('-'):
    if p[0] == 'b':
        nbands += 1
    elif p[0] == 'w':
        nwindows = int(p[1:])
nbands -= 1 # the b tokens are band edges, so #bands = #edges - 1
nbands, nwindows
Out[33]:
(5, 60)
In [34]:
NUNITS = 1
In [35]:
from common.data import CachedDataLoader
cached_data_loader = CachedDataLoader('../data-cache')
In [36]:
def read_data(target, data_type):
    fname = 'data_%s_%s_%s' % (data_type, target, FEATURES)
    print fname
    return cached_data_loader.load(fname, None)
In [37]:
def process(X, percentile=[0.1,0.5,0.9], nunits=NUNITS):
    N, Nf = X.shape
    print '# samples', N, '# power points', Nf
    print '# channels', Nf / (nbands*nwindows)
    newX = []
    for i in range(N):
        nw = nwindows // nunits
        windows = X[i,:].reshape((nunits, nw, -1))
        # sort the windows over time so we can pick percentile summaries
        sorted_windows = np.sort(windows, axis=1)
        features = np.concatenate([sorted_windows[:,int(p*nw),:] for p in percentile], axis=-1)
        newX.append(features.ravel())
    newX = np.array(newX)
    return newX
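A quick shape check on synthetic data makes the transform concrete (the 4 samples, 16 channels and random values below are made up, just to exercise the code): each sample shrinks from nwindows values per band/channel to the three requested percentile summaries.
demo = np.random.rand(4, 16 * nbands * nwindows) # 4 fake samples, 16 channels
print process(demo).shape # expect (4, 16 * nbands * 3), i.e. (4, 240)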
In [40]:
from sklearn import preprocessing
from nolearn.dbn import DBN
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
min_max_scaler = preprocessing.MinMaxScaler() # scale features to be [0..1] which is DBN requirement
dbn = DBN(
    [-1, 300, -1], # [input, hidden, output]; the -1 placeholders are filled in later: input gets X.shape[1], output gets the number of classes in y (2)
    learn_rates=0.3,
    learn_rate_decays=0.9,
    epochs=500,
    dropouts=[0.1, 0.5],
    verbose=0,
)
clf = Pipeline([('min_max_scaler', min_max_scaler), ('dbn', dbn)])
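The -1 entries in layer_sizes are placeholders that are replaced before each fit (see the set_params call in the training loop below). A throwaway smoke test on random data, with arbitrary sizes, can verify the pipeline wiring without touching clf:
Xs = np.random.rand(20, 10) # 20 fake samples, 10 features
ys = np.array([0, 1] * 10)
smoke = Pipeline([('min_max_scaler', preprocessing.MinMaxScaler()),
                  ('dbn', DBN([10, 5, 2], epochs=2, verbose=0))])
smoke.fit(Xs, ys)
print smoke.predict_proba(Xs).shape # (20, 2)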
In [41]:
fpout = open('../submissions/141105-predict.3.csv','w')
print >>fpout,'clip,preictal'
In [27]:
for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    pdata = read_data(target, 'preictal') # positive examples
    ndata = read_data(target, 'interictal') # negative examples
    X = np.concatenate((pdata.X, ndata.X))
    X = process(X)
    _, NF = X.shape
    X = scale.fit_transform(X)
    X = np.clip(X, -3, 5) # limit outliers so the MinMaxScaler isn't dominated by extremes
    clf.set_params(dbn__layer_sizes=[NF, 300, 2]) # we need to reset each time because NF is different per target
    y = np.zeros(X.shape[0])
    y[:pdata.X.shape[0]] = 1
    # shuffle
    idxs = range(len(y))
    random.shuffle(idxs)
    X = X[idxs,:]
    y = y[idxs]
    # model
    clf.fit(X, y)
    # predict
    tdata = read_data(target, 'test') # test examples
    Xt = process(tdata.X)
    Xt = scale.transform(Xt) # reuse the scaler fitted on training data; do not refit on test
    Xt = np.clip(Xt, -3, 5)
    y_proba = clf.predict_proba(Xt)[:,1]
    # write results
    for i, p in enumerate(y_proba):
        print >>fpout, '%s_test_segment_%04d.mat,%.15f' % (target, i+1, p)
In [28]:
fpout.close()
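Optionally, read the finished submission back with pandas (already imported above) to confirm the row count and that the probabilities lie in [0, 1]:
sub = pd.read_csv('../submissions/141105-predict.3.csv')
print len(sub), sub['preictal'].min(), sub['preictal'].max()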
In [29]:
pl.hist(Xt.ravel(),bins=50)
Out[29]:
[histogram of the clipped, standardized test features for the last target]