In [ ]:
###################################################################
## Generate spectrogram features using short-time Fourier transform
###################################################################
import os, sys
import pickle
import numpy as np
from scipy.io import loadmat
from scipy.signal import resample

def stft(x, fftsize=1024, overlap=4):
    """Hann-windowed STFT with hop size fftsize/overlap."""
    hop = fftsize // overlap
    w = np.hanning(fftsize + 1)[:-1]
    return np.array([np.fft.rfft(w * x[i:i + fftsize])
                     for i in range(0, len(x) - fftsize, hop)])

for i in range(4):
    print i
    sys.stdout.flush()
    dn = './data/clips/Dog_%d/' % (i + 1)
    fns = [fn for fn in os.listdir(dn) if '.mat' in fn]
    feat = []
    labels = []
    for fn in fns:
        m = loadmat(dn + fn)
        d = m['data']
        # Resample every clip to a common 500 Hz sampling rate
        d = resample(d, int(d.shape[1] * 500 / float(m['freq'])), axis=1)
        # Per-channel spectrogram magnitudes
        s = np.array([stft(x, 64, 8) for x in d])
        s = np.sqrt(np.real(s) ** 2 + np.imag(s) ** 2)
        # Label: 0 = interictal, 1 = ictal, -1 = unlabeled test clip
        if 'inter' in fn:
            l = 0
        elif '_ictal' in fn:
            l = 1
        else:
            l = -1
        labels.append(l)
        feat.append(s.flatten())
    feat = np.array(feat)
    labels = np.array(labels)
    fns = np.array(fns)
    d = {}
    d['feat'] = feat
    d['labels'] = labels
    d['feat_norm'] = (feat - feat.mean(0)) / feat.std(0)
    d['fns'] = fns
    np.savez('./data/data_Dog_%d' % (i + 1), **d)

for i in range(8):
    print i
    sys.stdout.flush()
    dn = './data/clips/Patient_%d/' % (i + 1)
    fns = [fn for fn in os.listdir(dn) if '.mat' in fn]
    feat = []
    labels = []
    for fn in fns:
        m = loadmat(dn + fn)
        d = m['data']
        d = resample(d, int(d.shape[1] * 500 / float(m['freq'])), axis=1)
        s = np.array([stft(x, 64, 8) for x in d])
        s = np.sqrt(np.real(s) ** 2 + np.imag(s) ** 2)
        if 'inter' in fn:
            l = 0
        elif '_ictal' in fn:
            l = 1
        else:
            l = -1
        labels.append(l)
        feat.append(s.flatten())
    feat = np.array(feat)
    labels = np.array(labels)
    fns = np.array(fns)
    d = {}
    d['feat'] = feat
    d['labels'] = labels
    d['feat_norm'] = (feat - feat.mean(0)) / feat.std(0)
    d['fns'] = fns
    np.savez('./data/data_Patient_%d' % (i + 1), **d)
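A quick way to confirm that a feature file came out as expected is to load it back and look at the array shape and label counts. The cell below is an illustrative sanity check, not part of the original pipeline, and assumes data_Dog_1.npz has been written by the cell above.
In [ ]:
## Sanity check on one saved feature file (illustrative only)
import numpy as np
d = np.load('./data/data_Dog_1.npz')
print d['feat'].shape            # (n_clips, n_flattened_spectrogram_values)
print (d['labels'] == 0).sum()   # interictal training clips
print (d['labels'] == 1).sum()   # ictal training clips
print (d['labels'] == -1).sum()  # unlabeled test clips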
In [ ]:
####################################################################
## Make predictions with logistic regression on spectrogram features
####################################################################
import sys
import numpy as np
import sklearn.linear_model

preds = []
for i in range(4):
    print 'Dog %d' % (i + 1)
    sys.stdout.flush()
    d = np.load('./data/data_Dog_%d.npz' % (i + 1))
    # Labeled clips (0/1) for training, unlabeled (-1) clips for the submission
    labels = d['labels'][d['labels'] != -1]
    data_normed = d['feat_norm'][d['labels'] != -1]
    data_test = d['feat_norm'][d['labels'] == -1]
    fn_test = d['fns'][d['labels'] == -1]
    clf = sklearn.linear_model.LogisticRegression()
    clf.fit(data_normed, labels)
    print clf.score(data_normed, labels)  # training accuracy, rough sanity check only
    pred_test = clf.predict_proba(data_test)
    for fn, p in zip(fn_test, pred_test):
        preds.append('%s,%f,0' % (fn, p[1]))

for i in range(8):
    print 'Patient %d' % (i + 1)
    sys.stdout.flush()
    d = np.load('./data/data_Patient_%d.npz' % (i + 1))
    labels = d['labels'][d['labels'] != -1]
    data_normed = d['feat_norm'][d['labels'] != -1]
    data_test = d['feat_norm'][d['labels'] == -1]
    fn_test = d['fns'][d['labels'] == -1]
    clf = sklearn.linear_model.LogisticRegression()
    clf.fit(data_normed, labels)
    print clf.score(data_normed, labels)
    pred_test = clf.predict_proba(data_test)
    for fn, p in zip(fn_test, pred_test):
        preds.append('%s,%f,0' % (fn, p[1]))

of = open('submission.csv', 'w')
of.write('clip,seizure,early\n')
for p in preds:
    of.write(p + '\n')
of.close()
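The score printed above is training accuracy, so it says little about generalization. One optional check is to hold out part of the labeled clips and compute ROC AUC on them. The sketch below is illustrative only: it uses a plain random split (which can be optimistic when neighbouring clips come from the same recording) and assumes data_Dog_1.npz exists.
In [ ]:
## Illustrative held-out AUC check (not part of the original pipeline)
import numpy as np
import sklearn.linear_model
from sklearn.metrics import roc_auc_score
d = np.load('./data/data_Dog_1.npz')
mask = d['labels'] != -1
X, y = d['feat_norm'][mask], d['labels'][mask]
# Random 75/25 split of the labeled clips
idx = np.random.RandomState(0).permutation(len(y))
n_tr = int(0.75 * len(y))
tr, te = idx[:n_tr], idx[n_tr:]
clf = sklearn.linear_model.LogisticRegression()
clf.fit(X[tr], y[tr])
print roc_auc_score(y[te], clf.predict_proba(X[te])[:, 1])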
In [ ]:
###################################################################
## Generate maximal cross-correlation features
###################################################################
import os, sys
import numpy as np
from scipy.io import loadmat
from scipy.signal import resample
from joblib import Parallel, delayed

def max_ccors(X):
    """Maximal normalized cross-correlation for every pair of channels in X."""
    # Overlap length at each lag, used to normalize the 'full' correlation
    N = np.correlate(np.ones(X.shape[1]), np.ones(X.shape[1]), 'full')
    # Zero-lag autocorrelation (energy) of each channel
    ac = [np.correlate(X[i], X[i])[0] for i in range(X.shape[0])]
    mcc = []
    for i in range(X.shape[0]):
        for j in range(i):
            cc = np.correlate(X[i], X[j], 'full') / N * 1. / np.sqrt(ac[i] * ac[j] + 1e-6)
            mcc.append(cc.max())
    return np.array(mcc)

def process(fn):
    m = loadmat(fn)
    d = m['data']
    # Resample to a common 500 Hz sampling rate before correlating
    d = resample(d, int(d.shape[1] * 500 / float(m['freq'])), axis=1)
    s = max_ccors(d)
    # Label: 0 = interictal, 1 = ictal, -1 = unlabeled test clip
    if 'inter' in fn:
        l = 0
    elif '_ictal' in fn:
        l = 1
    else:
        l = -1
    return l, s

for i in range(4):
    print i
    sys.stdout.flush()
    dn = './data/clips/Dog_%d/' % (i + 1)
    fns = [fn for fn in os.listdir(dn) if '.mat' in fn]
    results = Parallel(n_jobs=-1)(delayed(process)(dn + fn) for fn in fns)
    labels = np.array([x[0] for x in results])
    feat = np.array([x[1] for x in results])
    fns = np.array(fns)
    d = {}
    d['labels'] = labels
    d['feat_norm'] = (feat - feat.mean(0)) / feat.std(0)
    d['fns'] = fns
    d['feat'] = feat
    np.savez('./data/data_ccor_Dog_%d' % (i + 1), **d)

for i in range(8):
    print i
    sys.stdout.flush()
    dn = './data/clips/Patient_%d/' % (i + 1)
    fns = [fn for fn in os.listdir(dn) if '.mat' in fn]
    results = Parallel(n_jobs=-1)(delayed(process)(dn + fn) for fn in fns)
    labels = np.array([x[0] for x in results])
    feat = np.array([x[1] for x in results])
    fns = np.array(fns)
    d = {}
    d['labels'] = labels
    d['feat_norm'] = (feat - feat.mean(0)) / feat.std(0)
    d['fns'] = fns
    d['feat'] = feat
    np.savez('./data/data_ccor_Patient_%d' % (i + 1), **d)
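To see what max_ccors measures, it helps to run it on a small synthetic example: a channel that is a delayed copy of another should yield a much larger maximal cross-correlation than an independent-noise channel. This is a toy illustration only, and it assumes the cell above (which defines max_ccors) has been run.
In [ ]:
## Toy check of max_ccors (illustrative only; max_ccors defined in the cell above)
import numpy as np
rng = np.random.RandomState(0)
x = rng.randn(2000)
X = np.vstack([x, np.roll(x, 50), rng.randn(2000)])
# Pairs are emitted in order (1,0), (2,0), (2,1); the first value (delayed copy
# vs. original) should sit well above the two involving the independent channel.
print max_ccors(X)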
In [ ]:
#############################################################################
## Prepare files for scattering coefficient extraction (using Matlab/Octave)
#############################################################################
import sys, os
import scipy.io
import scipy.signal

# Dogs: only write the input/output file lists used by the Matlab/Octave script
for i in range(1, 5):
    dn = 'Dog_%d' % i
    fns = os.listdir('./data/clips/' + dn)
    ifns = ['./data/clips/' + dn + '/' + fn for fn in fns]
    ofns = ['./data/scattering/' + dn + '/' + fn for fn in fns]
    scipy.io.savemat('./data/scattering/' + dn + '_fns.mat', {'ofns': ofns, 'ifns': ifns})

# Patients: resample each clip to 400 samples first, then write the file lists
for i in range(1, 9):
    dn = 'Patient_%d' % i
    try:
        os.makedirs('./data/resample/' + dn)
    except OSError:
        # Resample directory already exists: skip this patient
        continue
    print dn
    sys.stdout.flush()
    fns = os.listdir('./data/clips/' + dn)
    ifns = ['./data/resample/' + dn + '/' + fn for fn in fns]
    ofns = ['./data/scattering/' + dn + '/' + fn for fn in fns]
    for fn in fns:
        data = scipy.io.loadmat('./data/clips/' + dn + '/' + fn)['data']
        data = scipy.signal.resample(data, 400, axis=1)
        scipy.io.savemat('./data/resample/' + dn + '/' + fn, {'data': data})
    scipy.io.savemat('./data/scattering/' + dn + '_fns.mat', {'ofns': ofns, 'ifns': ifns})
MATLAB/Octave code to calculate scattering coefficients from the clips (the scattering functions below come from the ScatNet toolbox).
% Signal length: each clip was resampled to 400 samples per channel above.
N = 400;
% Averaging scale of the scattering transform, in samples.
T = 40;
% First-order filter bank with 8 wavelets per octave. Second-order filter bank
% with 1 wavelet per octave.
filt_opt.Q = [8 1];
% Calculate maximal wavelet scale so that the largest wavelet has bandwidth T.
filt_opt.J = T_to_J(T, filt_opt);
% Only calculate scattering coefficients up to second order.
scat_opt.M = 2;
% Prepare wavelet transforms to be used for scattering.
Wop = wavelet_factory_1d(N, filt_opt, scat_opt);
feature_fun = @(x)(format_scat(log_scat(renorm_scat(scat(x, Wop)))));
mats = {'Dog_3_fns.mat', 'Dog_4_fns.mat', 'Patient_1_fns.mat', 'Patient_2_fns.mat', 'Patient_3_fns.mat', 'Patient_4_fns.mat', 'Patient_5_fns.mat', 'Patient_6_fns.mat', 'Patient_7_fns.mat', 'Patient_8_fns.mat'};
% Process one subject's file list at a time (here mats{2}); widen the range to
% loop over all subjects.
for j = 2:2
    load(mats{j})
    for i = 1:length(ifns)
        load(strtrim(ifns(i, :)));
        % Arrange as samples x 1 x channels (16 channels assumed here).
        d = reshape(transpose(data), 400, 1, 16);
        f = feature_fun(d);
        save(strtrim(ofns(i, :)), 'f');
    end
end
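Once the MATLAB/Octave loop has written one .mat file of scattering coefficients per clip, those files can be collected back into a per-subject feature matrix analogous to the spectrogram cells above. The sketch below is illustrative only: it assumes the loop above was run with j=2 (the Dog_4 file list) and that each output file stores the variable f saved there.
In [ ]:
## Collect scattering coefficients into a feature matrix (illustrative sketch)
import os
import numpy as np
from scipy.io import loadmat
dn = './data/scattering/Dog_4/'
fns = sorted(fn for fn in os.listdir(dn) if fn.endswith('.mat'))
feat = np.array([loadmat(dn + fn)['f'].flatten() for fn in fns])
print feat.shape   # (n_clips, n_scattering_coefficients)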
In [ ]: