In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random
from collections import defaultdict
In [3]:
from IPython.parallel import Client
import time

# Minimum number of engines that must be reachable before the notebook continues.
MIN_ENGINES = 32

# Keep (re)connecting to the IPython.parallel controller until enough engines
# are visible. `client`, `lv` and `clients` are left bound to the last
# successful connection for use by later cells.
Ncores = 0
while Ncores < MIN_ENGINES:
    try:
        client = Client()
        lv = client.load_balanced_view()
        #lv.set_flags(block = False, retries = 0)
        clients = client[:]
        Ncores = len(clients)
    except Exception:
        # Connection or view creation failed; treat it as "no engines yet".
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still
        # stops the wait.
        Ncores = 0
    if Ncores < MIN_ENGINES:
        # Pause between attempts instead of spinning and opening a new
        # connection as fast as the CPU allows.
        time.sleep(1)
print(Ncores)
In [4]:
def work(task):
    """Filter/resample every segment file of one (target, data_type) pair.

    Segment files
    ``/vol2/seizure-prediction/seizure-data/<target>/<target>_<data_type>_segment_NNNN.mat``
    are read for NNNN = 0001, 0002, ... until one fails to load (at most
    10000 are tried). Each segment is checked for consistency with the
    previous ones (length, sampling frequency, channel names, data shape),
    processed, and written as a gzip-compressed hickle file to
    ``/vol2/seizure-prediction/filtered-seizure-data/<target>/`` with the
    sequence number appended to the file name.

    Processing depends on the sampling frequency:

    * within 100 Hz of 5000 Hz: consecutive segments of a sequence are
      offset-corrected so each channel starts at the value the previous
      segment ended on, then a notch around 60 Hz is applied while
      resampling to 239766 samples per segment;
    * otherwise the data is copied unchanged.

    All imports are done inside the function; it is dispatched through
    ``lv.map`` (presumably so it is self-contained on the remote engines).

    Parameters
    ----------
    task : sequence
        ``(target, data_type)``, e.g. ``('Dog_1', 'preictal')``.

    Returns
    -------
    tuple
        ``(target, data_type, segment, err)``. ``segment`` is the zero-based
        loop index at which the loop stopped (i.e. the number of segments
        processed when it stopped on a failed load). ``err`` is the path of
        the file whose load failed, or ``2`` if the last attempted load
        succeeded.

    Raises
    ------
    AssertionError
        If a segment is inconsistent with its declared length or with the
        previously processed segments.
    """
    import scipy.io
    from scipy.signal import resample
    import hickle as hkl
    import numpy as np

    err = 0
    target = task[0]
    data_type = task[1]
    outdir = '/vol2/seizure-prediction/filtered-seizure-data/%s'%target
    last_sequence = last_data_length_sec = last_Fs = last_channels = last_d_shape = None
    # Last sample of every channel of the previous segment (after offset
    # correction); None at the start of a new sequence.
    prev_data = None
    for segment in range(10000):
        fname = '/vol2/seizure-prediction/seizure-data/%s/%s_%s_segment_%04d.mat'%(target,target,data_type,segment+1)
        try:
            err = fname
            data = scipy.io.loadmat(fname)
            err = 2
        except Exception:
            # A failed load ends the scan; `err` keeps the offending path.
            break

        k = '%s_segment_%d'%(data_type,segment+1)
        data_length_sec = data[k]['data_length_sec'][0,0][0,0]
        try:
            sequence = data[k]['sequence'][0,0][0,0]
        except Exception:
            # No usable 'sequence' field in this file: fall back to 1.
            sequence = 1
        Fs = float(data[k]['sampling_frequency'][0,0][0,0])
        channels = [ch[0] for ch in data[k]['channels'][0,0][0]]
        d = data[k]['data'][0,0]

        # Sanity checks: the segment matches its own header and all the
        # segments seen before it.
        assert len(channels) == d.shape[0]
        N = d.shape[1]
        assert int(Fs*data_length_sec + 0.5) == N,int(Fs*data_length_sec + 0.5)
        assert last_data_length_sec is None or last_data_length_sec == data_length_sec
        last_data_length_sec = data_length_sec
        assert last_Fs is None or last_Fs == Fs
        last_Fs = Fs
        assert last_channels is None or all(c1==c2 for c1,c2 in zip(last_channels, channels))
        last_channels = channels
        assert last_d_shape is None or last_d_shape == d.shape
        last_d_shape = d.shape

        d = d.astype(float)

        # A break in the sequence numbering means this segment does not
        # continue the previous one, so there is nothing to align it to.
        if last_sequence is not None and last_sequence+1 != sequence:
            prev_data = None
        last_sequence = sequence

        # remove hum and resample to 399.61Hz or 239766 samples per segment
        if abs(Fs-5000) < 100:
            # only Patients need offset correction
            if prev_data is not None:
                data_offset = d[:,0] - prev_data
                d -= data_offset.reshape(-1,1)
            # Must index the sample array `d`; `data` is the dict returned
            # by loadmat and cannot be sliced. Copy so the whole segment is
            # not kept alive through a view.
            prev_data = d[:,-1].copy()

            def mynotch(fftfreq, notchfreq=60., notchwidth=5., Fs=Fs):
                # Frequency-domain mask passed to resample() as `window`:
                # 0.0 for bins within notchwidth/2 of +/-notchfreq, 1.0
                # elsewhere. Frequencies are divided by Fs to compare with
                # `fftfreq` (assumed to be in cycles per sample).
                return np.double(np.abs(np.abs(fftfreq) - notchfreq/Fs) > (notchwidth/2.)/Fs)

            data_resamp = resample(d, 239766, axis=-1, window=mynotch)
        else:
            data_resamp = d.copy()

        # save result in compressed HDF5, keep the sequence number in the file name
        foutname = outdir + '/%s_%s_segment_%04d_%d.hkl'%(target,data_type,segment+1,sequence)
        hkl.dump(data_resamp, foutname, mode="w", compression='gzip')
    return target, data_type, segment, err
In [5]:
# Recording classes; each name is substituted into the segment file names
# read by work().
data_types = [
    'preictal',
    'interictal',
    'test',
]
# Subject directories to process.
targets = [
    'Dog_1',
    'Dog_2',
    'Dog_3',
    'Dog_4',
    'Dog_5',
    'Patient_1',
    'Patient_2',
]
In [6]:
# One task per (target, data_type) pair, with the target varying slowest;
# the tasks are handed to work() through the load-balanced view.
tasks = [(target, data_type) for target in targets for data_type in data_types]
results = lv.map(work, tasks)
In [ ]:
# Show the (target, data_type, segment, err) tuple returned by each task.
for outcome in results:
    print(outcome)
In [ ]: