In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random
from collections import defaultdict
In [9]:
import scipy.io
from scipy.signal import resample, hann
import numpy.fft
import hickle as hkl
ham_energies = defaultdict(list)
for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
outdir = '../filtered-seizure-data/%s'%target
!mkdir {outdir}
for data_type in ['preictal', 'interictal', 'test']:
last_sequence = last_data_length_sec = last_Fs = last_channels = last_d_shape = None
for segment in range(10000):
fname = '../seizure-data/%s/%s_%s_segment_%04d.mat'%(target,target,data_type,segment+1)
try:
data = scipy.io.loadmat(fname)
except:
break
k = '%s_segment_%d'%(data_type,segment+1)
data_length_sec = data[k]['data_length_sec'][0,0][0,0]
try:
sequence = data[k]['sequence'][0,0][0,0]
except:
sequence = 1
Fs = float(data[k]['sampling_frequency'][0,0][0,0])
channels = [t[0] for t in data[k]['channels'][0,0][0]]
d = data[k]['data'][0,0]
assert len(channels) == d.shape[0]
N = d.shape[1]
assert int(Fs*data_length_sec + 0.5) == N,int(Fs*data_length_sec + 0.5)
assert last_data_length_sec is None or last_data_length_sec == data_length_sec
last_data_length_sec = data_length_sec
assert last_Fs is None or last_Fs == Fs
last_Fs = Fs
assert last_channels is None or all(c1==c2 for c1,c2 in zip(last_channels, channels))
last_channels = channels
assert last_d_shape is None or last_d_shape == d.shape
last_d_shape = d.shape
d = d.astype(float)
# undo DC removal on Patients training data
W = 1
if data_type != 'test' and Fs > 410 and last_sequence is not None and last_sequence < sequence:
data_left_edge_mean = d[:,:W].mean(axis=-1)
data_offset = data_left_edge_mean - last_data_right_edge_mean
d -= data_offset.reshape(-1,1)
last_data_right_edge_mean = d[:,-W:].mean(axis=-1)
# detect if the signal has a 60Hz ham noise
notchfreq = 60.
q = int(Fs/notchfreq+0.5)
matchfilter = np.empty((d.shape[0],q))
for i in range(q):
matchfilter[:,i] = np.mean(d[:,(np.arange(i,N,Fs/notchfreq)).astype(int)], axis=-1)
ham_energy = np.mean(np.std(matchfilter, axis=-1))
ham_energies[fname].append(ham_energy)
# build a filter which will remove all the harmoniues of 60Hz
if ham_energy > 1:
notchwidth = 0.8
notchwidths = notchwidth/2./Fs
notchfreqs = notchfreq/Fs
fftfreq = np.abs(numpy.fft.fftfreq(d.shape[-1]))
notch = np.ones(N)
for h in range(1,int((Fs/2.)/notchfreq+0.5)):
notch[np.abs(fftfreq - h*notchfreqs) <= notchwidths] = 0.
else:
notch = None
# remove ham and resample to 399.61Hz or 239766 samples per segment
if Fs > 410 or notch is not None:
data_resamp = resample(d, 239766, axis=-1, window=notch)
F = 399.61
# print np.sum(np.abs(fftfreq - h*notchfreqs) <= notchwidths)/float(N)
else:
data_resamp = d.copy()
F = Fs
# data[k]['sampling_frequency'][0,0][0,0] = F
# data[k]['data'][0,0] = data_resamp
# save result in compressed HDF5, keep the sequence number in the file name
foutname = '../filtered-seizure-data/%s/%s_%s_segment_%04d_%d.hkl'%(target,target,data_type,segment+1,sequence)
hkl.dump(data_resamp, foutname, mode="w", compression='gzip')
In [10]:
with open('../filtered-seizure-data/ham_energies.pkl', 'wb') as fp:
pickle.dump(ham_energies, fp, -1)
In [2]:
with open('../filtered-seizure-data/ham_energies.pkl', 'rb') as fp:
ham_energies = pickle.load(fp)
In [3]:
d = defaultdict(list)
for k, v in ham_energies.iteritems():
n = k.split('/')[2]
d[n].append(v[0])
In [4]:
for k, v in d.iteritems():
v = np.array(v)
print k, v.mean(), v.std(), v.max(), np.sum(v > 1)
In [ ]: