In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random
from collections import defaultdict

In [9]:
import scipy.io
from scipy.signal import resample, hann
import numpy.fft
import hickle as hkl

ham_energies = defaultdict(list)

for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    outdir = '../filtered-seizure-data/%s'%target
    !mkdir {outdir}
    for data_type in ['preictal', 'interictal', 'test']:
        last_sequence = last_data_length_sec = last_Fs = last_channels = last_d_shape = None
        for segment in range(10000):
            fname = '../seizure-data/%s/%s_%s_segment_%04d.mat'%(target,target,data_type,segment+1)
            try:
                data = scipy.io.loadmat(fname)
            except:
                break

            k = '%s_segment_%d'%(data_type,segment+1)
            data_length_sec = data[k]['data_length_sec'][0,0][0,0]
            try:
                sequence = data[k]['sequence'][0,0][0,0]
            except:
                sequence = 1
            Fs = float(data[k]['sampling_frequency'][0,0][0,0])
            channels = [t[0] for t in data[k]['channels'][0,0][0]]
            d = data[k]['data'][0,0]

            assert len(channels) == d.shape[0]
            N = d.shape[1]
            assert int(Fs*data_length_sec + 0.5) == N,int(Fs*data_length_sec + 0.5)
            assert last_data_length_sec is None or last_data_length_sec == data_length_sec
            last_data_length_sec = data_length_sec
            assert last_Fs is None or last_Fs == Fs
            last_Fs = Fs
            assert last_channels is None or all(c1==c2 for c1,c2 in zip(last_channels, channels))
            last_channels = channels
            assert last_d_shape is None or last_d_shape == d.shape
            last_d_shape = d.shape
            

            d = d.astype(float)
            
            # undo DC removal on Patients training data
            W = 1
            if data_type != 'test' and Fs > 410 and last_sequence is not None and last_sequence < sequence:
                data_left_edge_mean = d[:,:W].mean(axis=-1)
                data_offset = data_left_edge_mean - last_data_right_edge_mean
                d -= data_offset.reshape(-1,1)
            last_data_right_edge_mean = d[:,-W:].mean(axis=-1)
            
            # detect if the signal has a 60Hz ham noise
            notchfreq = 60.
            q = int(Fs/notchfreq+0.5)
            matchfilter = np.empty((d.shape[0],q))
            for i in range(q):
                matchfilter[:,i] = np.mean(d[:,(np.arange(i,N,Fs/notchfreq)).astype(int)], axis=-1)
            ham_energy = np.mean(np.std(matchfilter, axis=-1))
            ham_energies[fname].append(ham_energy)
            
            # build a filter which will remove all the harmoniues of 60Hz
            if ham_energy > 1:
                notchwidth = 0.8
                notchwidths = notchwidth/2./Fs
                notchfreqs = notchfreq/Fs
                fftfreq = np.abs(numpy.fft.fftfreq(d.shape[-1]))
                notch = np.ones(N)
                for h in range(1,int((Fs/2.)/notchfreq+0.5)):
                    notch[np.abs(fftfreq - h*notchfreqs) <= notchwidths] = 0.
            else:
                notch = None

            # remove ham and resample to 399.61Hz or 239766 samples per segment
            if Fs > 410 or notch is not None:
                data_resamp = resample(d, 239766, axis=-1, window=notch)
                F = 399.61
        #         print np.sum(np.abs(fftfreq - h*notchfreqs) <= notchwidths)/float(N)
            else:
                data_resamp = d.copy()
                F = Fs
#             data[k]['sampling_frequency'][0,0][0,0] = F
#             data[k]['data'][0,0] = data_resamp
            # save result in compressed HDF5, keep the sequence number in the file name
            foutname = '../filtered-seizure-data/%s/%s_%s_segment_%04d_%d.hkl'%(target,target,data_type,segment+1,sequence)
            hkl.dump(data_resamp, foutname, mode="w", compression='gzip')


dumping <type 'numpy.ndarray'> to file <HDF5 file "Dog_1_preictal_segment_0001_1.hkl" (mode r+)>
dumping <type 'numpy.ndarray'> to file <HDF5 file "Dog_1_preictal_segment_0002_2.hkl" (mode r+)>

In [10]:
with open('../filtered-seizure-data/ham_energies.pkl', 'wb') as fp:
    pickle.dump(ham_energies, fp, -1)

In [2]:
with open('../filtered-seizure-data/ham_energies.pkl', 'rb') as fp:
    ham_energies = pickle.load(fp)

In [3]:
d = defaultdict(list)
for k, v in ham_energies.iteritems():
    n = k.split('/')[2]
    d[n].append(v[0])

In [4]:
for k, v in d.iteritems():
    v = np.array(v)
    print k, v.mean(), v.std(), v.max(), np.sum(v > 1)


Dog_2 0.0326192303272 0.00790003661274 0.116521603785 0
Dog_3 0.0444002504133 0.0165562878097 0.114304152885 0
Dog_1 0.0352437171572 0.0107064466846 0.100232934898 0
Dog_4 0.0881492909567 0.0192787227559 0.174261883249 0
Dog_5 0.0534660452149 0.0258777272614 0.347694948647 0
Patient_2 0.665725882136 1.08888571089 11.3448262223 40
Patient_1 127.622399826 872.430725516 9123.07001337 77

In [ ]: