In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random

In [2]:
import sys
sys.path.append('..')

In [3]:
target = 'Patient_2'
data_type = 'preictal' # preictal interictal, test

according to 140905-feature-importance the most important cannel is


In [4]:
channel = 3

In [5]:
import scipy.io
segment = 1 # start segment
all_data = last_sequence = last_data_length_sec = last_Fs = last_channels = last_d_shape = None
nsegments = 0
while True:
    fname = '../seizure-data/%s/%s_%s_segment_%04d.mat'%(target,target,data_type,segment)
    try:
        data = scipy.io.loadmat(fname)
    except:
        break
    k = '%s_segment_%d'%(data_type,segment)
    for k1 in data.keys():
        if k1 != k and data[k1]:
            print data[k1],
    print
    data_length_sec = data[k]['data_length_sec'][0,0][0,0]
    try:
        sequence = data[k]['sequence'][0,0][0,0]
    except:
        sequence = 1 # test data
    Fs = float(data[k]['sampling_frequency'][0,0][0,0])
    channels = [t[0] for t in data[k]['channels'][0,0][0]]
    d = data[k]['data'][0,0]
    print segment,data_length_sec,sequence,Fs,d.shape
    
    assert len(channels) == d.shape[0]
    assert int(Fs*data_length_sec + 0.5) == d.shape[1],int(Fs*data_length_sec + 0.5)
    assert last_data_length_sec is None or last_data_length_sec == data_length_sec
    last_data_length_sec = data_length_sec
    assert last_Fs is None or last_Fs == Fs
    last_Fs = Fs
    assert last_channels is None or all(c1==c2 for c1,c2 in zip(last_channels, channels))
    last_channels = channels
    assert last_d_shape is None or last_d_shape == d.shape
    last_d_shape = d.shape
    
    if last_sequence is None:
        all_data = d
        last_sequence = sequence
        N = d.shape[1]
        nsegments = 1
    elif last_sequence+1 == sequence:
        assert N == d.shape[1]
        all_data = np.hstack((all_data,d))
        last_sequence = sequence
        nsegments += 1
    else:
        break
        # get the last sequence
        all_data = last_sequence = last_data_length_sec = last_Fs = last_channels = last_d_shape = None
        nsegments = 0
    segment += 1
data_length_sec, sequence, Fs, all_data.shape, nsegments


1.0 MATLAB 5.0 MAT-file, Platform: MACI64, Created on: Thu Aug 21 01:00:00 2014
1 600 1 5000.0 (24, 3000000)
1.0 MATLAB 5.0 MAT-file, Platform: MACI64, Created on: Thu Aug 21 01:00:00 2014
2 600 2 5000.0 (24, 3000000)
1.0 MATLAB 5.0 MAT-file, Platform: MACI64, Created on: Thu Aug 21 01:00:00 2014
3 600 3 5000.0 (24, 3000000)
1.0 MATLAB 5.0 MAT-file, Platform: MACI64, Created on: Thu Aug 21 01:00:00 2014
4 600 4 5000.0 (24, 3000000)
1.0 MATLAB 5.0 MAT-file, Platform: MACI64, Created on: Thu Aug 21 01:00:00 2014
5 600 5 5000.0 (24, 3000000)
1.0 MATLAB 5.0 MAT-file, Platform: MACI64, Created on: Thu Aug 21 01:00:00 2014
6 600 6 5000.0 (24, 3000000)
1.0 MATLAB 5.0 MAT-file, Platform: MACI64, Created on: Thu Aug 21 01:00:00 2014
7 600 1 5000.0 (24, 3000000)
Out[5]:
(600, 1, 5000.0, (24, 18000000), 6)

In [6]:
segments_mean = None
for c in range(nsegments):
    segment_mean = all_data[:,c*N:(c+1)*N].mean(axis=-1)
    segments_mean = segment_mean if segments_mean is None else np.hstack((segments_mean,segment_mean))

note that the means over all segments and channels are between $-0.5$ to $+0.5$ which looks like an integer mean was removed


In [7]:
pl.hist(segments_mean);



In [8]:
cutoff = Fs/100. # desired cutoff frequency of the filter, Hz
import scipy.signal
nyq = 0.5 * Fs
normal_cutoff = cutoff / nyq
order = 6
b, a = scipy.signal.butter(order, normal_cutoff, btype='low', analog=False)
# scipy.signal.lfilter(b, a, data)
h = scipy.signal.firwin(100, normal_cutoff)

In [10]:
from scipy.interpolate import splrep, splev
S = 10

Nbase = 8
Nchannels = 4 # d.shape[0]
W=1000
W1 = max(int(Fs),W)
c=1
alpha=0.99999
Q = 10
print alpha**Q
norm = 1./sum(alpha**n for n in range(Q))

for i in range(Nchannels):
    pl.subplot(Nchannels,1,i+1)
    channel = i+Nbase
    pl.plot(all_data[channel,c*N-W:c*N+W])
    # use filtfilt to get zero phase http://wiki.scipy.org/Cookbook/FiltFilt
#     x1 = scipy.signal.filtfilt(b, a, all_data[channel,c*N-W1:c*N])#,padtype=None)
#     x2 = scipy.signal.filtfilt(b, a, all_data[channel,c*N+W1-1:c*N-1:-1])#,padtype=None)[::-1]
#     x1 = norm*np.array([sum(alpha**n * all_data[channel,j-n] for n in range(Q)) for j in range(c*N-W,c*N)])
#     x2 = norm*np.array([sum(alpha**n * all_data[channel,j+n] for n in range(Q)) for j in range(c*N,c*N+W)])
#     pl.plot(np.hstack((x1[-W:],x2[:W])))
#     x3 = scipy.signal.lfilter(h, 1, all_data[i+Nbase,c*N-W1:c*N+W1])
#     pl.plot(x3[W1-W:W1+W])
#     spl = splrep(np.arange(S), all_data[channel,c*N-S:c*N], k=3, s=15)
#     y1 = splev(S, spl)
#     spl = splrep(np.arange(S), all_data[channel,c*N+S-1:c*N-1:-1], k=1, s=5)
#     y2 = splev(S, spl)
#     print (all_data[channel,c*N-1],all_data[channel,c*N]),(y1,y2),all_data[channel,c*N-S:c*N],all_data[channel,c*N:c*N+S]
    pl.title(channels[i+Nbase])
pl.gcf().set_size_inches(14,8);


0.9999000045

In [23]:
data_offset[0]


Out[23]:
1126.7

In [ ]:
from scipy.interpolate import splrep, splev
S = 10

Nbase = 0
Nchannels = 4 # d.shape[0]
W=1000
W1 = max(int(Fs),W)
c=1
alpha=0.99999
Q = 10
print alpha**Q
norm = 1./sum(alpha**n for n in range(Q))

this is what we use when gen_ictal is negative


In [ ]:
prev_data = all_data[:,c*N-W:c*N]
next_data = all_data[:,c*N:c*N+W]
data_offset = next_data[:,0:10].mean(axis=-1) - prev_data[:,-10:].mean(axis=-1)

In [27]:
for i in range(Nchannels):
    pl.subplot(Nchannels,1,i+1)
    channel = i+Nbase
    
    prev_data = all_data[channel,c*N-W:c*N]
    next_data = all_data[channel,c*N:c*N+W] - data_offset[channel]
    
    d = np.concatenate((prev_data, next_data), axis=-1)
    
    pl.plot(d)
    pl.title(channels[i+Nbase])
pl.gcf().set_size_inches(14,8);


0.9999000045

here is an alternative

a good way to measure match between edge columns


In [ ]:
-2.*np.dot(j1,j2)/(np.dot(j1,j1) + np.dot(j2,j2))

lets say we add a constant to left column (j1)


In [ ]:
-2.*np.dot(j1+c,j2)/(np.dot(j1+c,j1+c) + np.dot(j2,j2))

which is


In [ ]:
-2.*(np.dot(j1,j2)+c*np.sum(j2))/(np.dot(j1+c,j1+c) + np.dot(j2,j2))

we can use the identity:


In [ ]:
np.dot(j1+c,j1+c) = np.dot(j1,j1+c) + np.dot(c,j1+c) = \
np.dot(j1,j1+c) + c*np.sum(j1+c) = \
np.dot(j1,j1) + np.dot(j1,c) + c*np.sum(j1) + c*c*N = \
np.dot(j1,j1) + 2*c*np.sum(j1) + c*c*N

to minimize for c