In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random
In [2]:
import sys
# Make modules in the repository root importable from this notebook.
sys.path.append('..')
In [3]:
# Which subject and which segment type of the seizure-prediction data to inspect.
target = 'Patient_2'
data_type = 'preictal' # one of: preictal, interictal, test
According to the 140905-feature-importance notebook, the most important channel is:
In [4]:
channel = 3  # most important channel per 140905-feature-importance; NOTE(review): overwritten by the plotting loops below
In [5]:
import scipy.io
# Load consecutive segment .mat files for (target, data_type) and concatenate
# the raw recordings of one contiguous "sequence" along the time axis into
# `all_data`.  Per-file metadata is sanity-checked against the previous file.
# NOTE: original notebook indentation was lost in extraction; the loop body
# below is kept byte-for-byte with explanatory comments interleaved.
segment = 1 # start segment
all_data = last_sequence = last_data_length_sec = last_Fs = last_channels = last_d_shape = None
nsegments = 0
while True:
fname = '../seizure-data/%s/%s_%s_segment_%04d.mat'%(target,target,data_type,segment)
try:
data = scipy.io.loadmat(fname)
except:
# NOTE(review): bare except — treats ANY load failure (missing file, corrupt
# file, even KeyboardInterrupt) as "no more segments" and stops the scan.
break
# The payload key inside the .mat file, e.g. 'preictal_segment_7'.
k = '%s_segment_%d'%(data_type,segment)
# Print any unexpected extra keys (besides the payload and empty metadata).
for k1 in data.keys():
if k1 != k and data[k1]:
print data[k1],
print
# Unwrap the MATLAB struct scalars: duration, sequence index, sampling rate.
data_length_sec = data[k]['data_length_sec'][0,0][0,0]
try:
sequence = data[k]['sequence'][0,0][0,0]
except:
sequence = 1 # test data has no 'sequence' field
Fs = float(data[k]['sampling_frequency'][0,0][0,0])
channels = [t[0] for t in data[k]['channels'][0,0][0]]
d = data[k]['data'][0,0]
print segment,data_length_sec,sequence,Fs,d.shape
# Sanity checks: channel count matches rows, samples match Fs * duration,
# and metadata is identical across files of the same recording.
assert len(channels) == d.shape[0]
assert int(Fs*data_length_sec + 0.5) == d.shape[1],int(Fs*data_length_sec + 0.5)
assert last_data_length_sec is None or last_data_length_sec == data_length_sec
last_data_length_sec = data_length_sec
assert last_Fs is None or last_Fs == Fs
last_Fs = Fs
assert last_channels is None or all(c1==c2 for c1,c2 in zip(last_channels, channels))
last_channels = channels
assert last_d_shape is None or last_d_shape == d.shape
last_d_shape = d.shape
# Accumulate: start a sequence, extend it while segment indices are
# consecutive, otherwise stop (or reset — see note below).
if last_sequence is None:
all_data = d
last_sequence = sequence
N = d.shape[1]
nsegments = 1
elif last_sequence+1 == sequence:
assert N == d.shape[1]
all_data = np.hstack((all_data,d))
last_sequence = sequence
nsegments += 1
else:
break
# NOTE(review): indentation lost — unclear whether the reset below belonged in
# the else-branch (to keep only the LAST sequence, matching the comment) or is
# dead code after the break.  TODO: confirm against the original notebook.
# get the last sequence
all_data = last_sequence = last_data_length_sec = last_Fs = last_channels = last_d_shape = None
nsegments = 0
segment += 1
# Display a summary of the sequence that was accumulated.
data_length_sec, sequence, Fs, all_data.shape, nsegments
Out[5]:
In [6]:
# One mean per (segment, channel): slice each segment's N columns out of the
# concatenated recording, average over time, and string the per-segment mean
# vectors together into a single flat array.
segments_mean = None
for c in range(nsegments):
    segment_mean = all_data[:, c * N:(c + 1) * N].mean(axis=-1)
    if segments_mean is None:
        segments_mean = segment_mean
    else:
        segments_mean = np.hstack((segments_mean, segment_mean))
Note that the means over all segments and channels lie between $-0.5$ and $+0.5$, which suggests an integer-valued mean was subtracted from the raw data.
In [7]:
# Histogram of the per-segment channel means (see the note above: they fall
# between -0.5 and +0.5).
pl.hist(segments_mean);
In [8]:
cutoff = Fs/100. # desired cutoff frequency of the filter, Hz
import scipy.signal
nyq = 0.5 * Fs  # Nyquist frequency, Hz
normal_cutoff = cutoff / nyq  # cutoff normalized to [0, 1], as scipy expects
order = 6
# IIR design: 6th-order Butterworth low-pass coefficients (b, a).
b, a = scipy.signal.butter(order, normal_cutoff, btype='low', analog=False)
# scipy.signal.lfilter(b, a, data)
# FIR design: 100-tap window-method low-pass with the same normalized cutoff.
h = scipy.signal.firwin(100, normal_cutoff)
In [10]:
from scipy.interpolate import splrep, splev

# Inspect the raw signal around the junction between segment c-1 and segment c
# (sample index c*N) for a few channels, to see the size of the inter-segment
# discontinuity.  (Earlier commented-out experiments with filtfilt, a one-sided
# exponential average, an FIR filter, and spline extrapolation were removed.)
S = 10                # spline support (samples) on each side of the junction
Nbase = 8             # first channel to plot
Nchannels = 4         # number of channels to plot (d.shape[0] would plot all)
W = 1000              # half-window (samples) shown around the boundary
W1 = max(int(Fs), W)  # wider half-window used by the filtering experiments
c = 1                 # boundary between segment c-1 and segment c
alpha = 0.99999       # decay weight of the one-sided exponential average
Q = 10                # length (samples) of that average
print(alpha ** Q)
norm = 1. / sum(alpha ** n for n in range(Q))  # normalizer so the weights sum to 1

for i in range(Nchannels):
    pl.subplot(Nchannels, 1, i + 1)
    channel = i + Nbase  # NOTE(review): overwrites the global `channel` set earlier
    # Raw signal straddling the boundary at sample c*N.
    pl.plot(all_data[channel, c * N - W:c * N + W])
    pl.title(channels[channel])
pl.gcf().set_size_inches(14, 8);
In [23]:
# NOTE(review): out-of-order execution (In[23]) — `data_offset` is defined in a
# later cell, so this cell fails under Restart & Run All until that one has run.
data_offset[0]
Out[23]:
In [ ]:
from scipy.interpolate import splrep, splev

# Same window/weight configuration as the cell above, but starting the plots
# from the first channel instead of channel 8.
S = 10
Nbase = 0
Nchannels = 4  # d.shape[0]
W = 1000
W1 = max(int(Fs), W)
c = 1
alpha = 0.99999
Q = 10
print(alpha ** Q)
norm = 1. / sum(alpha ** n for n in range(Q))
This is what we use when `gen_ictal` is negative.
In [ ]:
# Per-channel jump across the segment boundary at sample c*N: mean of the
# first 10 samples after the junction minus mean of the last 10 before it.
# Subtracting this from the right-hand window stitches the two signals.
prev_data = all_data[:, c * N - W:c * N]
next_data = all_data[:, c * N:c * N + W]
data_offset = next_data[:, :10].mean(axis=-1) - prev_data[:, -10:].mean(axis=-1)
In [27]:
# Re-plot the boundary window with the right-hand side shifted down by the
# per-channel offset, so the two segments join without a visible step.
for i in range(Nchannels):
    pl.subplot(Nchannels, 1, i + 1)
    channel = i + Nbase
    prev_data = all_data[channel, c * N - W:c * N]
    next_data = all_data[channel, c * N:c * N + W] - data_offset[channel]
    d = np.hstack((prev_data, next_data))
    pl.plot(d)
    pl.title(channels[channel])
pl.gcf().set_size_inches(14, 8);
Here is an alternative — a good way to measure the match between the two edge columns:
In [ ]:
# Normalized mismatch score between edge columns j1 and j2:
# -2*<j1,j2> / (||j1||^2 + ||j2||^2), which equals -1 exactly when j1 == j2.
-2.*np.dot(j1,j2)/(np.dot(j1,j1) + np.dot(j2,j2))
Let's say we add a constant $c$ to the left column ($j_1$):
In [ ]:
# The same score after adding a constant c to j1.
-2.*np.dot(j1+c,j2)/(np.dot(j1+c,j1+c) + np.dot(j2,j2))
which is:
In [ ]:
# Numerator expanded: <j1 + c, j2> = <j1, j2> + c * sum(j2).
-2.*(np.dot(j1,j2)+c*np.sum(j2))/(np.dot(j1+c,j1+c) + np.dot(j2,j2))
We can use the identity (pseudo-code, $N$ = length of $j_1$):
In [ ]:
np.dot(j1+c, j1+c) = np.dot(j1, j1+c) + c*np.sum(j1+c) = \
    (np.dot(j1,j1) + c*np.sum(j1)) + (c*np.sum(j1) + c*c*N) = \
    np.dot(j1,j1) + 2*c*np.sum(j1) + c*c*N
which we then minimize with respect to $c$.