In [ ]:
import sklearn
# Bare expression: when run as a notebook cell this displays the installed
# scikit-learn version (it has no effect when run as a plain script).
sklearn.__version__
In [ ]:
import cPickle as pickle
import hickle as hkl
In [ ]:
import sys
# Extend the import search path with the parent directory
# (presumably where the `seizure` package lives -- confirm against the repo layout).
sys.path.append('..')
import seizure.tasks
# Set the module-level flag on seizure.tasks before its loaders are used.
# NOTE(review): the flag's exact effect is defined in seizure.tasks and is not
# visible here -- presumably it selects the prediction task; confirm.
seizure.tasks.task_predict = True
from seizure.tasks import load_mat_data, count_mat_data
In [ ]:
# Subsampling stride: only every `skip`-th column of each segment's data
# (data[:, ::skip]) is used when building the matrix the model is fitted on.
skip = 20
In [ ]:
# Input directory handed to count_mat_data / load_mat_data as their first
# argument; the path is relative to the current working directory.
data_dir='../filtered-seizure-data'
In [ ]:
import os

import numpy as np
import sklearn.decomposition


def _payload_key(segment):
    """Return the first key of ``segment`` that does not start with '_'.

    Keys with a leading underscore are skipped (presumably loader metadata
    such as ``__header__`` -- TODO confirm against ``load_mat_data``).

    Raises:
        KeyError: if ``segment`` has no key without a leading underscore.
    """
    for key in segment.keys():
        if not key.startswith('_'):
            return key
    raise KeyError("segment has no key without a leading '_'")


# For each target: fit a FastICA model on a subsampled copy of the
# preictal/interictal segments, then transform every segment (including
# 'test') at full resolution and write one .hkl file per segment.
for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    print(target)

    # input size: count the training segments so the sample matrix can be
    # allocated once up front.
    data_types = ['preictal', 'interictal']
    Ns = sum(
        sum(1 for _ in count_mat_data(data_dir, target, data_type))
        for data_type in data_types
    )
    print('Number of segments %s' % Ns)

    # sample input
    # X is reset for every target: otherwise a target without any training
    # segment would silently fit the model on the previous target's samples.
    X = None
    Nf = None
    i = 0  # next free row of X
    for data_type in data_types:
        mat_data = load_mat_data(data_dir, target, data_type)
        for segment in mat_data:
            data = segment[_payload_key(segment)]['data'][0, 0]
            sampled = data[:, ::skip]  # keep every `skip`-th column
            if Nf is None:
                # The first segment fixes the layout; every later segment is
                # assumed to yield the same (Nf, N0) shape after subsampling
                # (a different shape fails in the assignment below).
                Nf, N0 = sampled.shape
                N = Ns * N0
                print('Number of channels %s %s' % (Nf, N))
                X = np.empty((N, Nf))
            X[i:i + N0, :] = sampled.T
            i += N0
    if X is None:
        raise ValueError(
            'No %s segments found for %s in %s'
            % ('/'.join(data_types), target, data_dir))

    # build model from sampled data
    m = sklearn.decomposition.FastICA()
    #m = sklearn.decomposition.PCA(whiten=True)
    # m = sklearn.decomposition.RandomizedPCA()
    # np.empty does not initialise memory, so fit only on the rows that were
    # actually filled (this is all of X when every counted segment was loaded).
    m.fit(X[:i])

    # transform all data
    outdir = '../ica-seizure-data/%s' % target
    # Portable equivalent of `mkdir -p`; also works outside IPython.
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    for data_type in ['preictal', 'interictal', 'test']:
        mat_data = load_mat_data(data_dir, target, data_type)
        for isegment, segment in enumerate(mat_data):
            key = _payload_key(segment)
            try:
                sequence = segment[key]['sequence'][0, 0][0, 0]
            except Exception:
                # Best-effort fallback for segments without a readable
                # 'sequence' entry (presumably the 'test' segments -- TODO
                # confirm). `Exception` instead of a bare except so that
                # KeyboardInterrupt / SystemExit still propagate.
                sequence = 1
            data = segment[key]['data'][0, 0]
            # The model expects samples in rows; transpose in and back out so
            # the stored array keeps the input orientation.
            data = m.transform(data.T).T
            foutname = outdir + '/%s_%s_segment_%04d_%d.hkl' % (
                target, data_type, isegment + 1, sequence)
            hkl.dump(data, foutname, mode="w", compression='gzip')
In [ ]: