In [ ]:
# Display the installed scikit-learn version as this cell's output.
import sklearn
sklearn.__version__

In [ ]:
import cPickle as pickle
import hickle as hkl

In [ ]:
import sys
# Add the parent directory to the module search path before importing
# `seizure` (presumably the package lives one level up -- confirm).
sys.path.append('..')
import seizure.tasks
# Set the module-level `task_predict` attribute on seizure.tasks.
# NOTE(review): what this flag switches is defined inside seizure.tasks and
# is not visible from this notebook -- confirm before changing it.
seizure.tasks.task_predict = True

from seizure.tasks import load_mat_data, count_mat_data

In [ ]:
# Column stride used when sampling segments for model fitting: the main cell
# slices each segment as data[:, ::skip], i.e. keeps every 20th column.
skip = 20

In [ ]:
# Relative path of the input directory handed to the seizure.tasks data loaders.
data_dir='../filtered-seizure-data'

In [ ]:
# For every target: fit a FastICA model on a strided sample of the labelled
# segments, then transform every segment (including the test segments) and
# write the result to ../ica-seizure-data/<target>/ as gzip-compressed hickle
# files.
import os

import numpy as np
import sklearn.decomposition


def _data_key(segment):
    """Return the first key of `segment` that does not start with '_'.

    Raises:
        KeyError: if every key starts with an underscore.
    """
    for key in segment.keys():
        if not key.startswith('_'):
            return key
    raise KeyError('segment has no key without a leading underscore')


def _segment_sequence(record):
    """Return ``record['sequence'][0, 0][0, 0]``, or 1 when that lookup fails.

    Only lookup errors are treated as "no sequence field"; anything else
    (including KeyboardInterrupt) propagates.
    NOTE(review): assumes `record` is a NumPy structured array, for which a
    missing field raises ValueError -- confirm against load_mat_data.
    """
    try:
        return record['sequence'][0, 0][0, 0]
    except (KeyError, ValueError, IndexError):
        return 1


for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    print(target)

    # Sample input: keep every `skip`-th column of each labelled segment and
    # stack the samples as rows.  Collecting per-segment chunks (instead of
    # preallocating from the first segment's length) means segments of
    # different lengths work and no uninitialised rows can reach the fit.
    chunks = []
    for data_type in ['preictal', 'interictal']:
        for segment in load_mat_data(data_dir, target, data_type):
            data = segment[_data_key(segment)]['data'][0, 0]
            # np.array copies, so the chunk does not keep the full-resolution
            # segment alive through a strided view.
            chunks.append(np.array(data[:, ::skip].T, dtype=np.float64))
    Ns = len(chunks)
    print('Number of segments %d' % Ns)
    if not chunks:
        # Without this guard the fit would silently reuse the previous
        # target's X (or fail with a NameError on the first target).
        raise ValueError('no preictal/interictal segments found for %s in %s'
                         % (target, data_dir))
    X = np.vstack(chunks)
    del chunks  # release the per-segment copies before fitting
    N, Nf = X.shape
    print('Number of channels %d %d' % (Nf, N))

    # build model from sampled data
    # Fixed seed so that re-running the notebook reproduces the same components.
    m = sklearn.decomposition.FastICA(random_state=0)
    #m = sklearn.decomposition.PCA(whiten=True)
    # m = sklearn.decomposition.RandomizedPCA()
    m.fit(X)

    # transform all data
    outdir = '../ica-seizure-data/%s' % target
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    for data_type in ['preictal', 'interictal', 'test']:
        mat_data = load_mat_data(data_dir, target, data_type)
        for isegment, segment in enumerate(mat_data):
            record = segment[_data_key(segment)]
            sequence = _segment_sequence(record)
            # The model was fitted on rows = samples, so transpose in and out.
            data = m.transform(record['data'][0, 0].T).T
            fname = '%s_%s_segment_%04d_%d.hkl' % (target, data_type, isegment + 1, sequence)
            foutname = os.path.join(outdir, fname)
            hkl.dump(data, foutname, mode="w", compression='gzip')

In [ ]: