Create a signal-jump distance matrix from the training data. Because the true segment order is known for the training data, this can be used to validate the same computation performed on the test data in 140926-train-signal-jump.


In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os
import random

Extract the first and last W data samples of each training segment


In [2]:
# Number of samples kept from each end of a segment: the loading cell stores
# data[:,:W] (start edge) and data[:,-W:] (end edge) for every segment.
# Only the outermost sample of each edge is used when the distance matrix is built.
W = 10

In [3]:
def mydist(j1, j2):
    """Normalised jump distance between two 1-D signal vectors.

    Computes ``-2 * <j1, j2> / (<j1, j1> + <j2, j2>)``.  The value lies in
    [-1, 1]: -1 when the vectors are identical (best match), +1 when they
    are exact opposites (worst match).

    Parameters
    ----------
    j1, j2 : 1-D array-like of equal length
        The two vectors to compare.

    Returns
    -------
    float
        The distance.  If both vectors are all-zero the ratio is undefined
        (0/0); a neutral 0.0 is returned instead of NaN, which matches the
        value obtained when only one of the two vectors is zero.
    """
    denom = np.dot(j1, j1) + np.dot(j2, j2)
    if denom == 0:
        # Both vectors are zero: avoid 0/0 -> NaN leaking into the results.
        return 0.0
    return -2. * np.dot(j1, j2) / denom

In [4]:
import scipy.io

# For every target: load all training segments, keep their first/last W samples,
# record the ground-truth previous/next links between consecutive segments and
# build the all-pairs jump distance matrix.  Everything is pickled at the end.
# Uses `os`, `np`, `pickle`, `W` and `mydist` from the cells above.
results = {}
for target in ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']:
    results[target] = {}
    # Positive (preictal) and negative (interictal) examples are pooled and
    # treated in the same way, as if we did not know their labels.
    edges = []             # one (first W samples, last W samples) pair per segment
    next_segment = []      # index in `edges` of the true following segment, -1 if none
    previous_segment = []  # index in `edges` of the true preceding segment, -1 if none
    for data_type in ['preictal', 'interictal']:
        last_sequence = None
        isegment = 0  # number of segments of this data_type loaded so far
        while True:
            # Files are numbered from 1; stop at the first number with no file.
            fname = '../seizure-data/%s/%s_%s_segment_%04d.mat'%(target,target,data_type,isegment+1)
            if not os.path.exists(fname):
                break
            # A file that exists but cannot be read now raises instead of
            # silently truncating the data set.
            segment = scipy.io.loadmat(fname)
            segment = segment['%s_segment_%d'%(data_type, isegment+1)]
            data = segment['data'][0,0]
            edges.append((data[:,:W].astype(float),data[:,-W:].astype(float)))
            # Plain int so that `last_sequence + 1` is ordinary integer arithmetic.
            sequence = int(segment['sequence'][0,0][0,0])

            N = len(edges) # number of segments so far in the combined preictal/interictal data
            if last_sequence is None:
                # First segment of this data type; its next_segment entry is
                # appended on the following iteration (or after the loop).
                previous_segment.append(-1)
            elif last_sequence + 1 == sequence:
                # Current segment (index N-1) continues the previous one (index N-2).
                next_segment.append(N-1)
                previous_segment.append(N-2)
            else:
                # We are on the first segment of the next chain.
                next_segment.append(-1) # this entry belongs to the previous segment
                previous_segment.append(-1)
            last_sequence = sequence
            isegment += 1
        if last_sequence is not None:
            # Complete the last segment.  Skipped when no file was found for
            # this data type, so that next_segment stays aligned with edges.
            next_segment.append(-1)
        results[target][data_type] = isegment # keep number of positive and negative examples

    results[target]['previous_segment'] = previous_segment
    results[target]['next_segment'] = next_segment

    # dist[i,j] compares the last sample of segment i with the first sample of
    # segment j, i.e. the cost of placing j directly after i.
    N = len(edges)
    dist = np.zeros((N,N))
    for i in range(N):
        ei = edges[i]
        for j in range(i+1,N):
            ej = edges[j]
            dist[i,j] = mydist(ei[1][:,-1],ej[0][:,0])
            dist[j,i] = mydist(ej[1][:,-1],ei[0][:,0])

    # 1 is bad (-1 is good): we do not want a segment to attach to itself.
    np.fill_diagonal(dist, 1)
    results[target]['dist'] = dist

with open('../submissions/140929-validate-signal-jump.pkl', 'wb') as fp:
    pickle.dump(results, fp, pickle.HIGHEST_PROTOCOL)

In [4]: