In [ ]:
import pandas as pd
import numpy as np
import os
import scipy.io
In [ ]:
# Which data split to read. The value is interpolated into the segment file
# names and into the MATLAB variable name looked up inside each file.
data_type = 'test'
In [ ]:
# Subjects to post-process. Each name is used as the sub-directory under
# seizure-data/ and as the prefix of the clip names in the submission file.
targets = ['Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2']
In [ ]:
def mydist(j1, j2):
    """Return a normalised dissimilarity between two vectors.

    The value is ``-2 * (j1 . j2) / (|j1|^2 + |j2|^2)``. For 1-D inputs it
    lies in [-1, 1]: -1 when the vectors are equal, 0 when they are
    orthogonal and +1 when one is the negative of the other, so *smaller
    means more alike*.

    Args:
        j1: first vector (anything ``np.dot`` accepts).
        j2: second vector, same length as ``j1``.

    Returns:
        The dissimilarity as a float. If both vectors are all zeros the
        ratio is undefined (0/0); 0.0 is returned in that case instead of
        NaN so the pair is treated as neither similar nor dissimilar.
    """
    denom = np.dot(j1, j1) + np.dot(j2, j2)
    # Only scalar denominators (the 1-D case) are guarded; any other input
    # shape keeps the original arithmetic untouched.
    if np.ndim(denom) == 0 and denom == 0:
        return 0.0
    return -2. * np.dot(j1, j2) / denom
In [ ]:
# Path of the existing submission CSV whose scores will be post-processed.
FNAME_IN = 'submissions/SUBMISSON.csv'
In [ ]:
# Path the post-processed submission CSV is written to.
FNAME_OUT = 'submissions/IMPROVED_SUBMISSON.csv'
In [ ]:
# Load the submission indexed by clip name. The `squeeze=True` keyword of
# read_csv was deprecated and later removed from pandas; squeezing the
# columns axis afterwards is the documented replacement and yields the same
# Series when the file has a single score column.
scores = pd.read_csv(FNAME_IN, index_col='clip').squeeze('columns')
# Work on a copy so the original scores stay available while blending.
out_scores = scores.copy()
In [ ]:
# Post-processing parameters.
W = 0.25  # weight kept on a segment's own score; (1 - W) goes to its sequence's pooled score
T = 0.1   # temperature of the softmax used to pool the scores of one sequence
D = -0.5  # largest mydist() value still accepted as "segment s2 directly follows s1"
MAX_SEQUENCE_LENGTH = 6  # a chain of linked segments is never allowed to exceed this


def _load_edges(target):
    """Return ``[(first_column, last_column), ...]`` for the segments of *target*.

    Segment files are read in order starting at number 1 and reading stops at
    the first missing file, so list index ``i`` always belongs to segment
    number ``i + 1``. The columns are taken from the 'data' field stored in
    each file (presumably channels x samples, i.e. the first and last time
    sample of every channel -- confirm against the data files).
    """
    edges = []
    segment = 1
    while True:
        fname = 'seizure-data/%s/%s_%s_segment_%04d.mat' % (target, target, data_type, segment)
        if not os.path.exists(fname):
            break  # ran past the last segment of this target
        try:
            data = scipy.io.loadmat(fname)
            clip = data['%s_segment_%d' % (data_type, segment)]['data'][0, 0]
            edge = (clip[:, 0].astype(float), clip[:, -1].astype(float))
        except Exception as err:
            # Keep the original "stop at the first unreadable segment"
            # behaviour, but report it instead of hiding it.
            print('stopping at unreadable segment %s: %r' % (fname, err))
            break
        edges.append(edge)
        segment += 1
    return edges


def _follow(links, start):
    """Walk *links* from *start* until a -1 entry; return ``(last_node, steps)``."""
    node, steps = start, 0
    while links[node] != -1:
        node = links[node]
        steps += 1
    return node, steps


for target in targets:
    edges = _load_edges(target)
    N = len(edges)
    print('')
    print('%s %d' % (target, N))
    if N == 0:
        # No segment files for this target: nothing to link, scores stay as is.
        continue

    # d[i, j] compares the END of segment i with the START of segment j;
    # a small value suggests that j directly follows i in time.
    d = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            if i != j:
                d[i, j] = mydist(edges[i][1], edges[j][0])
        d[i, i] = 1  # a segment must never be paired with itself

    # All (i, j) index pairs ordered from most to least similar.
    dord = np.unravel_index(d.ravel().argsort(), d.shape)

    # find good pairs of segments that are likely to be paired in time
    next_segment = [-1] * N
    previous_segment = [-1] * N
    for s1, s2 in zip(*dord):
        s1, s2 = int(s1), int(s2)
        # Pairs are visited best-first, so the first one that is not within
        # the threshold ends the search. Written as `not <=` so that a NaN
        # distance (sorted last by argsort) also stops the loop.
        if not (d[s1, s2] <= D):
            break
        if next_segment[s1] != -1 or previous_segment[s2] != -1:
            continue  # s1 already has a successor or s2 already has a predecessor
        head, n_before = _follow(previous_segment, s1)
        if head == s2:
            # s2 is the head of the chain that ends in s1: linking them would
            # close a loop, and a looped chain has no start to be found later.
            continue
        _, n_after = _follow(next_segment, s2)
        if n_before + n_after + 2 > MAX_SEQUENCE_LENGTH:
            continue  # joined chain would be too long
        next_segment[s1] = s2
        previous_segment[s2] = s1

    # check code: forward and backward links must agree
    for i in range(N):
        if next_segment[i] != -1:
            assert previous_segment[next_segment[i]] == i

    # find good sequences: start at every chain head and follow the links
    sequences = []
    for i in range(N):
        if previous_segment[i] == -1 and next_segment[i] != -1:
            sequence = [i]
            while next_segment[sequence[-1]] != -1:
                sequence.append(next_segment[sequence[-1]])
            sequences.append(sequence)

    len_sequences = [len(sequence) for sequence in sequences]
    longest = max(len_sequences) if len_sequences else 0
    print('#sequences %d %%segments that was sequenced %s longest sequence %d' % (
        len(sequences), sum(len_sequences) / float(N), longest))
    print(sequences)

    # Pool the scores of every sequence and blend the result back in.
    for sequence in sequences:
        clips = ['%s_test_segment_%04d.mat' % (target, s + 1) for s in sequence]
        probs = np.array([scores[clip] for clip in clips])
        # Softmax-weighted mean: with a small T the highest score dominates.
        wgts = np.exp(probs / T)
        wgts /= np.sum(wgts)
        p = np.dot(wgts, probs)
        # Each segment keeps a share W of its own score and takes the rest
        # from the pooled score of its sequence.
        for clip, own in zip(clips, probs):
            out_scores[clip] = W * own + (1. - W) * p
In [ ]:
# Rescale so the highest score becomes 1. Only done for a positive maximum:
# dividing by 0 would turn every score into NaN/inf, and dividing by a
# negative maximum would reverse the ranking of the clips.
top_score = out_scores.max()
if top_score > 0:
    out_scores = out_scores / top_score
In [ ]:
# Write the post-processed scores to FNAME_OUT, including a header row.
out_scores.to_csv(FNAME_OUT, header=True)