We will use the distance between test segments computed in 140926-test-signal-jump to find sequences of segments that were likely recorded together. Armed with this fact, we can take the individual probabilities of each segment and combine them to form one probability that will be used to update the probabilities of all the segments in the sequence.

The sequences are found using a greedy algorithm that stops when a conflict is detected.

The probabilities of segments should be combined by multiplying them; however, this did not work well, probably because the probabilities are not well calibrated. Taking the mean had a better effect.

Suppose you have a chain of segments: $i \in 1 \ldots N $

Each segment predicts a seizure $P_i$ or not $Q_i=1-P_i$

If a chain is negative then the probability is $\prod Q_i$. If a chain is positive the situation is more complex: there is a chance $U$ that a seizure detection event has happened and a chance $V=1-U$ that it did not. I estimate $U$ to be around $0.2$. So the probability is $\prod ( U P_i + V Q_i )$

or $\prod Q_i \times \prod ( U \frac{P_i}{Q_i} + V )$

the ratio of positive to negative probability is just $r = \prod ( U \frac{P_i}{Q_i} +V )$ and probability is $1/(1+1/r)$


In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os

Individual segment probability file


In [7]:
from sklearn.metrics import roc_auc_score

# Blend weight given to the 'gb' predictions; 'rf' receives the remainder.
w_gb = 0.471

# One row per validation clip, indexed by clip name.
scores = pd.DataFrame()
scores['gb'] = pd.read_csv(
    '../submissions/140929-target-combine.validate.1.csv',
    index_col='clip', squeeze=True)
scores['rf'] = pd.read_csv(
    '../submissions/140929-target-combine.validate.2.csv',
    index_col='clip', squeeze=True)

# Weighted average of the two per-clip estimates.
scores['y_est'] = scores['gb'] * w_gb + scores['rf'] * (1. - w_gb)

# Ground-truth label: 1 when the clip name contains 'preictal', else 0.
scores['y'] = [int('preictal' in clip) for clip in scores.index.values]

# 'post' starts out equal to the blended estimate; later cells overwrite it
# for segments that belong to a sequence.
scores['post'] = scores['y_est']

# Baseline AUC of the blended estimate (displayed as the cell result).
roc_auc_score(scores['y'], scores['y_est'])


Out[7]:
0.86366122484269181

In [8]:
# Distinct targets: the first two underscore-separated fields of each clip
# name (e.g. 'Dog_1', 'Patient_2').
targets = set('_'.join(clip.split('_')[:2]) for clip in scores.index.values)
targets


Out[8]:
{'Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2'}

from 140929-validate-signal-jump


In [82]:
# Load the results saved from 140929-validate-signal-jump (indexed by target
# name in the cells below).
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open('../submissions/140929-validate-signal-jump.pkl', 'rb') as pkl_file:
    results = pickle.load(pkl_file)

In [75]:
# Scratch check: advance i to the first segment that has a successor assigned
# (i ends at N-1 if no segment has one).
for i in range(N):
    if next_segment[i] == -1:
        continue
    break
i


Out[75]:
2

In [96]:
# Show every segment index next to its assigned successor (-1 = none).
list(zip(range(len(next_segment)), next_segment))


Out[96]:
[(0, -1),
 (1, -1),
 (2, -1),
 (3, -1),
 (4, -1),
 (5, -1),
 (6, -1),
 (7, -1),
 (8, 9),
 (9, -1),
 (10, -1),
 (11, -1),
 (12, 13),
 (13, -1),
 (14, 15),
 (15, -1),
 (16, -1),
 (17, -1),
 (18, 19),
 (19, 20),
 (20, 21),
 (21, 22),
 (22, 23),
 (23, -1),
 (24, -1),
 (25, -1),
 (26, 27),
 (27, -1),
 (28, -1),
 (29, -1),
 (30, -1),
 (31, -1),
 (32, -1),
 (33, -1),
 (34, -1),
 (35, -1),
 (36, 37),
 (37, 38),
 (38, -1),
 (39, -1),
 (40, 41),
 (41, -1),
 (42, -1),
 (43, 44),
 (44, 45),
 (45, -1),
 (46, 47),
 (47, -1),
 (48, 49),
 (49, 50),
 (50, -1),
 (51, -1),
 (52, -1),
 (53, -1),
 (54, -1),
 (55, -1),
 (56, -1),
 (57, -1),
 (58, -1),
 (59, -1),
 (60, -1),
 (61, -1),
 (62, -1),
 (63, 64),
 (64, -1),
 (65, -1),
 (66, -1),
 (67, 68),
 (68, 69),
 (69, -1),
 (70, 71),
 (71, -1),
 (72, 73),
 (73, -1),
 (74, -1),
 (75, -1),
 (76, 77),
 (77, -1),
 (78, -1),
 (79, -1),
 (80, 81),
 (81, 82),
 (82, 83),
 (83, -1),
 (84, 85),
 (85, 86),
 (86, 87),
 (87, 88),
 (88, -1),
 (89, -1),
 (90, 91),
 (91, 92),
 (92, -1),
 (93, 94),
 (94, 95),
 (95, -1),
 (96, -1),
 (97, 98),
 (98, 99),
 (99, 100),
 (100, -1),
 (101, -1),
 (102, 103),
 (103, -1),
 (104, 105),
 (105, 106),
 (106, 107),
 (107, -1),
 (108, -1),
 (109, -1),
 (110, -1),
 (111, 112),
 (112, 113),
 (113, -1),
 (114, -1),
 (115, -1),
 (116, 117),
 (117, 118),
 (118, -1),
 (119, -1),
 (120, 121),
 (121, 122),
 (122, 123),
 (123, 124),
 (124, -1),
 (125, -1),
 (126, -1),
 (127, 128),
 (128, -1),
 (129, 130),
 (130, -1),
 (131, -1),
 (132, 133),
 (133, -1),
 (134, -1),
 (135, 136),
 (136, -1),
 (137, -1),
 (138, 139),
 (139, 140),
 (140, 141),
 (141, 142),
 (142, -1),
 (143, -1),
 (144, 145),
 (145, 146),
 (146, -1),
 (147, 148),
 (148, 149),
 (149, -1),
 (150, -1),
 (151, -1),
 (152, -1),
 (153, -1),
 (154, -1),
 (155, -1),
 (156, -1),
 (157, 158),
 (158, 159),
 (159, -1),
 (160, 161),
 (161, -1),
 (162, -1),
 (163, -1),
 (164, 165),
 (165, 166),
 (166, 167),
 (167, -1),
 (168, 169),
 (169, -1),
 (170, -1),
 (171, -1),
 (172, -1),
 (173, -1),
 (174, -1),
 (175, 176),
 (176, -1),
 (177, 178),
 (178, 179),
 (179, -1),
 (180, 181),
 (181, -1),
 (182, -1),
 (183, -1),
 (184, -1),
 (185, -1),
 (186, -1),
 (187, 188),
 (188, -1),
 (189, -1),
 (190, -1),
 (191, -1),
 (192, 193),
 (193, 194),
 (194, 195),
 (195, 196),
 (196, 197),
 (197, -1),
 (198, -1),
 (199, 200),
 (200, 201),
 (201, -1),
 (202, -1),
 (203, -1),
 (204, -1),
 (205, 206),
 (206, -1),
 (207, -1),
 (208, 209),
 (209, -1),
 (210, -1),
 (211, -1),
 (212, -1),
 (213, -1),
 (214, -1),
 (215, -1),
 (216, -1),
 (217, 218),
 (218, -1),
 (219, 220),
 (220, -1),
 (221, -1),
 (222, 223),
 (223, 224),
 (224, 225),
 (225, 226),
 (226, 227),
 (227, -1),
 (228, -1),
 (229, -1),
 (230, -1),
 (231, 232),
 (232, -1),
 (233, -1),
 (234, -1),
 (235, -1),
 (236, 237),
 (237, -1),
 (238, -1),
 (239, -1),
 (240, -1),
 (241, 242),
 (242, -1),
 (243, -1),
 (244, 245),
 (245, -1),
 (246, 247),
 (247, 248),
 (248, -1),
 (249, -1),
 (250, 251),
 (251, -1),
 (252, 253),
 (253, 254),
 (254, 255),
 (255, -1),
 (256, 257),
 (257, -1),
 (258, -1),
 (259, -1),
 (260, -1),
 (261, -1),
 (262, 263),
 (263, -1),
 (264, 265),
 (265, 266),
 (266, 267),
 (267, 268),
 (268, -1),
 (269, -1),
 (270, 271),
 (271, -1),
 (272, -1),
 (273, -1),
 (274, 275),
 (275, -1),
 (276, -1),
 (277, -1),
 (278, 279),
 (279, -1),
 (280, 281),
 (281, -1),
 (282, 283),
 (283, 284),
 (284, 285),
 (285, -1),
 (286, -1),
 (287, -1),
 (288, 289),
 (289, 290),
 (290, 291),
 (291, -1),
 (292, 293),
 (293, -1),
 (294, 295),
 (295, 296),
 (296, 297),
 (297, 298),
 (298, -1),
 (299, -1),
 (300, -1),
 (301, 302),
 (302, 303),
 (303, 304),
 (304, 305),
 (305, -1),
 (306, -1),
 (307, 308),
 (308, -1),
 (309, -1),
 (310, 311),
 (311, -1),
 (312, -1),
 (313, -1),
 (314, 315),
 (315, 316),
 (316, 317),
 (317, -1),
 (318, -1),
 (319, -1),
 (320, -1),
 (321, -1),
 (322, -1),
 (323, -1),
 (324, 325),
 (325, -1),
 (326, 327),
 (327, 328),
 (328, 329),
 (329, -1),
 (330, 331),
 (331, -1),
 (332, -1),
 (333, 334),
 (334, -1),
 (335, -1),
 (336, -1),
 (337, -1),
 (338, -1),
 (339, -1),
 (340, -1),
 (341, 231),
 (342, -1),
 (343, -1),
 (344, -1),
 (345, 346),
 (346, 347),
 (347, -1),
 (348, 349),
 (349, -1),
 (350, 351),
 (351, -1),
 (352, 353),
 (353, -1),
 (354, -1),
 (355, -1),
 (356, -1),
 (357, -1),
 (358, 359),
 (359, -1),
 (360, -1),
 (361, -1),
 (362, -1),
 (363, -1),
 (364, -1),
 (365, -1),
 (366, 367),
 (367, -1),
 (368, 369),
 (369, -1),
 (370, -1),
 (371, -1),
 (372, -1),
 (373, 374),
 (374, 375),
 (375, -1),
 (376, 377),
 (377, -1),
 (378, 379),
 (379, -1),
 (380, -1),
 (381, 382),
 (382, 383),
 (383, -1),
 (384, -1),
 (385, -1),
 (386, -1),
 (387, 388),
 (388, -1),
 (389, -1),
 (390, -1),
 (391, 392),
 (392, 393),
 (393, -1),
 (394, 395),
 (395, -1),
 (396, -1),
 (397, -1),
 (398, -1),
 (399, -1),
 (400, 401),
 (401, -1),
 (402, -1),
 (403, 404),
 (404, -1),
 (405, 406),
 (406, 407),
 (407, -1),
 (408, -1),
 (409, 410),
 (410, -1),
 (411, -1),
 (412, -1),
 (413, -1),
 (414, 415),
 (415, -1),
 (416, -1),
 (417, -1),
 (418, -1),
 (419, -1),
 (420, 421),
 (421, 422),
 (422, -1),
 (423, -1),
 (424, -1),
 (425, -1),
 (426, 427),
 (427, -1),
 (428, 429),
 (429, 430),
 (430, 431),
 (431, -1),
 (432, -1),
 (433, -1),
 (434, -1),
 (435, 436),
 (436, -1),
 (437, -1),
 (438, -1),
 (439, 56),
 (440, -1),
 (441, 442),
 (442, 443),
 (443, -1),
 (444, -1),
 (445, -1),
 (446, -1),
 (447, -1),
 (448, 449),
 (449, -1),
 (450, -1),
 (451, -1),
 (452, -1),
 (453, 454),
 (454, 455),
 (455, -1),
 (456, -1),
 (457, 458),
 (458, -1),
 (459, 460),
 (460, 461),
 (461, -1),
 (462, 463),
 (463, -1),
 (464, 465),
 (465, -1),
 (466, 467),
 (467, -1),
 (468, -1),
 (469, -1),
 (470, -1),
 (471, -1),
 (472, -1),
 (473, -1),
 (474, 475),
 (475, 476),
 (476, 477),
 (477, 478),
 (478, -1),
 (479, -1),
 (480, -1),
 (481, 482),
 (482, 483),
 (483, -1),
 (484, -1),
 (485, -1),
 (486, -1),
 (487, -1),
 (488, -1),
 (489, -1),
 (490, 491),
 (491, -1),
 (492, 493),
 (493, 494),
 (494, 495),
 (495, -1),
 (496, -1),
 (497, -1),
 (498, 499),
 (499, 500),
 (500, -1),
 (501, 502),
 (502, 503),
 (503, -1),
 (504, 505),
 (505, -1),
 (506, -1),
 (507, 508),
 (508, 509),
 (509, -1),
 (510, 511),
 (511, 512),
 (512, 513),
 (513, -1),
 (514, 515),
 (515, -1),
 (516, -1),
 (517, -1),
 (518, -1),
 (519, 520),
 (520, -1),
 (521, -1),
 (522, 523),
 (523, -1),
 (524, 525),
 (525, -1),
 (526, 527),
 (527, -1),
 (528, -1),
 (529, -1),
 (530, -1),
 (531, -1),
 (532, -1),
 (533, -1),
 (534, 535),
 (535, 536),
 (536, -1),
 (537, -1),
 (538, -1),
 (539, -1),
 (540, -1),
 (541, -1)]

In [87]:
# Display the successor list stored in the loaded results for the current
# `result`; -1 marks a segment with no successor.
result['next_segment']


Out[87]:
[1,
 2,
 3,
 4,
 5,
 -1,
 7,
 8,
 9,
 10,
 11,
 -1,
 13,
 14,
 15,
 16,
 17,
 -1,
 19,
 20,
 21,
 22,
 23,
 -1,
 25,
 26,
 27,
 28,
 29,
 -1,
 31,
 32,
 33,
 34,
 35,
 -1,
 37,
 38,
 39,
 40,
 41,
 -1,
 43,
 44,
 45,
 46,
 47,
 -1,
 49,
 50,
 51,
 52,
 53,
 -1,
 55,
 56,
 57,
 58,
 59,
 -1,
 61,
 62,
 63,
 64,
 65,
 -1,
 67,
 -1]

In [154]:
for target in targets: #list(targets)[:1]:
    result = results[target]
    Np = result['preictal']
    print
    d = result['dist']
    N = d.shape[0]
    print target, N, Np
    dord = np.unravel_index(d.ravel().argsort(),d.shape)
    Nsequences = N/6
    
    # find good pairs of segments that are likely to be paired in time
    next_segment = [-1]*N
    previous_segment = [-1]*N
    for i,(s1,s2) in enumerate(np.array(dord).T):
        dist = d[s1,s2]
        if next_segment[s1] != -1:
            print i,'right conflict',dist
            break
        if previous_segment[s2] != -1:
            print i,'left conflict',dist
            break
        next_segment[s1] = s2
        previous_segment[s2] = s1
#     if i < Nsequences:
#         print 'skip'
#         continue
    # check code
    for i in range(N):
        if next_segment[i] != -1:
            assert previous_segment[next_segment[i]] == i
            
    # validate
    bad_next = bad_previous = 0
    for i in range(N):
        if next_segment[i] != -1:
            if result['next_segment'][i] != next_segment[i]:
                bad_next += 1
                print 'n%d'%i,
        if previous_segment[i] != -1:
            if result['previous_segment'][i] != previous_segment[i]:
                bad_previous += 1
                print 'p%d'%i,
    print '\nbad next', bad_next, 'bad_previous', bad_previous

    # check ideal chains
    next_segment = result['next_segment']
    previous_segment = result['previous_segment']

    # find good sequences
    sequences = []
    for i in range(N):
        if previous_segment[i] == -1 and next_segment[i] != -1:
            j = i
            sequence = [j]
            while next_segment[j] != -1:
                j = next_segment[j]
                sequence.append(j)
            sequences.append(sequence)
    len_sequences = [len(sequence) for sequence in sequences]
    print '#sequences',len(sequences), '%segments that was sequenced',sum(len_sequences)/float(N), 'longest sequence', max(len_sequences)
    #print sequences
    

    def key(s, Np=Np):
        if s < Np:
            return '%s_preictal_segment_%04d.mat'%(target,s+1)
        else:
            return '%s_interictal_segment_%04d.mat'%(target,(s-Np)+1)
        
    #compute probability for sequences
    sequences_prb = []
    for sequence in sequences:
        p0 = 1.
        q0 = 1.
        p1 =0.
        p2 = 0.
        p3 = 1.
        U = 0.001 # chance of seizure detection event in a preictal segment
        V = 1-U
        for s in sequence:
            P = scores['y_est'][key(s)]
            Q = 1.-P
            p0 *= P
            q0 *= Q
            p1 += P
            if P > p2:
                p2 = P
            p3 *= (U * P/Q + (1-U))
        p0 = p0 / (p0+q0)
        p1 = p1 / len(sequence)
        p2 = p2
        p3 = 1./(1+1./p3)
#         print p0, p1, p2, p3
        sequences_prb.append(p3)
    # fix probability for segments in sequences
    for p,sequence in zip(sequences_prb,sequences):
        # all segments in the same sequence will be assigned the same probability
        for s in sequence:
            scores['post'][key(s)] = p


Dog_2 542 42
206 right conflict -0.916587535609
p56 p231 n341 n439 
bad next 2 bad_previous 2
#sequences 91 %segments that was sequenced 1.0 longest sequence 6

Dog_3 1512 72
126 left conflict -0.980392156863
n754 p1008 
bad next 1 bad_previous 1
#sequences 252 %segments that was sequenced 1.0 longest sequence 6

Dog_1 504 24
105 left conflict -0.964990683795

bad next 0 bad_previous 0
#sequences 84 %segments that was sequenced 1.0 longest sequence 6

Dog_4 901 97
508 right conflict -0.95879494432

bad next 0 bad_previous 0
#sequences 151 %segments that was sequenced 1.0 longest sequence 6

Dog_5 480 30
250 right conflict -0.879222546161
p2 n121 n223 p434 
bad next 2 bad_previous 2
#sequences 80 %segments that was sequenced 1.0 longest sequence 6

Patient_2 60 18
8 right conflict -0.983497876094

bad next 0 bad_previous 0
#sequences 10 %segments that was sequenced 1.0 longest sequence 6

Patient_1 68 18
1 right conflict -0.997638667868
n2 p4 
bad next 1 bad_previous 1
#sequences 12 %segments that was sequenced 1.0 longest sequence 6

In [155]:
# AUC after replacing per-segment estimates with the sequence-level probability.
roc_auc_score(scores['y'], scores['post'])


Out[155]:
0.90065510080577382
U AUC
original 0.86366
multiply (p0) 0.81380
mean (p1) 0.8704
max (p2) 0.86514
0.1 0.7755
0.2 0.7838
0.3 0.7958
0.4 0.8090
0.5 0.8237
0.6 0.8314
0.65 0.8354
0.7 0.8361
0.72 0.8367
0.74 0.8368
0.75 0.8368
0.77 0.8363
0.8 0.8353
0.9 0.8249

Ideal

U AUC
original 0.86366
multiply (p0) 0.87727
mean (p1) 0.89813
max (p2) 0.90776
p1+p2/2 0.90600
0.001 0.9006551

In [19]:


In [ ]: