We will use the distances between test segments computed in 140926-test-signal-jump to find sequences of segments that were likely adjacent in time. Armed with this, we can take the individual probabilities of the segments in a sequence and combine them into one probability that is used to update the probabilities of all the segments in the sequence.

The sequences are found using a greedy algorithm that stops when a conflict is detected.

The probabilities of the segments should be combined by multiplying them; however, this did not work well, probably because the probabilities are not well calibrated. Taking the mean had a better effect. (Note: the code below currently assigns the maximum probability found in the sequence.)

Suppose you have a chain of segments: $i \in 1 \ldots N $

Each segment predicts a seizure with probability $P_i$, or no seizure with probability $Q_i=1-P_i$.

If a chain is negative, then the probability is $\prod Q_i$. If a chain is positive, the situation is more complex: there is a chance $U$ that a seizure detection event has happened in a segment and a chance $V=1-U$ that it did not. I estimate $U$ to be around $0.2$. So the probability is $\prod ( U * P_i + V*Q_i)$

or $\prod Q_i \times \prod ( U \frac{P_i}{Q_i} +V )$

The ratio of the positive to the negative probability is just $r = \prod ( U \frac{P_i}{Q_i} +V )$, and the probability that the chain is positive is $1/(1+1/r) = r/(1+r)$.


In [1]:
%matplotlib inline
from matplotlib import pylab as pl
import cPickle as pickle
import pandas as pd
import numpy as np
import os

Individual segment probability file


In [2]:
# CSV with the individual per-segment probabilities (columns `clip,preictal`)
FNAME_IN = '../submissions/141001-predict.2.csv'

updated probability file


In [3]:
# CSV that receives the probabilities after the per-sequence update
FNAME_OUT = '../submissions/141001-predict.3.csv'

In [4]:
# show the first lines of the input file
!head {FNAME_IN}


clip,preictal
Dog_1_test_segment_0001.mat,0.4655715252062138
Dog_1_test_segment_0002.mat,0.1764793356829939
Dog_1_test_segment_0003.mat,0.22345979651268183
Dog_1_test_segment_0004.mat,0.24143709332533947
Dog_1_test_segment_0005.mat,0.20580162353476036
Dog_1_test_segment_0006.mat,0.2819681289605366
Dog_1_test_segment_0007.mat,0.1461491962898217
Dog_1_test_segment_0008.mat,0.3050757248000914
Dog_1_test_segment_0009.mat,0.14862148081479315

In [5]:
# Load the submission as a Series of preictal probabilities indexed by clip name.
# The column is selected by name instead of read_csv(squeeze=True): `squeeze`
# is deprecated/removed in newer pandas, and a by-name lookup fails loudly if
# the file does not contain the expected 'preictal' column.
scores = pd.read_csv(FNAME_IN, index_col='clip')['preictal']
# Independent copy that receives the updates, so `scores` keeps the original
# values while sequences are being processed.
out_scores = scores.copy()

In [6]:
# spot check: original probability of one clip (looked up again in out_scores after the update)
scores['Dog_2_test_segment_0004.mat']


Out[6]:
0.26675745531347661

In [7]:
# Subject prefix of every clip name: the first two '_'-separated fields,
# e.g. 'Dog_1_test_segment_0001.mat' -> 'Dog_1'.
targets = {'_'.join(clip.split('_')[:2]) for clip in scores.index.values}
targets


Out[7]:
{'Dog_1', 'Dog_2', 'Dog_3', 'Dog_4', 'Dog_5', 'Patient_1', 'Patient_2'}

In [8]:
# For every subject: link test segments greedily by jump distance, follow the
# links into sequences, and give all segments of a sequence one shared
# probability in out_scores.

# Chance of a seizure detection event in a preictal segment, and its
# complement; only the 'odds' estimate below uses them.
U = 0.2
V = 1 - U
# Which per-sequence estimate is written to out_scores:
#   'product' - prod(P) / (prod(P) + prod(Q))
#   'mean'    - arithmetic mean of the P values
#   'max'     - largest P in the sequence
#   'odds'    - 1 / (1 + 1/r) with r = prod(U * P / Q + V)
COMBINE = 'max'

for target in targets:
    print('')
    # Square matrix of jump distances between the test segments of this
    # subject; presumably d[s1, s2] is small when s2 directly follows s1
    # (TODO confirm against 140926-test-signal-jump).
    d = np.load('../data-cache/%s-test-jump-distance.npy' % target)
    N = d.shape[0]
    print('%s %s' % (target, N))
    # (row, column) indices of all matrix entries, smallest distance first
    dord = np.unravel_index(d.ravel().argsort(), d.shape)

    # find good pairs of segments that are likely to be paired in time
    next_segment = [-1] * N
    previous_segment = [-1] * N
    for i, (s1, s2) in enumerate(np.array(dord).T):
        dist = d[s1, s2]
        # Greedy linking: accept pairs in order of increasing distance and stop
        # at the first pair that would give a segment a second successor or a
        # second predecessor.
        if next_segment[s1] != -1:
            print('%s right conflict %s' % (i, dist))
            break
        if previous_segment[s2] != -1:
            print('%s left conflict %s' % (i, dist))
            break
        next_segment[s1] = s2
        previous_segment[s2] = s1

    # check code: forward and backward links must agree
    for i in range(N):
        if next_segment[i] != -1:
            assert previous_segment[next_segment[i]] == i

    # find good sequences: start at every segment that has a successor but no
    # predecessor and follow the links to the end. Segments on a closed loop
    # of links have no such head and are therefore never collected.
    sequences = []
    for i in range(N):
        if previous_segment[i] == -1 and next_segment[i] != -1:
            j = i
            sequence = [j]
            while next_segment[j] != -1:
                j = next_segment[j]
                sequence.append(j)
            sequences.append(sequence)
    len_sequences = [len(sequence) for sequence in sequences]
    # Guard the summary so a subject without any sequence (or with an empty
    # matrix) is reported instead of raising from max() or the division.
    longest = max(len_sequences) if len_sequences else 0
    fraction = sum(len_sequences) / float(N) if N else 0.
    print('#sequences %s %%segments that was sequenced %s longest sequence %s'
          % (len(sequences), fraction, longest))
    print(sequences)

    # compute probability for sequences
    sequences_prb = []
    for sequence in sequences:
        # segment indices are 0-based, clip names are 1-based
        probs = [scores['%s_test_segment_%04d.mat' % (target, s + 1)]
                 for s in sequence]
        p_all = 1.   # product of P over the sequence
        q_all = 1.   # product of Q = 1 - P over the sequence
        ratio = 1.   # product of (U * P / Q + V) over the sequence
        for P in probs:
            Q = 1. - P
            p_all *= P
            q_all *= Q
            ratio *= U * P / Q + V
        estimates = {
            'product': p_all / (p_all + q_all),
            'mean': sum(probs) / len(probs),
            # starting from 0. mirrors a running maximum initialised at zero
            'max': max([0.] + probs),
            'odds': 1. / (1 + 1. / ratio),
        }
        sequences_prb.append(estimates[COMBINE])

    # fix probability for segments in sequences
    for p, sequence in zip(sequences_prb, sequences):
        # all segments in the same sequence will be assigned the same probability
        for s in sequence:
            out_scores['%s_test_segment_%04d.mat' % (target, s + 1)] = p


Dog_2 1000
324 right conflict -0.932151727252
#sequences 193 %segments that was sequenced 0.517 longest sequence 6
[[3, 226, 140], [6, 386], [16, 862], [23, 956, 935], [26, 37, 276], [28, 489], [31, 614], [34, 143], [35, 175], [44, 346], [48, 552, 542], [52, 737, 268], [58, 315, 772, 124], [64, 435], [66, 938, 17], [67, 254], [74, 302, 775, 588, 673, 184], [80, 776], [86, 387, 641], [90, 265], [92, 306], [93, 598, 695, 699, 638], [95, 131, 941], [97, 907], [100, 507], [102, 865, 604], [104, 889, 798, 733, 50, 72], [109, 63], [114, 79, 87, 216], [127, 998, 170, 32], [130, 10], [133, 30], [146, 634, 281, 498], [152, 340, 606], [157, 731], [162, 213], [169, 257], [173, 954], [180, 20], [185, 341], [194, 877], [202, 600, 277, 196, 296], [206, 71], [209, 509], [214, 419], [222, 990, 203, 275], [225, 384, 582, 251], [232, 792], [233, 292], [234, 352, 820, 14], [235, 118], [236, 783, 291], [238, 578], [239, 122], [245, 546, 514, 657, 323], [250, 892, 702, 555], [253, 405], [264, 317, 939, 645], [267, 347, 46], [274, 248, 227], [284, 663], [289, 616], [293, 9], [295, 332, 466, 24, 558, 853], [297, 45], [305, 283, 917, 136, 510], [325, 545], [327, 438], [334, 298, 452], [336, 161], [339, 846], [342, 730], [348, 240], [357, 221, 764], [360, 413], [370, 608], [393, 2], [400, 139], [402, 121], [404, 447, 365, 763], [408, 707], [409, 218, 622], [411, 750, 511], [415, 841, 832, 255, 612, 581], [424, 312, 160], [430, 910], [431, 824], [440, 839], [448, 619, 810], [454, 412], [455, 101], [456, 874], [461, 423], [463, 960, 613], [467, 911], [469, 977], [474, 471], [480, 880, 304, 421, 569], [484, 392], [494, 147], [501, 278], [504, 760, 630], [506, 199, 426], [512, 401], [537, 995, 77, 442], [551, 629], [567, 75, 53], [568, 468], [570, 924], [571, 599], [572, 60], [575, 682], [580, 927], [583, 107, 905], [584, 549, 916, 285], [585, 261, 43, 754], [586, 991], [587, 154], [601, 262], [631, 125], [633, 273, 807, 150], [642, 126], [648, 453], [653, 688], [654, 186, 487], [658, 652, 566], [665, 538], [666, 177], [672, 476], [678, 635, 208], [686, 718, 649], [689, 492], [691, 908, 741], [703, 19], 
[705, 590, 15], [719, 345], [725, 369], [732, 906], [734, 970, 866], [740, 519], [742, 428], [748, 318], [753, 379], [755, 465, 459, 550], [758, 462, 174], [766, 714, 418, 815, 316, 851], [771, 932, 972, 191, 138, 524], [774, 816, 868], [789, 948], [794, 751], [795, 690], [803, 362, 562, 767, 508], [804, 576, 182], [812, 382], [835, 385], [838, 301], [842, 921, 770, 377], [844, 738], [845, 505, 746, 172, 724, 320], [848, 372, 592, 872], [849, 88], [850, 560], [854, 934, 231, 704], [855, 836], [857, 515], [859, 858, 785], [879, 813, 333], [884, 898, 137], [891, 171], [894, 12], [897, 117], [899, 368, 110, 779], [902, 944, 490, 488, 337], [903, 187], [904, 609], [912, 151], [915, 134], [925, 636, 444], [940, 244], [952, 540, 679, 119], [953, 356], [964, 78], [966, 39], [968, 324], [969, 266, 532], [975, 364], [976, 314], [979, 594], [981, 155], [985, 29, 801], [988, 864, 527, 800], [994, 331], [997, 811]]

Dog_3 907
33 left conflict -0.989490150071
#sequences 30 %segments that was sequenced 0.0694597574421 longest sequence 3
[[35, 445], [63, 432], [70, 368], [103, 136], [104, 325], [116, 537, 654], [133, 676], [177, 770], [318, 337], [341, 639], [344, 94], [361, 265], [370, 527], [380, 417], [436, 403], [472, 399], [515, 141], [524, 11], [557, 765], [587, 662], [613, 836, 422], [648, 698], [678, 233], [695, 840], [713, 86], [784, 441], [785, 356, 726], [855, 84], [879, 80], [890, 105]]

Dog_1 502
108 left conflict -0.9611050735
#sequences 74 %segments that was sequenced 0.362549800797 longest sequence 5
[[21, 32], [43, 20], [51, 497], [56, 24], [58, 371], [62, 42], [66, 134], [70, 495], [76, 127, 146, 106, 389], [77, 204], [79, 48], [93, 321], [95, 452], [97, 107, 219], [120, 141], [124, 88], [158, 140], [162, 287], [163, 131], [179, 194, 377, 99], [186, 473], [188, 259], [190, 463], [209, 263], [211, 117], [215, 197], [224, 31], [235, 37, 84], [236, 199], [239, 67], [240, 394], [247, 223, 249, 102], [248, 416, 132], [256, 262, 361], [260, 490], [291, 381, 92, 268], [301, 366, 342], [302, 343, 232], [303, 19], [304, 230], [307, 376], [310, 456], [320, 57, 413], [331, 424], [339, 153, 174], [340, 281, 229, 333], [341, 176, 245], [346, 26], [350, 319], [354, 181, 431, 338, 200], [360, 441, 4], [369, 439], [370, 316, 300], [375, 227, 109], [390, 383, 309], [392, 164], [393, 145], [406, 282, 161], [412, 154], [419, 195], [432, 332], [434, 267, 293, 442, 221], [445, 115], [450, 395], [453, 425], [455, 34], [459, 45], [461, 193], [476, 44], [479, 185], [481, 489, 430], [487, 206], [494, 63, 189], [499, 446, 182]]

Dog_4 990
556 right conflict -0.960661574506
#sequences 241 %segments that was sequenced 0.805050505051 longest sequence 6
[[0, 399], [1, 260, 101, 478, 415, 236], [7, 759, 635, 264], [11, 10], [13, 321, 132, 889], [14, 581, 429], [15, 24], [25, 315], [28, 8], [33, 631], [36, 964, 75, 141, 790], [38, 978, 678], [44, 875, 116], [45, 462], [46, 402, 971, 446], [48, 70, 483, 840, 986], [55, 170, 319, 767], [60, 127, 847], [63, 528], [67, 937, 987, 148, 948], [79, 126, 249], [86, 789], [87, 584], [98, 144], [103, 737, 721], [107, 664, 546, 18], [112, 827, 155, 106, 890, 244], [120, 122], [121, 573, 324, 544], [123, 383, 809, 970, 82, 333], [125, 647, 374], [128, 134], [129, 915], [135, 311], [136, 853, 417, 230], [138, 235, 613], [140, 499, 604], [142, 341, 860, 409, 655], [147, 200], [161, 896, 76], [163, 817, 648], [172, 680, 295, 68, 214], [174, 508, 488], [175, 976, 16], [177, 795, 520, 713], [179, 495], [184, 479, 829, 139, 724], [186, 22, 444, 859], [187, 585, 728, 154], [189, 855], [190, 682], [191, 89, 364], [193, 885], [197, 545, 531, 17], [203, 760], [207, 878], [208, 349, 413], [209, 143], [211, 638, 376, 102, 373], [216, 848], [220, 502, 770, 908, 985, 828], [225, 813], [233, 231, 459, 612], [251, 618], [253, 956], [255, 224, 47, 195], [256, 516, 473], [257, 658, 954], [263, 288], [268, 442, 934, 171, 241], [273, 932, 73], [279, 222, 327, 448, 729, 504], [281, 727, 406, 820], [282, 957, 497, 269, 656, 426], [283, 717], [289, 50], [294, 156], [297, 940, 455, 284], [299, 152, 183, 456], [300, 239, 926, 851], [307, 753, 862], [309, 361], [310, 377], [312, 730], [313, 662, 272], [316, 487], [318, 229, 731], [328, 740, 637, 167, 942], [329, 685, 523, 673], [335, 501], [338, 768, 703, 927, 511, 893], [340, 532], [343, 951, 943], [344, 907, 465, 652], [348, 562, 416], [350, 766, 835, 807, 837, 365], [356, 782], [357, 733, 698, 891], [362, 359], [366, 454, 428, 438], [372, 165, 923], [379, 720], [384, 640, 557], [385, 726, 526, 250], [386, 708, 911, 856, 792], [389, 355], [390, 578, 599, 706], [394, 51], [395, 110, 419, 606, 977, 217], [396, 984], [401, 286, 514, 398], [403, 162, 659, 
754], [404, 846], [405, 118, 565], [407, 339, 457, 363], [422, 716], [430, 894, 711], [434, 27, 748], [441, 619], [443, 794, 843, 77, 234, 808], [450, 221, 521], [451, 449, 935, 873, 19, 592], [461, 906], [468, 470], [469, 796, 146, 226], [472, 958, 834, 761], [475, 677, 205, 601], [481, 877, 826, 973, 969, 247], [486, 825], [489, 322, 292, 552], [492, 188, 2, 505, 306, 3], [493, 561, 421, 74], [494, 966], [500, 301], [506, 901], [507, 824, 895], [509, 423], [513, 630], [515, 547, 605, 553, 52, 683], [522, 182], [524, 850, 670, 797], [527, 323], [535, 278], [536, 836, 869, 115, 503], [537, 248, 572, 905, 886, 477], [555, 920], [558, 484, 266, 979, 181], [559, 903], [564, 739], [566, 815, 124, 296], [575, 151, 71, 574], [577, 81, 653, 550], [583, 569], [589, 725, 801], [591, 803], [593, 331], [596, 694], [597, 485], [602, 265], [615, 369, 228, 892, 784, 368], [616, 517, 408], [620, 178, 567], [628, 261], [632, 883, 518], [642, 96, 665, 414, 270, 317], [643, 212], [649, 32], [650, 158, 427], [651, 668, 736, 833, 510, 149], [667, 594, 342, 360], [675, 936, 97], [684, 914], [686, 543], [688, 924, 262], [692, 83, 245, 695, 746, 541], [699, 622, 666], [700, 772, 201], [704, 482], [714, 660, 582], [715, 378], [719, 928, 435, 922, 237, 804], [723, 206], [735, 35, 466], [743, 196, 972, 702, 173], [745, 351, 391], [747, 988, 554], [749, 876, 471, 114, 866], [750, 982, 352, 744], [751, 287, 274, 5], [755, 718, 453, 168, 223], [756, 674], [757, 336], [758, 345, 491], [762, 865], [773, 669], [777, 912], [778, 285], [779, 732, 64], [780, 549, 609, 293], [781, 705, 467], [783, 830, 672, 412], [786, 533, 410, 939], [787, 290], [798, 53], [810, 690, 930, 679], [811, 938, 347], [812, 326, 219, 989, 62, 420], [814, 802], [816, 400, 246, 277, 192, 334], [818, 304, 397, 874], [819, 563, 26, 243, 325], [821, 6, 104, 105], [823, 480, 180, 199, 57, 841], [831, 463, 539], [838, 576, 579], [844, 393, 113], [845, 981, 254, 884, 381, 198], [861, 880], [863, 460, 21, 799], [864, 512, 43, 882, 
267, 769], [867, 946, 707, 84], [871, 100], [879, 227, 358], [888, 644], [904, 963, 117, 587, 600, 69], [910, 742], [919, 354, 90, 625, 548], [921, 375, 586], [929, 476, 534, 218, 94, 59], [944, 909, 380, 160, 788], [949, 150, 153], [950, 629, 917, 854], [952, 785, 91], [953, 42, 298], [955, 621, 611], [959, 897, 388], [960, 519, 931], [961, 66, 464], [962, 580], [967, 654], [974, 542]]

Dog_5 191
42 left conflict -0.959496774214
#sequences 33 %segments that was sequenced 0.392670157068 longest sequence 4
[[1, 131, 105, 139], [6, 36, 140], [10, 7], [22, 112], [32, 74], [33, 23, 190], [34, 49], [38, 84, 107], [41, 21], [42, 160], [57, 133], [62, 51], [69, 188], [77, 113], [78, 158], [79, 119, 55, 2], [90, 159], [91, 115], [93, 148], [99, 43], [116, 16], [120, 180], [128, 48], [129, 75], [138, 92, 109], [144, 14], [145, 130], [147, 154], [152, 5], [161, 171], [166, 182], [177, 26, 143], [179, 187]]

Patient_2 150
1 right conflict -0.999516626264
#sequences 1 %segments that was sequenced 0.0133333333333 longest sequence 2
[[102, 24]]

Patient_1 195
10 left conflict -0.909383819823
#sequences 9 %segments that was sequenced 0.0974358974359 longest sequence 3
[[3, 54], [13, 179, 19], [86, 15], [97, 72], [124, 127], [147, 172], [183, 160], [185, 2], [189, 113]]

In [9]:
# write the updated scores; header=True emits the column header row
out_scores.to_csv(FNAME_OUT, header=True)

In [10]:
# first lines of the input and output files, side by side
!paste {FNAME_IN} {FNAME_OUT} | head


clip,preictal	clip,preictal
Dog_1_test_segment_0001.mat,0.4655715252062138	Dog_1_test_segment_0001.mat,0.4655715252062138
Dog_1_test_segment_0002.mat,0.1764793356829939	Dog_1_test_segment_0002.mat,0.1764793356829939
Dog_1_test_segment_0003.mat,0.22345979651268183	Dog_1_test_segment_0003.mat,0.22345979651268186
Dog_1_test_segment_0004.mat,0.24143709332533947	Dog_1_test_segment_0004.mat,0.2414370933253395
Dog_1_test_segment_0005.mat,0.20580162353476036	Dog_1_test_segment_0005.mat,0.2636619661388496
Dog_1_test_segment_0006.mat,0.2819681289605366	Dog_1_test_segment_0006.mat,0.2819681289605366
Dog_1_test_segment_0007.mat,0.1461491962898217	Dog_1_test_segment_0007.mat,0.1461491962898217
Dog_1_test_segment_0008.mat,0.3050757248000914	Dog_1_test_segment_0008.mat,0.3050757248000914
Dog_1_test_segment_0009.mat,0.14862148081479315	Dog_1_test_segment_0009.mat,0.14862148081479315

In [11]:
# same clip as the earlier spot check on `scores`, now read from the updated series
out_scores['Dog_2_test_segment_0004.mat']


Out[11]:
0.27385941543177666

In [12]:
# Re-read both submission files into one frame, one column per file. The
# 'preictal' column is selected by name instead of read_csv(squeeze=True),
# which is deprecated/removed in newer pandas.
df = pd.DataFrame()
df['in'] = pd.read_csv(FNAME_IN, index_col='clip')['preictal'] #64
# assignment aligns on the clip index already taken from the input file
df['out'] = pd.read_csv(FNAME_OUT, index_col='clip')['preictal']

In [13]:
# Scatter matrix of original vs updated probabilities with KDE plots on the
# diagonal; the trailing ';' suppresses the notebook's text output.
# NOTE(review): newer pandas exposes this as pandas.plotting.scatter_matrix.
pd.scatter_matrix(df,figsize=(6, 6), diagonal='kde');



In [ ]: