In [3]:
import psycopg2
import gc
from psycopg2.extensions import register_adapter, AsIs
from time import time
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict

In [2]:
def obtainMaxRecords(dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = '''SELECT replace(split_part(record, '/',3),'s',''),max(record)
    FROM rstq
    WHERE cast(replace(split_part(record, '/',3),'s','') as integer)
    NOT IN (select subject_id from subjectrecord)
    AND centroid IS NOT NULL
    GROUP BY split_part(record, '/',3)'''
    cur.execute(select_stament)
    subject = []
    for row in cur :
        subject.append({"subject_id":int(row[0]),"record":row[1]})
    conn.close()
    return subject

In [3]:
def insert(words,table,dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    insert_statement = 'INSERT into '+table+' (%s) values %s'
    columns = words.keys()
    values = [words[column] for column in columns]
#    print(cur.mogrify(insert_statement, (AsIs(','.join(columns)), tuple(values))))
    cur.execute(insert_statement, (AsIs(','.join(columns)), tuple(values)))
    conn.commit()
    cur.close()
    conn.close()

In [4]:
def fillsubjectRecord() :
    for subject in obtainMaxRecords() :
        insert(subject,"subjectrecord")

In [5]:
def obtainSubjects(dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = '''SELECT subject_id,record FROM subjectrecord'''
    cur.execute(select_stament)
    subject = []
    for row in cur :
        subject.append({"subject_id":int(row[0]),"record":row[1]})
    cur.close()
    conn.close()
    return subject

In [6]:
def patientIsAlive(patient,dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT dod "
                      " FROM patients WHERE subject_id = "+str(patient)+" LIMIT 1"
    )
    cur.execute(select_stament)
    select = []
    for row in cur :
        select.append(1 if row[0] is not None else 0 )
    cur.close()
    conn.close()
    return select

In [7]:
def obtainWord(subject,dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT centroid "
                      " FROM rstq WHERE record='"+str(subject)+"' ORDER BY r_s"
    )
    cur.execute(select_stament)
    centroids = ""
    for row in cur :
        centroid = row[0]
        if centroid is not None :
            centroids= centroids+centroid
    if(len(centroids)<3600): centroids = None
    conn.close()
    return centroids

In [8]:
def deleteWord(dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = "DELETE FROM subjectwords"
    cur.execute(select_stament)
    conn.commit()
    cur.close()
    conn.close()

In [9]:
def insertSubjectWords(words,dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    insert_statement=('INSERT INTO subjectwords(word,subject_id,isalive)'
                      ' SELECT unnest( %(word)s ) ,'
                      ' unnest( %(subject_id)s) ,'
                      ' unnest( %(isalive)s)')
    word=[r['word'] for r in words]
    subject_id=[r['subject_id'] for r in words]
    isalive=[r['isalive'] for r in words]
#    print(cur.mogrify(insert_statement,locals()))
    cur.execute(insert_statement,locals())
    conn.commit()
    cur.close()
    conn.close()

In [10]:
def createListOfWords() :
    subjects = obtainSubjects()
    lenSubjects = len(subjects)
    deleteWord()
    i,j=0,0
    words = []
    for subject in subjects :
        subject_id = subject['subject_id']
        print(subject_id)
        isAlive = patientIsAlive(subject_id)
        if isAlive != [] :
            j=j+1
            word = obtainWord(subject['record'])
            if word is not None:
                words.append({'subject_id':subject_id,'word':word,'isalive':isAlive[0]})
    insertSubjectWords(words)
    print()
    print("In a list of "+str(lenSubjects)+" we know the status of "+str(j)+" patients")

In [11]:
def existMatrix(word,subject,dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT 1 "
                      " FROM matrix WHERE subject_id='"+str(subject)+"' AND word='"+str(word)+"'"
    )
    cur.execute(select_stament)
    exist = False
    for row in cur :
        exist = True
    cur.close()
    conn.close()
    return exist

In [65]:
def printGroups(dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT count(1),LENGTH(word) FROM subjectwords GROUP BY LENGTH(word) ORDER BY LENGTH(word)"
    )
    cur.execute(select_stament)
    words = []
    maximun,minimun = 0,1000000000
    for row in cur :
        words.append({"subjects":row[0],"wordSize":row[1]})
        maximun = maximun if maximun>row[1] else row[1]
        minimun = minimun if minimun<row[1] else row[1]
    cur.close()
    conn.close()
    print("maximun",maximun,"minimun",minimun)
    minimun-=1
    division = (maximun-minimun)/8
    means_men = defaultdict(int) #'0':0,'1':0,'2':0,'3':0,'4':0,'5':0,'6':0,'7':0,'8':0,
    for r in words :
        for x in range(0, 11):
            floor = (division*x)+minimun+1
            top = division*(x+1)+minimun
            if(r['wordSize']>=floor and r['wordSize']<top):
                floor = str(int(floor)).zfill(5)
                top = str(int(top)).zfill(5)
                means_men[floor+"-"+top] += r['subjects']
    columns = sorted(means_men.keys())
    means_men = [means_men[column] for column in sorted(means_men)]
    index = np.arange(len(means_men))
    bar_width = 0.35
    fig, ax = plt.subplots() 
    for i, v in enumerate(means_men):
        ax.text(v-10,i-0.1, str(v), color='white', fontweight='bold')
    plt.barh(index,means_men,label='Pacientes')
    plt.ylabel('Número de latidos')
    plt.xlabel('Pacientes')
    plt.title('Pacientes por Número de latidos')
    plt.yticks(index + bar_width / 2, (columns))
    plt.legend()
#    plt.tight_layout()
    plt.show()

In [13]:
def printWords(dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT count(1),LENGTH(word) FROM subjectwords GROUP BY LENGTH(word) ORDER BY LENGTH(word)"
    )
    cur.execute(select_stament)
    x,y = [],[]
    for row in cur :
        x.append(row[0])
        y.append(row[1])
    cur.close()
    conn.close()
    bar_width = 0.35
    plt.plot(x, y, 'ro')
    plt.ylabel('Número de latidos')
    plt.xlabel('Pacientes')
    plt.title('Pacientes por Número de latidos')
    plt.legend()
#    plt.tight_layout()
    plt.show()

In [13]:
t0 = time()
fillsubjectRecord()
print("fillsubjectRecord done in %0.3fs." % (time() - t0))
gc.collect()


fillsubjectRecord done in 96.242s.
Out[13]:
0

In [14]:
t0 = time()
createListOfWords()
print("createListOfWords done in %0.3fs." % (time() - t0))


5223
15079
20968
20269
11679
23922
20766
19372
3498
14897
4018
5114
6692
10534
11318
3745
16038
10651
9664
23336
1995
11622
2996
16122
4041
20984
15021
15315
10241
4870
15330
20345
5274
19297
3780
20372
11710
14936
18733
9338
3744
19333
24228
12821
14899
18621
14919
9332
17702
19834
18402
4317
12856
3992
16127
4064
14532
5382
11055
23413
19655
3245
11219
5642
15982
15382
18739
11512
20474
20238
14240
19145
13136
14533
11268
9425
18818
9335
9615
3748
18910
3441
21030
19975
4568
11467
15298
12878
14669
11604
5496
4474
23318
5199
3192
5056
16121
11388
5459
11509
4324
4865
3506
15208
15733
13002
15110
20860
23178
3158
3606
24218
3358
24076
5506
4688
4448
3622
4655
11291
18169
14458
10656
23238
3593
4685
14322
12849
24063
3286
17488
24124
15271
9630
15093
15329
15595
4254
20856
21025
19757
9341
3516
15426
4436
19342
14766
18524
17929
23517
10782
9593
19538
11191
18970
18875
2846
4431
10635
4346
15385
18688
3619
15524
19700
17582
14822
17697
10655
10766
4806
10689
15569
3914
3340
19296
5062
14947
9372
5037
20705
24030
19718
12739
14524
5205
19981
17765
10686
15509
19765
20095
20986
10433
21071
14702
5201
13033
17645
5237
15052
3024
9286
12968
4347
4852
15624
11323
12807
23890
20846
18982
14946
18082
5451
14592
15619
4786
11431
11061
23384
3889
19208
3612
15583
20840
14772
18393
3386
14836
19055
3165
20582
19087
4833
15150
4076
9268
13183
12798
18975
4566
5254
18365
5442
20190
17748
3746
16032
15082
4142
2784
4420
15703
5591
20448
11243
3884
19348
4802
15185
19430
15701
17722
18685
14486
4263
12941
3133
4778
9366
23344
3640
5321
4462
18614
5163
9389
9269
14544
11279
11590
10847
3863
15904
20795
9278
18358
12982
10475
14391
19093
23944
17696
18786
10738
17756
18219
23130
19330
3171
15959
23166
3372
23539
23468
14935
10061
15997
20879
14542
11635
4401
15809
15457
3521
5239
9393
11688
18205
18815
4593
9330
14579
18126
15797
25466
5126
17976
3929
15124
18681
17691
5619
4266
18225
17997
20922
11421
5607
11464
5175
3278
18413
14298
4261
19498
19248
14561
2917
4068
4808
4481
17472
10856
3673
11360
18243
17671
13049
12772
16002
16112
9642
15464
17774
10906
14828
5604
12752
9526
18239
14938
3798
18429
3995
15144
19099
10939
14784
9555
15247
5400
9575
4351
15458
3515
3302
18166
19999
15640
10924
4664
4369
3680
5023
20450
2893
10289
24227
19418
23401
23550
23552
14205
11591
11261
20316
3052
20763
5289
18595
19246
14863
21048
10748
20658
18727
12903
3214
11280
10275
15654
17589
14714
3821
21050
10432
13013
17795
4641
10769
3321
15143
18322
19053
18401
18139
10872
17617
18998
19563
4847
3287
19936
16139
17743
10638
15924
4633
23097
10455
14325
11004
19102
14874
21152
5343
17666
3695
23200
2968
12987
21147
9473
11727
4904
10595
2827
11667
4784
5620
10487
4805
10510
24185
20407
2787
3586
14346
14855
15652
4656
12947
3920
15389
3764
12974
4338
20181
19931
4679
17948
14299
20471
3533
14824
19603
5198
19675
20479
20354
10152
19411
18229
4113
18676
14584
15023
24133
10250
24142
20482
15332
23298
11161
15974
15903
5376
18167
21115
20564
3571
14263
23292
24152
19578
11342
10188
19465
11684
21138
3768
3853
9289
15181
18852
10785
20196
19608
11137
23474
11638
4894
12942
23470
10842
11546
4860
14251
24064
15168
10342
19649
23193
14529
3554
10995
10552
4409
23448
19442
19771
15531
16046
4264
14539
23339
2858
17483
10205
20101
19618
14918
11143
10653
5417
5609
9363
19445
4252
4771
21072
23120
10710
14291
19965
3794
11473
10564
13146
24129
9354
18597
19848
20128
10652
4350
16055
14622
10604
9595
12900
4059
18812
18108
3886
15480
16013
14410
14995
5645
15779
3021
4329
3261
23363
5332
4136
9398
5637
19031
10209
14233
19811
15883
3513
18300
11242
20936
18988
23451
4915
15646
18696
9483
15268
21100
4829
5549
17497
3623
19727
15302
19604
14975
2946
3221
9434
11086
3617
15631
15557
11703
13191
10464
4490
15749
4599
12920
23510
20486
2921
13052
11694
13099
15877
20742
20794
14679
14386
15864
9361
18035
15669
15727
15964
5336
10362
5345
3633
10876
4180
23907
20612
4742
15226
4909
19371
15911
11609
18942
2834
16129
10525
11043
19213
3759
2919
14204
21123
15567
15687
18546
19005
23180
4587
15545
3473
20129
15465
19016
11744
3466
24232
2981
11187
20246
19726
20643
3512
19346
24004
4077
4565
23893
3250
3474
9324
16117
20929
11341
23201
4362
17735
10391
10611
135
151
263
177
214
11328
8557
16490
15198
9637
9958
21449
1160
2224
17372
743
7894
7084
43529
9732
9251
33
6901
974
2442
9965
16639
15141
1802
9460
2185
7492
8654
7685
7517
16949
9518
6649
793
16748
9297
7183
1908
10186
16360
1004
16709
42274
8347
10096
1744
8363
12167
2492
6889
42460
7512
8698
8415
8548
8115
8990
1924
7400
17456
8985
1528
9882
42519
18846
8749
6841
16172
3642
8040
6868
21443
8272
17810
23048
21438
7799
6914
11764
8084
43446
2049
8524
1222
16992
21538
7432
7136
6607
9891
7225
42510
43233
2148
14551
608
42477
17246
14611
17401
7263
7448
42255
1075
7320
8569
21431
9358
9672
42709
4188
3935
1531
7422
14478
2498
10013
9967
42410
17616
9356
8949
9070
7497
10042
427
14495
9105
8814
28065
11596
2172
7175
736
11907
17075
42261
1144
11827
12078
283
42782
43501
7860
7651
7521
7262
14763
42721
2246
1123
17092
21561
42285
12673
12727
85
17216
3057
7542
8467
7644
6669
17182
21521
2340
11442
11018
638
42492
2274
7981
7910
10077
9998
17218
43296
5766
11931
1226
7442
625
8734
17058
749
7339
99992
2990
7612
886
8336
7172
2317
8533
11247
8932
8087
515
7666
328
7786
12217
1941
16192
6876
41962
10928
90495
23047
14862
11998
9148
9951
7234
41902
6673
16961
17798
7683
8870
1029
6075
7567
17412
9667
12319
7470
462
23034
8466
16511
18403
9783
7328
42574
2467
7153
8207
8670
14909
8674
7522
7289
7265
682
408
9005
377
6917
6981
10247
7224
7138
8996
12712
17522
18740
7125
11757
571
7472
9043
42694
20
8432
9685
7977
8871
8009
317
11877
16161
12679
9036
2228
8461
10030
11988
6636
618
17262
1459
9176
16504
7217
17018
12663
5933
11951
279
17457
14651
16447
7009
8406
9871
18837
8936
8121
7782
12704
1569
14448
8984
8122
9949
12215
17366
7303
14777
16881
4859
17717
8964
8198
7360
16909
2514
952
12708
6708
7528
9708
6983
9494
11372
7048
8989
16336
91428
11852
21575
8231
8573
9847
42310
7452
9253
8451
15333
43459
42404
1414
1650
12251
8318
21517
8568
9705
6945
16280
7798
7371
16607
9021
368

In a list of 1098 we know the status of 1074 patients
createListOfWords done in 2461.363s.

In [66]:
printGroups()


maximun 29296 minimun 3636

In [14]:
printWords()


/usr/local/lib/python3.4/dist-packages/matplotlib/axes/_axes.py:545: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.
  warnings.warn("No labelled objects found. "

In [ ]: