In [1]:
import psycopg2
import gc
from psycopg2.extensions import register_adapter, AsIs
from time import time
import matplotlib.pyplot as plt
import numpy as np

In [2]:
def obtainMaxRecords(dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT replace(split_part(record, '/',3),'s',''),max(record) "
                      " FROM rstq "
                      " WHERE cast(replace(split_part(record, '/',3),'s','') as integer) "
                      " NOT IN (select subject_id from subjectrecord) "
                      " AND centroid IS NOT NULL"
                      " GROUP BY split_part(record, '/',3)"
    )
    cur.execute(select_stament)
    subject = []
    for row in cur :
        subject.append({"subject_id":int(row[0]),"record":row[1]})
    conn.close()
    return subject

In [3]:
def insert(words,table,dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    insert_statement = 'INSERT into '+table+' (%s) values %s'
    columns = words.keys()
    values = [words[column] for column in columns]
#    print(cur.mogrify(insert_statement, (AsIs(','.join(columns)), tuple(values))))
    cur.execute(insert_statement, (AsIs(','.join(columns)), tuple(values)))
    conn.commit()
    cur.close()
    conn.close()

In [4]:
def fillsubjectRecord() :
    for subject in obtainMaxRecords() :
        insert(subject,"subjectrecord")

In [5]:
def obtainSubjects(dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT subject_id,record "
                      " FROM subjectrecord"
    )
    cur.execute(select_stament)
    subject = []
    for row in cur :
        subject.append({"subject_id":int(row[0]),"record":row[1]})
    cur.close()
    conn.close()
    return subject

In [6]:
def patientIsAlive(patient,dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT dod "
                      " FROM patients WHERE subject_id = "+str(patient)+" LIMIT 1"
    )
    cur.execute(select_stament)
    select = []
    for row in cur :
        select.append(1 if row[0] is not None else 0 )
    cur.close()
    conn.close()
    return select

In [7]:
def obtainWord(subject,dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT centroid "
                      " FROM rstq WHERE record='"+str(subject)+"' ORDER BY r_s"
    )
    cur.execute(select_stament)
    centroids = ""
    for row in cur :
        centroid = row[0]
        if centroid is not None :
            centroids= centroids+centroid
    conn.close()
    return centroids

In [8]:
def deleteWord(dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = "DELETE FROM subjectwords"
    cur.execute(select_stament)
    conn.commit()
    cur.close()
    conn.close()

In [9]:
def insertSubjectWords(words,dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    insert_statement=('INSERT INTO subjectwords(word,subject_id,isalive)'
                      ' SELECT unnest( %(word)s ) ,'
                      ' unnest( %(subject_id)s) ,'
                      ' unnest( %(isalive)s)')
    word=[r['word'] for r in words]
    subject_id=[r['subject_id'] for r in words]
    isalive=[r['isalive'] for r in words]
#    print(cur.mogrify(insert_statement,locals()))
    cur.execute(insert_statement,locals())
    conn.commit()
    cur.close()
    conn.close()

In [10]:
def createListOfWords() :
    subjects = obtainSubjects()
    lenSubjects = len(subjects)
    deleteWord()
    i,j=0,0
    words = []
    for subject in subjects :
        subject_id = subject['subject_id']
        isAlive = patientIsAlive(subject_id)
        if isAlive != [] :
            j=j+1
            word = obtainWord(subject['record'])
            if word is not None:
                words.append({'subject_id':subject_id,'word':word,'isalive':isAlive[0]})
    insertSubjectWords(words)
    print()
    print("In a list of "+str(lenSubjects)+" we know the status of "+str(j)+" patients")

In [11]:
def existMatrix(word,subject,dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT 1 "
                      " FROM matrix WHERE subject_id='"+str(subject)+"' AND word='"+str(word)+"'"
    )
    cur.execute(select_stament)
    exist = False
    for row in cur :
        exist = True
    cur.close()
    conn.close()
    return exist

In [ ]:
def printGroups(dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT count(1),LENGTH(word) FROM subjectwords GROUP BY LENGTH(word) ORDER BY LENGTH(word)"
    )
    cur.execute(select_stament)
    words = []
    maximun = 0
    for row in cur :
        words.append({"subjects":row[0],"wordSize":row[1]})
        maximun = maximun if maximun>row[1] else row[1]
    cur.close()
    conn.close()
    division = maximun/8
    means_men = {'0':0,'1':0,'2':0,'3':0,'4':0,'5':0,'6':0,'7':0,'8':0,}
    for r in words :
        for x in range(0, 9):
            if(r['wordSize']>=division*x and r['wordSize']<division*(x+1)):
                means_men[str(x)] = means_men[str(x)]+r['subjects']
    columns = sorted(means_men.keys())
    means_men = [means_men[column] for column in sorted(means_men)]
    index = np.arange(len(means_men))
    bar_width = 0.35
    plt.bar(index,means_men,label='Subjects')
    plt.xlabel('Lenght of Words')
    plt.ylabel('Subjects')
    plt.title('Subjects by Lenght of words')
    plt.xticks(index + bar_width / 2, (columns))
    plt.legend()
#    plt.tight_layout()
    plt.show()

In [ ]:
t0 = time()
fillsubjectRecord()
print("done in %0.3fs." % (time() - t0))
gc.collect()
t0 = time()
createListOfWords()
print("done in %0.3fs." % (time() - t0))


done in 92.950s.

In [ ]:
printGroups()

In [ ]: