In [1]:
import pandas as pd
import psycopg2
import numpy as np
from scipy.stats.stats import pearsonr
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [2]:
import sys 
import os
sys.path.append(os.path.abspath("/home/scidb/HeartRatePatterns/Python"))
from LogisticRegresion import ajustLogisticRegression
from PlotWords import plot_word
from Matrix import convert_matrix
from NMF import auc_model

In [3]:
def savePearson(pearson, dbname="mimic"):
    """Bulk-insert Pearson results into the ``wordspearson`` table.

    Parameters
    ----------
    pearson : iterable of dict
        Each dict must provide the keys ``'word'``, ``'p1'``, ``'p2'``,
        ``'patient'`` and ``'deadPatient'``; one row is inserted per dict.
    dbname : str
        Database name, appended to ``"dbname="`` to form the connection string.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If a record lacks one of the required keys (raised before any
        connection is opened).
    """
    records = list(pearson)
    if not records:
        # Nothing to insert. psycopg2 would send each empty list as an untyped
        # '{}' literal, which unnest() is not expected to be able to resolve,
        # so skip the database round-trip altogether.
        return

    fields = ('word', 'p1', 'p2', 'patient', 'deadPatient')
    # One list per column: each is bound as an array and expanded row-wise by
    # unnest() on the server. An explicit mapping replaces locals() so only the
    # intended values are handed to the driver.
    params = {field: [record[field] for record in records] for field in fields}

    insert_statement=('INSERT INTO wordspearson(word,p1,p2,patient,deadPatient)'
                      ' SELECT unnest( %(word)s ) ,'
                      ' unnest( %(p1)s) ,'
                      ' unnest( %(p2)s) ,'
                      ' unnest( %(patient)s) ,'
                      ' unnest( %(deadPatient)s)')

    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(insert_statement, params)
            conn.commit()
        finally:
            # Release the cursor even when execute/commit raises.
            cur.close()
    finally:
        # Always close the connection so a failed insert does not leak it.
        conn.close()

In [4]:
def selectPearson(dbname="mimic"):
    """Load every row of ``wordspearson`` as a list of dicts.

    Parameters
    ----------
    dbname : str
        Database name, appended to ``"dbname="`` to form the connection string.

    Returns
    -------
    list of dict
        One dict per table row with the keys ``"word"``, ``"p1"``, ``"p2"``
        and ``"cuantosMueren"``. The last one is a display string of the form
        ``"<deadpatient/patient as a percentage> de <patient>"``.
    """
    select_statement='SELECT word,p1,p2,patient,deadpatient FROM wordspearson'

    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_statement)
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        # Close the connection even if the query fails.
        conn.close()

    select = []
    for word, p1, p2, patient, deadpatient in rows:
        # A NULL deadpatient is read as 0 and a zero/NULL patient count yields
        # a 0% ratio, so one incomplete row cannot abort the whole listing.
        ratio = (deadpatient or 0)/patient if patient else 0.0
        cuantosMueren = "{0:.2%}".format(ratio)+" de "+str(patient)
        select.append({"word":word,"p1":p1,"p2":p2,"cuantosMueren":cuantosMueren})
    return select

In [5]:
def countPatients(word, dbname="mimic"):
    """Count the ``matrix`` rows for one word and sum their ``isalive`` flag.

    Parameters
    ----------
    word : str
        Word to look up in ``matrix.word``.
    dbname : str
        Database name, appended to ``"dbname="`` to form the connection string.

    Returns
    -------
    dict
        ``{"patient": count(1), "deadPatient": sum(isalive)}`` for the word, or
        an empty dict when the word has no row in ``matrix``.
        NOTE(review): the key is named ``deadPatient`` although the value is
        the sum of ``isalive`` - confirm how ``isalive`` is encoded.
    """
    select_statement='''SELECT count(1),sum(isalive) FROM matrix m LEFT JOIN subjectwords s 
    ON m.subject_id=s.subject_id where m.word = %s GROUP BY m.word'''

    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_statement,(word,))
            # Filtering on a single word and grouping by word yields at most
            # one row, so a single fetch is sufficient.
            row = cur.fetchone()
        finally:
            cur.close()
    finally:
        # Close the connection even if the query fails.
        conn.close()

    if row is None:
        return {}
    return {"patient":row[0],"deadPatient":row[1],}

In [6]:
def cleanPearson(dbname="mimic"):
    """Delete every row from the ``wordspearson`` table and commit.

    Parameters
    ----------
    dbname : str
        Database name, appended to ``"dbname="`` to form the connection string.

    Returns
    -------
    None
    """
    delete_statement='DELETE FROM wordspearson'

    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            # The statement has no placeholders, so no parameters are passed.
            cur.execute(delete_statement)
            conn.commit()
        finally:
            cur.close()
    finally:
        # Close the connection even if the delete fails.
        conn.close()

In [7]:
# Build the patient-by-word table with the project helper convert_matrix.
# NOTE(review): sumvals=False presumably yields 0/1 presence flags instead of
# occurrence counts - confirm in the Matrix module.
table = convert_matrix(sumvals=False)
print(table.shape)
# A bare last expression makes the notebook render the frame.
table


(590, 18683)
(590, 18683)
Out[7]:
aaaaac aaaaad aaaaae aaaaaf aaaaag aaaaah aaaaai aaaaaj aaaabb aaaabc ... kkb kke lab lbb lbbb lbbc lcc leb lib libb
subject_id isAlive
20 0 0 1 0 1 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
135 1 0 1 0 1 1 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
151 1 1 1 0 0 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
177 1 0 1 0 1 1 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
214 1 1 1 0 1 1 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
263 1 0 1 0 0 1 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
279 1 0 1 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
283 1 0 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
368 1 1 1 0 1 1 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
377 1 1 1 0 1 1 1 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
408 1 0 1 0 1 1 0 0 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
462 0 0 1 0 1 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
618 1 0 1 0 0 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
638 1 1 1 0 1 1 1 0 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
682 1 1 1 1 1 1 1 0 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
736 0 0 0 1 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
743 1 1 1 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
749 1 1 1 0 1 1 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
793 1 1 1 0 1 1 1 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
886 1 0 1 0 1 1 1 0 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
952 1 1 1 0 1 1 1 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
974 0 1 1 0 1 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
1004 1 0 1 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
1075 1 1 1 0 1 0 1 0 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
1144 0 1 1 0 1 1 1 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
1160 0 0 1 0 1 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1222 0 0 1 0 1 1 1 0 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
1226 1 1 1 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
1459 0 0 1 0 1 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
1528 1 1 1 0 1 1 1 0 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
23178 1 1 1 0 1 1 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
23193 0 1 1 0 1 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
23200 0 1 0 0 0 0 0 0 0 1 1 ... 0 1 0 1 1 0 0 1 0 0
23298 0 1 1 1 1 0 1 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
23336 1 1 1 0 0 0 1 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
23339 0 0 1 0 0 1 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
23363 1 1 1 0 1 1 1 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
23384 0 0 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
23401 1 0 1 0 1 1 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
23451 1 0 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
23468 1 1 1 0 1 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
23474 1 1 1 0 1 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
23510 1 0 1 0 1 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
23944 1 1 0 0 0 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
24004 1 1 1 0 1 0 1 0 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
24030 0 0 1 0 0 1 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
24076 1 0 1 0 1 0 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 0
24129 1 0 1 0 0 0 0 0 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
24133 0 1 1 0 0 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
24142 1 1 1 0 1 1 1 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
24152 1 0 1 1 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
24185 1 1 1 0 0 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
24227 0 1 1 0 1 1 0 0 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
25466 0 0 1 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
41962 1 0 1 0 1 1 1 0 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
42255 1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
42261 0 0 1 0 0 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
42410 0 1 1 1 0 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
42492 0 1 1 0 1 1 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
43459 0 0 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0

590 rows × 18683 columns


In [ ]:
# Outcome label for each patient, read from the second level of the row index
# (the frame is indexed by subject_id and isAlive).
# get_level_values returns the actual level values; MultiIndex.labels was
# removed in pandas 1.0 and only ever exposed the integer position codes.
survived = table.index.get_level_values(1).tolist()
# Feature matrix: one row per patient, one column per word.
patients = table.values
# Word names, in the same order as the columns of `patients`.
columns = list(table.columns.values)

In [ ]:
# Correlate every word column with the outcome, then replace the contents of
# wordspearson with the fresh results.
# The statistics are computed BEFORE the table is emptied: the loop issues one
# database query per word and can fail part-way, and deleting first would then
# leave wordspearson empty with nothing to put back.
print("now reading",len(columns),"columns")
pearsonList = []
for i, word in enumerate(columns):
    # pearsonr's result indexes as (correlation coefficient, p-value).
    pearson = pearsonr(patients[:,i],survived)
    count = countPatients(word)
    pearsonList.append({'word':word,
                        'p1':pearson[0],
                        'p2':pearson[1],
                        'patient':count['patient'],
                        'deadPatient':count['deadPatient']})
    print(i,end=", ")
print(" preparedToSavePearson")
# Only now discard the previous results, immediately before writing the new ones.
cleanPearson()
print("cleanedPearson")
savePearson(pearsonList)
print("savedPearson")

In [ ]:
# Hold out 20% of the patients as a test set; the fixed random_state makes the
# shuffle, and therefore the split, reproducible across runs.
patients_train, patients_test,survived_train, survived_test = train_test_split(patients,survived,test_size=0.2, random_state=42)

In [ ]:
# Sanity check of the split: full table shape, then train and test shapes,
# one per line.
print(table.shape, patients_train.shape, patients_test.shape, sep="\n")

In [ ]:
# ajustLogisticRegression is a project helper (LogisticRegresion module) that
# returns a (model, score) pair.
# NOTE(review): presumably it fits on the train split and scores on the test
# split - confirm in the helper.
model,accuracy_score = ajustLogisticRegression(patients_train,survived_train,patients_test,survived_test)

In [ ]:
# auc_model is a project helper imported from the NMF module; it receives a
# name string, the model and the held-out test data.
# NOTE(review): presumably it computes (and maybe plots) the ROC AUC on the
# test split - confirm in the helper.
logit_roc_auc = auc_model("Logistic",model,patients_test,survived_test)

In [ ]:
# Display the model object returned by ajustLogisticRegression.
model

In [ ]:
from operator import itemgetter

_p1_of = itemgetter('p1')

def _p1_sort_key(row):
    """Sort key that ranks rows with a usable p1 ahead of NaN/None ones.

    p1 can be NaN (pearsonr returns NaN for a constant input column) or None
    (a NULL in the table). Comparing NaN gives an ill-defined order and
    comparing None raises, so such rows get a fixed placeholder and, with
    reverse=True, end up after every real correlation.
    """
    p1 = _p1_of(row)
    usable = p1 is not None and p1 == p1  # NaN is the only value unequal to itself
    return (usable, p1 if usable else 0.0)

# Stored correlations ordered from the highest p1 to the lowest, with rows
# lacking a usable p1 at the end.
pearsonDict = sorted(selectPearson(), key=_p1_sort_key, reverse=True)

In [ ]:
# pearsonDict was sorted by p1 in descending order in the previous cell, so the
# first five entries are the five highest correlations.
print(pearsonDict[:5])
# Pass the same five entries to the project helper plot_word (PlotWords module).
plot_word(pearsonDict[:5])

In [ ]: