In [1]:
import pandas as pd
import psycopg2
import numpy as np
from scipy.stats.stats import pearsonr
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
In [2]:
import sys
import os
sys.path.append(os.path.abspath("/home/scidb/HeartRatePatterns/Python"))
from LogisticRegresion import ajustLogisticRegression
from PlotWords import plot_word
from Matrix import convert_matrix
from NMF import auc_model
In [3]:
def savePearson(pearson,dbname="mimic") :
    """Bulk-insert Pearson correlation rows into the ``wordspearson`` table.

    Parameters
    ----------
    pearson : iterable of dict
        One mapping per word with the keys ``word``, ``p1``, ``p2``,
        ``patient`` and ``deadPatient``.
    dbname : str
        Name of the PostgreSQL database to connect to.

    Returns
    -------
    None

    Notes
    -----
    The rows are sent as five parallel arrays that the statement expands with
    ``unnest``, so the whole batch is written by a single INSERT and a single
    commit. An empty batch is a no-op and does not open a connection. The
    cursor and the connection are closed even when the insert fails; nothing
    is committed in that case.
    """
    rows = list(pearson)
    if not rows:
        # Nothing to insert; also avoids sending empty arrays to unnest().
        return

    insert_statement = ('INSERT INTO wordspearson(word,p1,p2,patient,deadPatient)'
                        ' SELECT unnest( %(word)s ) ,'
                        ' unnest( %(p1)s) ,'
                        ' unnest( %(p2)s) ,'
                        ' unnest( %(patient)s) ,'
                        ' unnest( %(deadPatient)s)')
    # Pass only the placeholders the statement names instead of locals(),
    # so unrelated local objects are never handed to the driver.
    params = {
        'word': [r['word'] for r in rows],
        'p1': [r['p1'] for r in rows],
        'p2': [r['p2'] for r in rows],
        'patient': [r['patient'] for r in rows],
        'deadPatient': [r['deadPatient'] for r in rows],
    }

    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(insert_statement, params)
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
In [4]:
def selectPearson(dbname="mimic") :
    """Read every row of ``wordspearson`` and summarise it for display.

    Parameters
    ----------
    dbname : str
        Name of the PostgreSQL database to connect to.

    Returns
    -------
    list of dict
        One dict per table row with the keys ``word``, ``p1``, ``p2`` and
        ``cuantosMueren``. ``cuantosMueren`` is the text
        ``"<deadpatient / patient as a percentage> de <patient>"``.

    Notes
    -----
    A missing (NULL) ``patient`` or ``deadpatient`` value is treated as 0, and
    a patient count of 0 yields a ratio of 0 instead of a division error.
    The cursor and the connection are closed even when the query fails.
    """
    select_statement = 'SELECT word,p1,p2,patient,deadpatient FROM wordspearson'
    select = []
    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_statement)
            for word, p1, p2, patient, dead in cur:
                patient = patient or 0
                # Guard the ratio: a word with no counted patients has no
                # meaningful percentage, so report 0 rather than raising.
                ratio = (dead or 0) / patient if patient else 0.0
                cuantosMueren = "{0:.2%}".format(ratio)+" de "+str(patient)
                select.append({"word":word,"p1":p1,"p2":p2,"cuantosMueren":cuantosMueren})
        finally:
            cur.close()
    finally:
        conn.close()
    return select
In [5]:
def countPatients(word,dbname="mimic") :
    """Count the ``matrix`` rows for one word and sum their ``isalive`` flags.

    Parameters
    ----------
    word : str
        Word to look up in ``matrix.word``.
    dbname : str
        Name of the PostgreSQL database to connect to.

    Returns
    -------
    dict
        ``{"patient": count(1), "deadPatient": sum(isalive)}`` for the word,
        or an empty dict when the query returns no row.

    Notes
    -----
    The cursor and the connection are closed even when the query fails.
    NOTE(review): ``deadPatient`` is filled from ``sum(isalive)``; confirm
    that ``isalive`` is encoded so that this sum really counts deaths.
    """
    select_statement='''SELECT count(1),sum(isalive) FROM matrix m LEFT JOIN subjectwords s
ON m.subject_id=s.subject_id where m.word = %s GROUP BY m.word'''
    select = {}
    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_statement,(word,))
            # The query groups by the single filtered word; if several rows
            # ever came back, the last one wins (as before).
            for row in cur:
                select = {"patient":row[0],"deadPatient":row[1],}
        finally:
            cur.close()
    finally:
        conn.close()
    return select
In [6]:
def cleanPearson(dbname="mimic") :
    """Delete every row from the ``wordspearson`` table and commit.

    Parameters
    ----------
    dbname : str
        Name of the PostgreSQL database to connect to.

    Returns
    -------
    None

    Notes
    -----
    The statement has no placeholders, so it is executed without a parameter
    mapping. The cursor and the connection are closed even when the delete
    fails; nothing is committed in that case.
    """
    delete_statement = 'DELETE FROM wordspearson'
    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(delete_statement)
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
In [7]:
# Build the matrix through the project helper convert_matrix. Later cells read
# its index, values and columns (presumably a pandas DataFrame - confirm in Matrix.py).
table = convert_matrix(sumvals=False)
print(table.shape)
# Bare last expression so the notebook renders the table as the cell output.
table
Out[7]:
In [ ]:
# MultiIndex.labels was renamed to MultiIndex.codes in pandas 0.24 and removed
# in 1.0, so prefer .codes and fall back to .labels only on older pandas.
# The values are the integer codes of the second index level
# (presumably the survival flag - TODO confirm against convert_matrix).
survived = (
    table.index.codes if hasattr(table.index, "codes") else table.index.labels
)[1].tolist()
# Raw feature matrix: one row per index entry, one column per word.
patients = table.values
# Column labels, in the same order as the columns of `patients`.
columns = list(table.columns.values)
In [ ]:
# Start from an empty wordspearson table, then correlate every word column
# with the survival vector and store all results in one batch.
cleanPearson()
print("cleanedPearson now reading",len(columns),"columns")

pearsonList = []
for i, word in enumerate(columns):
    # pearson[0] is the correlation coefficient, pearson[1] its p-value.
    pearson = pearsonr(patients[:, i], survived)
    count = countPatients(word)
    pearsonList.append({
        'word': word,
        'p1': pearson[0],
        'p2': pearson[1],
        'patient': count['patient'],
        'deadPatient': count['deadPatient'],
    })
    # Progress trace: index of the column just processed.
    print(i, end=", ")

print(" preparedToSavePearson")
savePearson(pearsonList)
print("savedPearson")
In [ ]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
(patients_train,
 patients_test,
 survived_train,
 survived_test) = train_test_split(
    patients,
    survived,
    test_size=0.2,
    random_state=42,
)
In [ ]:
# Sanity check: full matrix shape, then the train and test partitions,
# one shape per line.
print(table.shape, patients_train.shape, patients_test.shape, sep="\n")
In [ ]:
# Hand both splits to the project helper; it returns the model and a score
# (presumably fitted on the training split and scored on the test split -
# confirm in LogisticRegresion.py).
(model, accuracy_score) = ajustLogisticRegression(
    patients_train,
    survived_train,
    patients_test,
    survived_test,
)
In [ ]:
# Evaluate the model on the held-out split through the project's auc_model
# helper, labelled "Logistic".
logit_roc_auc = auc_model(
    "Logistic",
    model,
    patients_test,
    survived_test,
)
In [ ]:
# Bare expression: the notebook shows the model's repr as the cell output.
model
In [ ]:
from operator import itemgetter

_p1_of = itemgetter('p1')


def _p1_rank(row):
    """Sort key for descending p1 that keeps unusable values out of the way.

    NaN compares false against everything (which makes a plain sort on p1
    unreliable) and None cannot be ordered against numbers at all. Rows with
    such a p1 get a key that sorts after every real value under reverse=True.
    Rows with ordinary p1 values are ordered exactly as a plain sort on p1.
    """
    p1 = _p1_of(row)
    usable = p1 is not None and p1 == p1  # NaN is the only value unequal to itself
    return (usable, p1 if usable else 0.0)


# Stored correlations, strongest positive correlation first; rows whose p1 is
# NaN/None (e.g. from a constant column) end up at the tail.
pearsonDict = selectPearson()
pearsonDict = sorted(pearsonDict, key=_p1_rank, reverse=True)
In [ ]:
# Show and plot the five entries at the head of the sorted list.
top_words = pearsonDict[:5]
print(top_words)
plot_word(top_words)
In [ ]: