In [1]:
import pandas as pd
import psycopg2
import numpy as np
from scipy.stats.stats import pearsonr
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
In [2]:
import sys
import os
# Directory that holds the project's helper modules imported right below
# (LogisticRegresion, PlotWords, Matrix). The original absolute path stays the
# default; set the HEARTRATE_PYTHON_DIR environment variable to run the
# notebook against a checkout that lives somewhere else.
HEARTRATE_PYTHON_DIR = os.path.abspath(
    os.environ.get("HEARTRATE_PYTHON_DIR", "/home/scidb/HeartRatePatterns/Python")
)
# Re-running this cell must not keep appending the same entry to sys.path.
if HEARTRATE_PYTHON_DIR not in sys.path:
    sys.path.append(HEARTRATE_PYTHON_DIR)
from LogisticRegresion import ajustLogisticRegression
from PlotWords import plot_word
from Matrix import convert_matrix
In [3]:
def savePearson(pearson, dbname="mimic"):
    """Bulk-insert Pearson results into the ``wordspearson`` table.

    Parameters
    ----------
    pearson : iterable of dict
        One mapping per word with the keys ``word``, ``p1``, ``p2``,
        ``patient`` and ``deadPatient``.
    dbname : str
        Name of the PostgreSQL database to connect to.

    Returns
    -------
    None

    Notes
    -----
    The rows are sent as five parallel arrays that the statement expands with
    ``unnest``, so the whole list is stored by a single INSERT. The
    transaction is committed only if the INSERT succeeds; the cursor and the
    connection are closed in every case.
    """
    rows = list(pearson)
    if not rows:
        # Nothing to store: skip the connection and avoid handing empty
        # arrays to unnest().
        return

    insert_statement = ('INSERT INTO wordspearson(word,p1,p2,patient,deadPatient)'
                        ' SELECT unnest( %(word)s ) ,'
                        ' unnest( %(p1)s) ,'
                        ' unnest( %(p2)s) ,'
                        ' unnest( %(patient)s) ,'
                        ' unnest( %(deadPatient)s)')
    # Explicit parameter mapping: only the five columns are handed to the
    # driver, instead of every local variable of this function.
    params = {
        'word': [r['word'] for r in rows],
        'p1': [r['p1'] for r in rows],
        'p2': [r['p2'] for r in rows],
        'patient': [r['patient'] for r in rows],
        'deadPatient': [r['deadPatient'] for r in rows],
    }

    conn = psycopg2.connect("dbname=" + dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(insert_statement, params)
            conn.commit()
        finally:
            cur.close()
    finally:
        # Closing without a commit discards the pending transaction, so a
        # failed INSERT leaves the table untouched.
        conn.close()
In [4]:
def selectPearson(dbname="mimic"):
    """Read every row of ``wordspearson`` and return them sorted by ``p1``.

    Parameters
    ----------
    dbname : str
        Name of the PostgreSQL database to connect to.

    Returns
    -------
    list of dict
        One dict per stored word with the keys ``word``, ``p1``, ``p2`` and
        ``cuantosMueren``. The last one is a display string of the form
        ``"<deadpatient / patient as a percentage> de <patient>"``. The list
        is ordered by ``p1``, largest first.

    Notes
    -----
    NOTE(review): a row whose ``patient`` is 0 or whose ``deadpatient`` is
    NULL makes the ratio below raise; stored rows are assumed to carry both.
    """
    select_statement = 'SELECT word,p1,p2,patient,deadpatient FROM wordspearson'
    select = []

    conn = psycopg2.connect("dbname=" + dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_statement)
            for word, p1, p2, patient, dead in cur:
                cuantosMueren = "{0:.2%}".format(dead / patient) + " de " + str(patient)
                select.append({"word": word, "p1": p1, "p2": p2,
                               "cuantosMueren": cuantosMueren})
        finally:
            cur.close()
    finally:
        conn.close()

    # A plain lambda is used as the sort key so the function does not depend
    # on an import (operator.itemgetter) that the notebook only performs in a
    # later cell.
    return sorted(select, key=lambda entry: entry['p1'], reverse=True)
In [5]:
def countPatients(word, dbname="mimic"):
    """Count the ``matrix`` rows of one word and sum ``isalive`` over them.

    Parameters
    ----------
    word : str
        Word to look up in ``matrix.word``.
    dbname : str
        Name of the PostgreSQL database to connect to.

    Returns
    -------
    dict
        ``{"patient": <row count>, "deadPatient": <sum of isalive>}``, or an
        empty dict when the word has no row in ``matrix``.

    Notes
    -----
    ``subjectwords`` is LEFT JOINed, so a ``matrix`` row without a match
    contributes NULL to the sum; if no row matches, ``deadPatient`` is None.
    NOTE(review): the value stored under ``deadPatient`` is ``sum(isalive)``;
    confirm that ``isalive`` is encoded so that this sum counts deaths.
    """
    select_statement = (
        'SELECT count(1),sum(isalive) FROM matrix m LEFT JOIN subjectwords s\n'
        'ON m.subject_id=s.subject_id where m.word = %s GROUP BY m.word'
    )

    conn = psycopg2.connect("dbname=" + dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_statement, (word,))
            # The query filters and groups on the same single word, so it
            # yields at most one row.
            row = cur.fetchone()
        finally:
            cur.close()
    finally:
        # This function is called once per word, so the connection must be
        # released even when the query fails.
        conn.close()

    if row is None:
        return {}
    return {"patient": row[0], "deadPatient": row[1]}
In [6]:
def cleanPearson(dbname="mimic"):
    """Delete every row from the ``wordspearson`` table and commit.

    Parameters
    ----------
    dbname : str
        Name of the PostgreSQL database to connect to.

    Returns
    -------
    None

    Notes
    -----
    The cursor and the connection are closed even if the DELETE fails; in
    that case nothing is committed.
    """
    delete_statement = 'DELETE FROM wordspearson'

    conn = psycopg2.connect("dbname=" + dbname)
    try:
        cur = conn.cursor()
        try:
            # The statement has no placeholders, so no parameters are passed.
            cur.execute(delete_statement)
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
In [7]:
# Build the table through the project's convert_matrix helper (imported from
# Matrix). The cells below read its row index, its values and its columns.
# NOTE(review): the meaning of sumvals=False is defined in Matrix.convert_matrix
# and is not visible in this notebook.
table = convert_matrix(sumvals=False)
# Bare expression: the notebook renders the table as this cell's output.
table
Out[7]:
In [8]:
# Outcome label of every row, taken from position 1 of the row MultiIndex.
# MultiIndex.labels was renamed to MultiIndex.codes in pandas 0.24 and removed
# in pandas 1.0, so prefer `codes` and fall back to `labels` on older pandas.
# NOTE(review): these are integer codes into the index level, not the level
# values themselves; they coincide only if that level is exactly 0..k-1.
# Presumably the level is the alive/dead flag -- confirm against convert_matrix.
row_index = table.index
level_codes = row_index.codes if hasattr(row_index, "codes") else row_index.labels
survived = level_codes[1].tolist()
# Feature matrix: one row per index entry, one column per word.
patients = table.values
# Column names, in the same order as the columns of `patients`.
columns = list(table.columns.values)
In [9]:
# Empty the wordspearson table first, so the rows saved at the end of this
# cell are the only ones it contains.
cleanPearson()
print("cleanedPearson now reading", len(columns), "columns")

pearsonList = []
for i, word in enumerate(columns):
    # pearsonr returns the correlation coefficient first and its p-value
    # second; they are stored as p1 and p2.
    pearson = pearsonr(patients[:, i], survived)
    # One database round-trip per word to fetch its patient counts.
    count = countPatients(word)
    pearsonList.append(
        {
            'word': word,
            'p1': pearson[0],
            'p2': pearson[1],
            'patient': count['patient'],
            'deadPatient': count['deadPatient'],
        }
    )
print(" preparedToSavePearson")

# A single bulk insert for all the words.
savePearson(pearsonList)
print("savedPearson")
In [10]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split the same on every run.
patients_train, patients_test, survived_train, survived_test = train_test_split(
    patients,
    survived,
    test_size=0.2,
    random_state=42,
)
In [11]:
# Shapes of the full table, the training matrix and the test matrix, one per line.
print(table.shape, patients_train.shape, patients_test.shape, sep="\n")
In [13]:
# Hand both halves of the split to the project's logistic-regression helper.
# It returns three values, unpacked here as the model, an accuracy score and
# a ROC AUC value.
# NOTE(review): the fitting and scoring details live in
# LogisticRegresion.ajustLogisticRegression and are not visible here.
model, accuracy_score, logit_roc_auc = ajustLogisticRegression(
    patients_train,
    survived_train,
    patients_test,
    survived_test,
)
In [14]:
# Bare expression: shows the object returned as `model` by the previous cell.
model
Out[14]:
In [7]:
# NOTE(review): this import sits far from the notebook's import cells, and the
# cell counter (In [7] following In [14]) shows this part was executed out of
# order. Consider moving the import to the first cell so that
# Restart Kernel -> Run All works top to bottom.
from operator import itemgetter
# Despite its name, pearsonDict is a list: selectPearson returns a list of
# dicts sorted by p1, largest first.
pearsonDict = selectPearson()
In [8]:
# Plot the first ten entries, i.e. the ten words with the largest p1, since
# pearsonDict is sorted by p1 in descending order.
plot_word(pearsonDict[:10])
In [9]:
# Tabular view of the stored correlations: one row per word, indexed by the
# word and ordered by p1 from largest to smallest.
df = (
    pd.DataFrame(pearsonDict)
    .set_index('word')
    .sort_values(['p1'], ascending=[False])
)
df
Out[9]:
In [ ]: