In [1]:
import sys
import os
sys.path.append(os.path.abspath("/home/scidb/HeartRatePatterns/Python"))
from LogisticRegresion import ajustLogisticRegression
from PlotWords import plot_word
from Matrix import convert_matrix
from NMF import generateNMF,auc_model,find_best_NMF
In [2]:
def findpatient(message,table,beats):
    """Print a one-line mortality summary for the rows of ``table``.

    Parameters
    ----------
    message :
        Label printed at the start of the line.
    table :
        Object exposing ``shape`` and a MultiIndex-like ``index``. The codes of
        the second index level are summed to obtain the number of deaths
        (assumes that level encodes a dead patient as 1 -- TODO confirm against
        ``convert_matrix``).
    beats :
        Extra value echoed at the end of the line.

    Returns
    -------
    None. Nothing is printed when ``table`` has no rows.
    """
    groupsize = table.shape[0]
    if groupsize == 0:
        # An empty group has no meaningful percentage; stay silent.
        return
    index = table.index
    # pandas renamed MultiIndex.labels to MultiIndex.codes in 0.24 and removed
    # .labels in 1.0; prefer the new name and fall back for old installations.
    level_codes = index.codes if hasattr(index, "codes") else index.labels
    deads = sum(level_codes[1].tolist())
    percent = deads/groupsize
    print(message,"groupsize:%d"%groupsize,"deads:%d"%deads,"percent:{0:.2%}".format(percent),beats)
In [3]:
import psycopg2
def selectPearson(word,dbname="mimic") :
    """Fetch the stored Pearson statistics for ``word`` from ``wordspearson``.

    Parameters
    ----------
    word :
        Word to look up (bound as a query parameter, not interpolated).
    dbname :
        Name of the PostgreSQL database to connect to.

    Returns
    -------
    dict
        ``{}`` when the word has no row; otherwise a dict with the keys
        ``'word'``, ``'pacientes'``, ``'porcentaje de muertos'`` (formatted
        ``deadpatient / patient``) and ``'p1'``. If several rows match, the
        last one returned by the cursor wins.
    """
    select_statement='SELECT patient,deadpatient,p1 FROM wordspearson WHERE word = %s'
    select = {}
    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_statement,(word,))
            for row in cur :
                patient=row[0]
                select = {'word':word,"pacientes":patient,"porcentaje de muertos":"{0:.2%}".format(row[1]/patient),"p1":row[2]}
        finally:
            # Release the cursor even if the query or row formatting fails.
            cur.close()
    finally:
        # Always return the connection; previously an exception leaked it.
        conn.close()
    return select
In [4]:
def print_top_words(model, feature_names,topic_index=None):
    """List the positively weighted features of each topic, strongest first.

    Parameters
    ----------
    model :
        Either a 2-D array of topic components (one row per topic) or a fitted
        decomposition object exposing ``components_``.
    feature_names :
        Sequence mapping a column position to its feature (word) name.
    topic_index :
        When given, only the topic with this position is reported.

    Returns
    -------
    list of dict
        One ``{"topic": idx, "features": [{"word": ..., "p1": weight}, ...]}``
        entry per selected topic that has at least one weight > 0. Features are
        ordered by decreasing weight.
    """
    # Use the argument instead of a notebook-level global named `components`,
    # so the function works on a fresh kernel and with any model passed in.
    components = getattr(model, "components_", model)
    result = []
    for topic_idx, topic in enumerate(components):
        if topic_index is not None and topic_index != topic_idx:
            continue
        # argsort() is ascending; reverse it so the heaviest word comes first.
        # (The previous `[:-1]` slice silently dropped the heaviest word.)
        features = [{"word":feature_names[i],"p1":topic[i]}
                    for i in topic.argsort()[::-1] if topic[i] > 0]
        if features:
            result.append({"topic":topic_idx,"features":features})
    return result
In [5]:
def addValue(key,my_dict):
    """Count one more occurrence of ``key`` in ``my_dict``.

    A key seen for the first time starts at 1. The dictionary is updated in
    place and the same object is returned.
    """
    my_dict[key] = my_dict[key] + 1 if key in my_dict else 1
    return my_dict
In [6]:
from operator import itemgetter
def predict_proba(compoT,table):
    """Score every word with the notebook-level ``modelnmf`` classifier.

    Parameters
    ----------
    compoT :
        Matrix passed to ``modelnmf.predict_proba``; row ``i`` is matched with
        the ``i``-th column name of ``table``.
    table :
        Table whose column names (``list(table)``) are the words.

    Returns
    -------
    list of dict
        One entry per word that has a row in ``wordspearson``, with the keys
        ``'word'``, ``'predict'``, ``'pacientes'``, ``'porcentaje de muertos'``
        and ``'p1'``, sorted by decreasing ``'p1'``.

    Side effects: prints a warning for each word missing from
    ``wordspearson`` and a histogram of predictions by probability bucket.
    Relies on the globals ``modelnmf``, ``selectPearson`` and ``addValue``,
    and opens one database connection per word.
    """
    predicts = modelnmf.predict_proba(compoT)[:,1]
    beats = list(table)
    sortedBeats = []
    nvals={}
    for i, predict in enumerate(predicts):
        word = beats[i]
        # The histogram describes the predictions themselves, so every word is
        # counted regardless of whether its Pearson row exists.
        if predict>.99 : addValue("99",nvals)
        elif predict>.90 : addValue("90",nvals)
        elif predict>.80 : addValue("80",nvals)
        elif predict>.70 : addValue("70",nvals)
        else: addValue("under",nvals)
        sel = selectPearson(word)
        if sel=={}:
            print("palabra no encontrada en pearson:",word)
            # Without a Pearson row there is no 'p1' to sort by; skip the word
            # instead of failing with KeyError on the lookups below.
            continue
        sortedBeats.append({'word':word,'predict':predict,'pacientes':sel['pacientes'],
                            'porcentaje de muertos':sel['porcentaje de muertos'],"p1":sel['p1']})
    print(nvals)
    return sorted(sortedBeats, key=itemgetter('p1'), reverse=True)
In [7]:
# Load the patient-by-word matrix (convert_matrix is a project helper).
table = convert_matrix(with_pearson=True) #.iloc[:,:400]
# pandas renamed MultiIndex.labels to MultiIndex.codes in 0.24 and removed
# .labels in 1.0; read whichever attribute this installation provides.
index_codes = table.index.codes if hasattr(table.index, "codes") else table.index.labels
# Codes of the second index level are used as the outcome label per patient
# (presumably 1 = dead -- TODO confirm against convert_matrix).
survived = index_codes[1].tolist()
patients = table.values
table
Out[7]:
In [8]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state is fixed so the split is
# reproducible across runs.
patients_train, patients_test,survived_train, survived_test = train_test_split(patients,survived,test_size=0.2, random_state=42)
In [9]:
# Fit the project's logistic-regression helper on the train/test split; its
# result is unpacked as (model, accuracy).
model,acurracy = ajustLogisticRegression(patients_train,survived_train,patients_test,survived_test)
# auc_model is a project helper; presumably it computes (and plots) the ROC AUC
# of `model` on the held-out rows -- TODO confirm.
logit_roc_auc = auc_model("Logistic",model,patients_test,survived_test)
In [10]:
def countPatients(word,dbname="mimic") :
    """Count the ``matrix`` rows for ``word`` and sum their ``isalive`` flag.

    Parameters
    ----------
    word :
        Word to look up (bound as a query parameter, not interpolated).
    dbname :
        Name of the PostgreSQL database to connect to.

    Returns
    -------
    dict
        ``{}`` when the word has no row; otherwise
        ``{"patient": count(1), "deadPatient": sum(isalive)}``.
        NOTE(review): the value stored under ``"deadPatient"`` is
        ``sum(isalive)`` -- confirm the column's polarity matches the key name.
    """
    select_statement='''SELECT count(1),sum(isalive) FROM matrix m LEFT JOIN subjectwords s
ON m.subject_id=s.subject_id where m.word = %s GROUP BY m.word'''
    select = {}
    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_statement,(word,))
            for row in cur :
                select = {"patient":row[0],"deadPatient":row[1],}
        finally:
            # Release the cursor even if the query fails.
            cur.close()
    finally:
        # Always return the connection; previously an exception leaked it.
        conn.close()
    return select
In [11]:
from scipy.stats.stats import pearsonr
# Correlate each word column of the patient matrix with the outcome labels and
# attach the patient counts stored in the database for that word.
# Global names are kept as-is because later cells may read them.
columns = list(table.columns.values)
pearsonList = []
for i, word in enumerate(columns):
    pearson = pearsonr(patients[:, i], survived)
    # One database round-trip per word; raises KeyError if the word has no row.
    count = countPatients(word)
    deadPatient = count['deadPatient']
    patient = count['patient']
    percent = deadPatient / patient
    pearsonList.append({
        'word': word,
        'p1': pearson[0],
        'p2': pearson[1],
        'percent': "{0:.2%}".format(percent) + " de %d" % patient,
    })
In [12]:
# Rank the words by their correlation coefficient ('p1'), highest first, and
# show the five strongest.
pearsonList = sorted(pearsonList, key=itemgetter('p1'), reverse=True)
pearsonList[:5]
Out[12]:
In [13]:
# Run the project's NMF search on the full patient matrix. Later cells use
# modelnmf as a classifier, bestNmf for its components_, patientnmf as the
# model input and bestScore as a component count
# (NOTE(review): exact semantics of each returned value live in NMF.py -- confirm).
modelnmf, bestNmf, patientnmf, nmf_roc_auc, accuracyScore, p1, bestScore = find_best_NMF(patients,survived)
In [14]:
from sklearn.metrics import classification_report
# Summarise the selected NMF model.
print("bestScore "+str(bestScore)+" accurracy is %2.2f"% accuracyScore)
# The report is computed on the same rows passed to find_best_NMF (all of
# `survived`), not on a held-out split.
print(classification_report(survived,modelnmf.predict(patientnmf)))
In [16]:
import numpy as np
# Transpose the NMF components so that each row can be scored by the
# classifier in the next cell.
compoT = np.transpose(bestNmf.components_)
print("components",bestNmf.components_.shape)
print("components Transpose",compoT.shape)
In [17]:
# Score every word with the NMF-based classifier, then collect the words in
# the order returned by predict_proba (sorted by decreasing 'p1').
sortedBeats = predict_proba(compoT, table)
beatKeys = []
for value in sortedBeats:
    beatKeys.append(value['word'])
In [18]:
# Show and plot the first five entries of sortedBeats (plot_word is a project
# helper). The printed heading is Spanish for "Top 5 heartbeats with the
# highest probability of death according to the logistic regression".
print("Top 5 HeartBeats con mayor probabilidad de muerte según la regresión logística")
print(sortedBeats[:5])
plot_word(sortedBeats[:5])
In [31]:
from operator import itemgetter
from scipy.stats.stats import pearsonr
# Correlate each NMF component with the outcome labels.
# patientnmf comes from find_best_NMF(patients, survived) and is paired with
# `survived` when the classification report is built, so it has one row per
# patient in the full set. It must therefore be correlated with `survived`;
# `survived_train` only covers the 80% training split and has a different
# length, which makes pearsonr fail.
# NOTE(review): this cell's execution count (31) is out of sequence; it must
# run before the cell that reloads `table`/`survived` with filter_words.
patients_trainnmf = patientnmf # bestNmf.transform(patients_train)
pearsonList = []
for i in range(bestScore):
    patientpear=patients_trainnmf[:,i]
    pearson = pearsonr(patientpear,survived)
    pearsonList.append({'group':i,'p1':pearson[0],'p2':pearson[1]})
# Components ranked by correlation coefficient, highest first.
sortedList = sorted(pearsonList, key=itemgetter('p1'), reverse=True)
sortedList[:10]
In [23]:
# Take the NMF component ranked first in sortedList and collect the words that
# carry a positive weight in it.
# Global names are kept as-is because later cells read `subwords`.
columns = list(table)
components = bestNmf.components_
topword = print_top_words(
    components,
    columns,
    topic_index=sortedList[0]['group'],
)[0]['features']
subwords = []
for subword in topword:
    if not subword['p1'] > 0:
        continue
    subwords.append(subword['word'])
print(str(subwords[:10]))
In [24]:
# Reload the matrix restricted to the words selected from the top NMF component.
# NOTE: this overwrites `table`, `survived` and `patients` from the first load,
# so earlier cells must not be re-run after this one without reloading.
table = convert_matrix(sumvals=False,filter_words=tuple(subwords))
# pandas renamed MultiIndex.labels to MultiIndex.codes in 0.24 and removed
# .labels in 1.0; read whichever attribute this installation provides.
index_codes = table.index.codes if hasattr(table.index, "codes") else table.index.labels
survived = index_codes[1].tolist()
patients = table.values
table
Out[24]:
In [25]:
# Re-split the filtered matrix with the same 80/20 proportion and seed; this
# overwrites the split variables created from the unfiltered matrix.
patients_train, patients_test,survived_train, survived_test = train_test_split(patients,survived,test_size=0.2, random_state=42)
# Sanity-check the resulting shapes.
print(table.shape)
print(patients_train.shape)
print(patients_test.shape)
In [26]:
# Refit the logistic regression on the filtered words; overwrites the `model`
# and `acurracy` obtained from the unfiltered matrix.
model,acurracy = ajustLogisticRegression(patients_train,survived_train,patients_test,survived_test)
In [27]:
# Evaluate the refitted model on the held-out rows with the project's
# auc_model helper; overwrites the earlier `logit_roc_auc`.
logit_roc_auc = auc_model("Logistic",model,patients_test,survived_test)
In [28]:
# Repeat the per-word correlation on the filtered matrix: correlate each word
# column with the outcome labels and attach the database counts for that word.
# Global names are kept as-is because the next cell reads `pearsonList`.
columns = list(table.columns.values)
pearsonList = []
for i, word in enumerate(columns):
    pearson = pearsonr(patients[:, i], survived)
    # One database round-trip per word; raises KeyError if the word has no row.
    count = countPatients(word)
    deadPatient = count['deadPatient']
    patient = count['patient']
    percent = deadPatient / patient
    pearsonList.append({
        'word': word,
        'p1': pearson[0],
        'p2': pearson[1],
        'percent': "{0:.2%}".format(percent) + " de %d" % patient,
    })
In [29]:
# Rank the filtered words by correlation coefficient ('p1'), highest first,
# and show the five strongest.
pearsonList = sorted(pearsonList, key=itemgetter('p1'), reverse=True)
pearsonList[:5]
Out[29]:
In [ ]: