In [2]:
import sys
import os
sys.path.append(os.path.abspath("/home/scidb/HeartRatePatterns/Python"))
from LogisticRegresion import ajustLogisticRegression
from Matrix import convert_matrix
from NMF import generateNMF
from sklearn.model_selection import train_test_split
import psycopg2
import numpy
from psycopg2.extensions import register_adapter, AsIs
In [3]:
# Build the patient matrix; `sumvals=False` is forwarded to the project's convert_matrix helper.
table = convert_matrix(sumvals=False)
# Preview only the first rows instead of dumping the whole matrix into the cell output.
table.head()
Out[3]:
In [4]:
# Row-aligned subject ids. `index.levels[0]` only holds the unique, sorted level
# values, which matches the rows of `table.values` only when the index happens
# to be sorted and free of repeats; `get_level_values` is aligned by construction.
subjects = table.index.get_level_values(0).tolist()

# pandas renamed MultiIndex.labels to MultiIndex.codes (0.24) and removed the
# old name in 1.0, so support both spellings.
index_codes = table.index.codes if hasattr(table.index, 'codes') else table.index.labels
# Integer codes of the second index level, one per row (used as the outcome label).
survived = index_codes[1].tolist()

patients = table.values

# 80/20 split with a fixed seed; the three sequences are split with the same
# permutation so rows, subject ids and labels stay aligned.
(patients_train, patients_test,
 subject_train, subject_test,
 survived_train, survived_test) = train_test_split(
    patients, subjects, survived, test_size=0.2, random_state=42)
In [5]:
from math import log,expm1
def select_score(subjects, table='saps', dbname="mimic"):
    """Fetch a severity score per subject and rescale it.

    For each requested subject, the row of ``table`` belonging to that
    subject's highest ``icustay_id`` is read. The raw score is then rescaled:

    * ``table == 'saps'``: converted to a probability with the logistic
      function applied to ``-7.7631 + 0.0737*score + 0.9971*ln(score + 1)``
      (these coefficients match the published SAPS II mortality equation).
    * ``table == 'sofa'``: divided by 24 (the maximum total SOFA score).
    * any other table: returned unchanged.

    Parameters
    ----------
    subjects : iterable
        Subject ids to look up.
    table : str
        Name used both as the table and as the score column. It is
        interpolated into the SQL text (identifiers cannot be bound as query
        parameters), so only pass trusted, hard-coded names.
    dbname : str
        PostgreSQL database name handed to ``psycopg2.connect``.

    Returns
    -------
    list
        Rescaled scores in the same order as ``subjects``. Subjects that have
        no row in ``table`` are omitted.
    """
    # Local import: the notebook-level math import does not provide `exp`.
    from math import exp, log

    subjects = tuple(subjects)
    if not subjects:
        # psycopg2 renders an empty tuple as "()", which is invalid after IN.
        return []

    # subject_id is selected as well so the result can be re-ordered to match
    # the caller's `subjects`; without it the row order is whatever the
    # database returns, which need not match the input order.
    select_statement = '''SELECT a.''' + table + ''',a.icustay_id,a.subject_id
    FROM ''' + table + ''' a
    INNER JOIN (select subject_id,max(icustay_id) as icustay_id from ''' + table + ''' group by subject_id) b
    ON a.subject_id = b.subject_id and a.icustay_id = b.icustay_id
    WHERE a.subject_id in %s '''

    scores_by_subject = {}
    conn = psycopg2.connect("dbname=" + dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_statement, (subjects,))
            for row in cur:
                score = row[0]
                if table == 'saps':
                    logit = -7.7631 + 0.0737 * score + 0.9971 * log(score + 1)
                    # Logistic function. The previous expm1-based expression
                    # evaluated to 1 - exp(-logit), which is negative whenever
                    # logit < 0 and therefore not a probability.
                    score = exp(logit) / (1 + exp(logit))
                elif table == 'sofa':
                    score = score / 24
                scores_by_subject[row[2]] = score
        finally:
            cur.close()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()

    return [scores_by_subject[subject] for subject in subjects
            if subject in scores_by_subject]
In [6]:
def addapt_numpy_float64(numpy_float64):
    """Wrap a ``numpy.float64`` in psycopg2's ``AsIs`` so it can be used as a query parameter."""
    return AsIs(numpy_float64)


def addapt_numpy_int64(numpy_int64):
    """Wrap a ``numpy.int64`` in psycopg2's ``AsIs`` so it can be used as a query parameter."""
    return AsIs(numpy_int64)


# Register each adapter with psycopg2 for its numpy scalar type.
for _numpy_type, _adapter in (
        (numpy.float64, addapt_numpy_float64),
        (numpy.int64, addapt_numpy_int64)):
    register_adapter(_numpy_type, _adapter)
In [7]:
# Query both severity scores for the held-out subjects.
test_subject_ids = tuple(subject_test)
saps = select_score(test_subject_ids, table='saps')
sofa = select_score(test_subject_ids, table='sofa')
In [8]:
from sklearn.metrics import roc_auc_score

# Area under the ROC curve of each severity score against the test labels.
roc_auc_saps, roc_auc_sofa = (
    roc_auc_score(survived_test, predicted) for predicted in (saps, sofa)
)
In [23]:
def calculateReg():
    """Fit the project's logistic regression on a freshly built patient matrix.

    The matrix comes from ``convert_matrix(with_pearson=400, len_words=(6, 6))``
    and is split 80/20 with ``random_state=42``.

    Returns
    -------
    tuple
        ``(logisticReg, regresion, surv_test, p_test)`` where ``logisticReg``
        is whatever ``ajustLogisticRegression`` returns (indexed with
        ``'model'`` here), ``regresion`` is column 1 of
        ``model.predict_proba(p_test)``, and ``surv_test`` / ``p_test`` are the
        held-out labels and rows.
    """
    table = convert_matrix(with_pearson=400, len_words=(6, 6))

    # pandas renamed MultiIndex.labels to MultiIndex.codes (0.24) and removed
    # the old name in 1.0, so support both spellings.
    index = table.index
    index_codes = index.codes if hasattr(index, 'codes') else index.labels
    survived = index_codes[1].tolist()
    patients = table.values

    p_train, p_test, surv_train, surv_test = train_test_split(
        patients, survived, test_size=0.2, random_state=42)
    logisticReg = ajustLogisticRegression(p_train, surv_train, p_test, surv_test)
    regresion = logisticReg['model'].predict_proba(p_test)[:, 1]
    return logisticReg, regresion, surv_test, p_test


# The "13 20 66" that trailed this statement in the export looks like the
# cell's captured output fused onto the code line; kept here as a comment.
logisticReg, regresion, surv_test, p_test = calculateReg()  # output: 13 20 66
In [9]:
# Display the `coef_` attribute of the object stored under logisticReg['model'].
logisticReg['model'].coef_
Out[9]:
In [16]:
# Candidate NMF sizes and the criterion each one was selected by.
components = [
    {'n_components': 12, 'type': 'Menor error'},
    {'n_components': 5, 'type': 'Mejor Coeficiente de pearson'},
    {'n_components': 19, 'type': 'Precisión'},
    {'n_components': 56, 'type': 'AUC'},
]

nmfresults = []
# The loop used to iterate the set literal {12, 19, 56, 5}, whose order is an
# implementation detail, while a later cell indexes `nmfresults[2]` by
# position. The order is now explicit and matches what CPython yields for that
# set, so positional lookups keep pointing at the same model
# (nmfresults[2] is the 12-component run).
for component in (56, 19, 12, 5):
    print(component)
    nmfGen = generateNMF(patients, survived, n_components=component)
    nmfGen['name'] = 'NMF ' + str(component)
    nmfresults.append(nmfGen)
In [11]:
import numpy as np

# NMF-transformed matrix of the third stored run.
patients_nmf = nmfresults[2]['patients_nmf']
print(patients_nmf.shape)

# Largest entry of the matrix, computed with numpy instead of a nested Python
# loop. The -1 floor (and the -1 result for an empty matrix) mirrors the
# sentinel the loop started from.
maxi = max(-1, np.max(patients_nmf)) if np.size(patients_nmf) else -1
print(maxi)
In [22]:
import plotly.plotly as py
import plotly.graph_objs as go
# Heatmap trace whose z-values are the `patients_nmf` matrix.
trace = go.Heatmap(z=patients_nmf)
data=[trace]
# Send the figure to plotly under the name 'basic-heatmap'; the call's return
# value is the cell output.
py.iplot(data, filename='basic-heatmap')
Out[22]:
In [13]:
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
In [14]:
def roc_curveplot(name, y_true, y_score, logit_roc_auc):
    """Draw one ROC curve on the current matplotlib figure.

    Parameters
    ----------
    name : label prefix shown in the legend.
    y_true : true labels passed to ``roc_curve``.
    y_score : predicted scores passed to ``roc_curve``.
    logit_roc_auc : AUC value appended to the legend label (3 decimals).
    """
    # The thresholds returned by roc_curve are not needed for plotting.
    false_positive_rate, true_positive_rate, _ = roc_curve(y_true, y_score)
    legend_label = '%s AUC =%.3f' % (name, logit_roc_auc)
    plt.plot(false_positive_rate, true_positive_rate, label=legend_label)
In [17]:
# Square 6x6 figure for the ROC comparison.
fig_size = [6, 6]
plt.rcParams["figure.figsize"] = fig_size
plt.figure()

# Baseline severity scores first, then the logistic regression.
roc_curveplot('SAPS', survived_test, saps, roc_auc_saps)
roc_curveplot('SOFA', survived_test, sofa, roc_auc_sofa)
roc_curveplot('Regression', surv_test, regresion, logisticReg['roc_auc'])

# One curve per stored NMF run.
for nmfresult in nmfresults:
    roc_curveplot(
        nmfresult['name'],
        nmfresult['survived_test'],
        nmfresult['predict_poba'],
        nmfresult['roc_auc'],
    )

# Diagonal reference line.
plt.plot([0, 1], [0, 1], 'k--')

# Axes limits, labels, title and legend.
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('Tasa de Falsos Positivos')
plt.ylabel('Tasa de Verdaderos Positivos')
plt.title('Curva ROC comparando resultados')
plt.legend(loc="lower right")
plt.show()
In [ ]: