In [1]:
import pandas as pd
import psycopg2
import numpy as np
from scipy.stats.stats import pearsonr
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
In [2]:
import sys
import os
sys.path.append(os.path.abspath("/home/scidb/HeartRatePatterns/Python"))
from LogisticRegresion import ajustLogisticRegression
In [3]:
def selectMatrix(withPearson,dbname="mimic") :
    """Fetch the word-count rows joined with each subject's ``isalive`` flag.

    Args:
        withPearson: when truthy, restrict the result to words that appear in
            ``wordspearson`` with ``p1 > 0.01``.
        dbname: name of the PostgreSQL database to connect to.

    Returns:
        A list with one ``(subject_id, word, counting, isalive)`` tuple per row
        produced by the query.

    Raises:
        Whatever psycopg2 raises on connection or query failure; the cursor and
        the connection are closed before the exception propagates.
    """
    select_stament = ("SELECT m.subject_id,m.word,m.counting,s.isalive "
                      " FROM matrix m LEFT JOIN subjectwords s ON m.subject_id=s.subject_id"
                      )
    if withPearson:
        select_stament = select_stament+" WHERE m.word in (select word from wordspearson where p1>0.01) "
    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_stament)
            # Materialise the rows before the cursor is closed.
            return list(cur)
        finally:
            cur.close()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()
In [4]:
def convertMatrix(withPearson=False) :
    """Load the word matrix and pivot it to one row per (subject_id, isAlive).

    Args:
        withPearson: forwarded to ``selectMatrix`` to choose the filtered or
            unfiltered query.

    Returns:
        A pivot table indexed by ``subject_id`` and ``isAlive`` with one column
        per word; counts are aggregated with ``np.sum`` and missing
        combinations are filled with 0. The number of fetched rows is printed
        as a side effect.
    """
    records = selectMatrix(withPearson)
    frame = pd.DataFrame.from_records(
        records,
        columns=['subject_id', 'Word', 'Counting', 'isAlive'],
    )
    print(len(frame))
    # The list-valued aggfunc is kept on purpose: it determines the shape of the
    # column keys that the rest of the notebook indexes into.
    pivoted = pd.pivot_table(
        frame,
        index=["subject_id", "isAlive"],
        columns=["Word"],
        values=["Counting"],
        aggfunc={"Counting": [np.sum]},
        fill_value=0,
    )
    return pivoted
In [ ]:
def savePearson(pearson,dbname="mimic") :
    """Bulk-insert Pearson results into the ``wordspearson`` table.

    Args:
        pearson: iterable of mappings, each providing the keys ``'word'``,
            ``'p1'`` and ``'p2'``.
        dbname: name of the PostgreSQL database to connect to.

    Returns:
        None. Rows are inserted with a single statement and committed.

    Raises:
        Whatever psycopg2 raises on connection or query failure; the cursor and
        the connection are closed before the exception propagates.
    """
    records = list(pearson)
    if not records:
        # Nothing to insert: skip the database round trip entirely.
        return
    insert_statement=('INSERT INTO wordspearson(word,p1,p2)'
                      ' SELECT unnest( %(word)s ) ,'
                      ' unnest( %(p1)s) ,'
                      ' unnest( %(p2)s)')
    # Pass only the three placeholders the statement uses rather than every
    # local name (connection, cursor, ...) via locals().
    params = {
        'word': [r['word'] for r in records],
        'p1': [r['p1'] for r in records],
        'p2': [r['p2'] for r in records],
    }
    conn = psycopg2.connect("dbname="+dbname)
    try:
        cur = conn.cursor()
        try:
            cur.execute(insert_statement, params)
            conn.commit()
        finally:
            cur.close()
    finally:
        # Always release the connection, even when the insert fails.
        conn.close()
In [ ]:
# Full (unfiltered) word matrix; withPearson=False is convertMatrix's default.
table = convertMatrix(withPearson=False)
In [ ]:
# MultiIndex.labels was renamed to MultiIndex.codes in pandas 0.24 and removed
# in 1.0; use whichever attribute the installed pandas provides.
index_codes = table.index.codes if hasattr(table.index, "codes") else table.index.labels
# Integer codes of index level 1 (the second index key of the pivot table).
# NOTE(review): these are positions into the sorted level values, so they only
# equal the stored flag when that flag sorts to 0/1 - confirm for this dataset.
survived = index_codes[1].tolist()
patients = table.values
columns = list(table.columns.values)
In [ ]:
# Correlate every word column with the outcome vector, then store all results
# in the database with a single call.
pearsonList = []
for position, column_key in enumerate(columns):
    coefficient, p_value = pearsonr(patients[:, position], survived)
    # The word is the third component of each column key.
    pearsonList.append({'word': column_key[2], 'p1': coefficient, 'p2': p_value})
savePearson(pearsonList)
In [ ]:
# Rebuild the matrix, this time keeping only the words selected through wordspearson.
table = convertMatrix(withPearson=True)
In [ ]:
# MultiIndex.labels was renamed to MultiIndex.codes in pandas 0.24 and removed
# in 1.0; use whichever attribute the installed pandas provides.
index_codes = table.index.codes if hasattr(table.index, "codes") else table.index.labels
# Integer codes of index level 1 (the second index key of the pivot table).
survived = index_codes[1].tolist()
patients = table.values
In [ ]:
# Hold out 20% of the subjects for evaluation; the fixed seed keeps the split reproducible.
(patients_train, patients_test,
 survived_train, survived_test) = train_test_split(
    patients,
    survived,
    test_size=0.2,
    random_state=42,
)
In [ ]:
# Sanity check: shapes of the full matrix, the training part and the test part.
for dataset_shape in (table.shape, patients_train.shape, patients_test.shape):
    print(dataset_shape)
In [ ]:
# ajustLogisticRegression comes from the project's LogisticRegresion module.
# NOTE(review): presumably it returns (fitted model, best C value) - confirm against that module.
fit_result = ajustLogisticRegression(patients_train, survived_train, patients_test)
logitmodelInitSAPS, best_val = fit_result
In [ ]:
# Refit an L2-regularised logistic regression with the C value found above.
# n_jobs is intentionally not passed: the liblinear solver ignores it and
# scikit-learn warns when it is set to anything other than a single job.
model = LogisticRegression(C=best_val,
                           fit_intercept=True, penalty='l2',
                           dual=True, solver='liblinear', verbose=1,
                           random_state=0)
model.fit(patients_train,survived_train)
In [ ]:
# Accuracy of the refitted model on the held-out subjects.
model_predictions = model.predict(patients_test)
model_accuracy = accuracy_score(survived_test, model_predictions)
print("Logistic Regression acurracy is %2.2f" % model_accuracy)
In [ ]:
# Accuracy of the model returned by ajustLogisticRegression on the held-out subjects.
saps_predictions = logitmodelInitSAPS.predict(patients_test)
saps_accuracy = accuracy_score(survived_test, saps_predictions)
print("Logistic Regression acurracy is %2.2f" % saps_accuracy)
In [ ]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report
In [ ]:
def aucModel(name,survived_test,model,patients_test):
    """Print and return the ROC AUC of ``model`` on the test set.

    The AUC is computed from continuous scores rather than hard class labels:
    thresholded predictions reduce the ROC curve to a single operating point
    and understate how well the model ranks the samples.

    Args:
        name: label printed in front of the AUC value.
        survived_test: true binary labels of the test samples.
        model: fitted classifier; ``predict_proba`` is used when available,
            then ``decision_function``, and ``predict`` only as a last resort.
        patients_test: feature matrix of the test samples.

    Returns:
        The value returned by ``roc_auc_score``.
    """
    if hasattr(model, "predict_proba"):
        # Column 1 holds the probability of the class with the greater label
        # (binary classification is assumed here).
        scores = np.asarray(model.predict_proba(patients_test))[:, 1]
    elif hasattr(model, "decision_function"):
        scores = model.decision_function(patients_test)
    else:
        # No continuous score available: fall back to the hard predictions.
        scores = model.predict(patients_test)
    logit_roc_auc = roc_auc_score(survived_test, scores)
    print(name+" AUC = %2.2f"% logit_roc_auc)
    return logit_roc_auc
In [ ]:
# Class-balanced variant of the tuned model. best_val is passed through
# unchanged, as in the earlier fit: truncating it with int() would alter the
# tuned regularisation strength and turn any value below 1 into 0, which is
# not a valid C.
model = LogisticRegression(penalty='l2', C=best_val, class_weight="balanced",
                           dual=True, solver='liblinear',
                           random_state=0, max_iter=51200)
model.fit(patients_train,survived_train)
print("Logistic Regression acurracy is %2.2f" % accuracy_score(survived_test,model.predict(patients_test)))
logit_roc_auc = aucModel("Logistic",survived_test,model,patients_test)
In [ ]:
# Display the model object returned by ajustLogisticRegression (bare last expression of the cell).
logitmodelInitSAPS
In [ ]:
# Evaluate the model returned by ajustLogisticRegression. Both metrics must be
# computed for the same estimator; the AUC was previously computed for `model`.
print("Logistic Regression acurracy is %2.2f" % accuracy_score(survived_test,logitmodelInitSAPS.predict(patients_test)))
logit_roc_auc = aucModel("Logistic",survived_test,logitmodelInitSAPS,patients_test)
In [ ]: