In [1]:
import pandas as pd
import psycopg2
import numpy as np
from scipy.stats.stats import pearsonr
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
import sys 
import os
# Make the project's helper modules importable before importing from them.
# NOTE(review): the absolute path is specific to one machine; it has to be
# adjusted (or made configurable) to run this notebook anywhere else.
sys.path.append(os.path.abspath("/home/scidb/HeartRatePatterns/Python"))
from LogisticRegresion import ajustLogisticRegression

In [3]:
def selectMatrix(withPearson,dbname="mimic") :
    """Fetch the per-subject word counts together with the survival flag.

    Args:
        withPearson: when truthy, only words present in ``wordspearson`` with
            ``p1 > 0.01`` are returned; otherwise every word is returned.
        dbname: name of the PostgreSQL database to connect to.

    Returns:
        A list with one row per result record, in the column order
        (subject_id, word, counting, isalive). Because of the LEFT JOIN,
        ``isalive`` is NULL/None for subjects missing from ``subjectwords``.
    """
    select_stament = ("SELECT m.subject_id,m.word,m.counting,s.isalive "
                      " FROM matrix m LEFT JOIN subjectwords s ON m.subject_id=s.subject_id"
    )
    if withPearson:
        select_stament = select_stament+" WHERE m.word in (select word from wordspearson where p1>0.01) "
    conn = psycopg2.connect("dbname="+dbname)
    # try/finally guarantees the cursor and the connection are released even
    # when the query fails part-way through.
    try:
        cur = conn.cursor()
        try:
            cur.execute(select_stament)
            return cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

In [4]:
def convertMatrix(withPearson=False) :
    """Load the word matrix and pivot it into one row per subject.

    Args:
        withPearson: forwarded to ``selectMatrix`` to choose between the full
            word list and the Pearson-filtered one.

    Returns:
        A pivot table indexed by (subject_id, isAlive) with one column per
        word (the word is the last element of each column key). Counts that
        share the same subject and word are summed; absent words are 0.

    Side effect: prints the number of rows fetched from the database.
    """
    records = selectMatrix(withPearson)
    frame = pd.DataFrame.from_records(
        records,
        columns=['subject_id', 'Word', 'Counting','isAlive'],
    )
    print(len(frame))
    pivot_options = dict(
        index=["subject_id","isAlive"],
        columns=["Word"],
        values=["Counting"],
        aggfunc={"Counting":[np.sum]},
        fill_value=0,
    )
    return pd.pivot_table(frame, **pivot_options)

In [ ]:
def savePearson(pearson,dbname="mimic") :
    """Insert the per-word Pearson results into the ``wordspearson`` table.

    Args:
        pearson: iterable of mappings, each with the keys ``'word'``, ``'p1'``
            and ``'p2'``.
        dbname: name of the PostgreSQL database to connect to.

    Returns:
        None. Nothing is sent to the database when ``pearson`` is empty.
    """
    rows = list(pearson)
    if not rows:
        # unnest() over empty, untyped arrays has nothing useful to insert,
        # so skip the round trip entirely.
        return
    insert_statement=('INSERT INTO wordspearson(word,p1,p2)'
                      ' SELECT unnest( %(word)s ) ,'
                      ' unnest( %(p1)s) ,'
                      ' unnest( %(p2)s)')
    # Pass exactly the three placeholders the statement names instead of the
    # whole local namespace.
    params = {
        'word': [r['word'] for r in rows],
        'p1': [r['p1'] for r in rows],
        'p2': [r['p2'] for r in rows],
    }
    conn = psycopg2.connect("dbname="+dbname)
    # try/finally releases the cursor and connection even if the insert fails;
    # closing without commit leaves the table unchanged in that case.
    try:
        cur = conn.cursor()
        try:
            cur.execute(insert_statement, params)
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()

In [ ]:
# Build the subject-by-word count table from every word in the matrix
# (convertMatrix defaults to withPearson=False, i.e. no Pearson filter).
table = convertMatrix()


4569945

In [ ]:
# MultiIndex.labels was renamed to MultiIndex.codes in pandas 0.24 and removed
# in pandas 1.0; use .codes when available and fall back to .labels otherwise.
index_codes = getattr(table.index, "codes", None)
if index_codes is None:
    index_codes = table.index.labels
# Integer codes of the second index level (isAlive), one entry per table row.
# NOTE(review): codes are positions within the level's values; they coincide
# with the stored isAlive values only if that column is encoded as 0/1 — confirm.
survived = index_codes[1].tolist()
# Count matrix: one row per (subject_id, isAlive) pair, one column per word.
patients = table.values
# Column keys of the pivot table; the word is the last element of each key.
columns = list(table.columns.values)

In [ ]:
# Correlate every word column with the survival codes and store the results.
pearsonList = []
for position, column_key in enumerate(columns):
    coefficient, p_value = pearsonr(patients[:, position], survived)
    # column_key[2] is the word; p1 holds the coefficient, p2 its p-value.
    pearsonList.append({'word': column_key[2], 'p1': coefficient, 'p2': p_value})
savePearson(pearsonList)

In [ ]:
# Rebuild the table keeping only the words that pass the wordspearson filter
# applied by selectMatrix (p1 > 0.01). This rebinds `table` from the earlier cell.
table = convertMatrix(True)

In [ ]:
# MultiIndex.labels was renamed to MultiIndex.codes in pandas 0.24 and removed
# in pandas 1.0; use .codes when available and fall back to .labels otherwise.
index_codes = getattr(table.index, "codes", None)
if index_codes is None:
    index_codes = table.index.labels
# Integer codes of the second index level (isAlive), one entry per table row.
# NOTE(review): codes are positions within the level's values; they coincide
# with the stored isAlive values only if that column is encoded as 0/1 — confirm.
survived = index_codes[1].tolist()
# Count matrix restricted to the Pearson-filtered words.
patients = table.values

In [ ]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
(patients_train, patients_test,
 survived_train, survived_test) = train_test_split(
    patients,
    survived,
    test_size=0.2,
    random_state=42,
)

In [ ]:
# Show the dimensions of the full table, then of the train and test matrices.
for shape in (table.shape, patients_train.shape, patients_test.shape):
    print(shape)

In [ ]:
# ajustLogisticRegression comes from the project's LogisticRegresion module.
# Its first result is used below as a predictor (.predict is called on it) and
# its second result is passed as the C parameter of LogisticRegression.
# NOTE(review): presumably the helper searches for the best regularisation
# strength — confirm against its implementation.
logitmodelInitSAPS,best_val = ajustLogisticRegression(patients_train,survived_train,patients_test)

In [ ]:
# Refit one L2-regularised logistic regression using the C value returned by
# ajustLogisticRegression.
# n_jobs is intentionally not passed: scikit-learn ignores it for the
# liblinear solver and emits a warning when it is set.
model = LogisticRegression(
    C=best_val,
    penalty='l2',
    dual=True,
    solver='liblinear',
    fit_intercept=True,
    random_state=0,
    verbose=1,
)
model.fit(patients_train,survived_train)

In [ ]:
# Hold-out accuracy of the model refitted in this notebook.
refit_accuracy = accuracy_score(survived_test, model.predict(patients_test))
print("Logistic Regression acurracy is %2.2f" % refit_accuracy)

In [ ]:
# Hold-out accuracy of the model returned by ajustLogisticRegression.
helper_accuracy = accuracy_score(survived_test, logitmodelInitSAPS.predict(patients_test))
print("Logistic Regression acurracy is %2.2f" % helper_accuracy)

In [ ]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report

In [ ]:
def aucModel(name,survived_test,model,patients_test):
    """Compute, print and return the ROC AUC of ``model`` on the test set.

    ROC AUC is a ranking metric, so it is computed from continuous scores
    whenever the model exposes them; thresholded ``predict`` labels are used
    only as a last resort, because they collapse the curve to a single point.

    Args:
        name: label printed in front of the score.
        survived_test: true binary labels of the test rows.
        model: fitted estimator exposing ``decision_function``,
            ``predict_proba`` or at least ``predict``.
        patients_test: feature matrix of the test rows.

    Returns:
        The value returned by ``roc_auc_score``.
    """
    if hasattr(model, "decision_function"):
        scores = model.decision_function(patients_test)
    elif hasattr(model, "predict_proba"):
        # Second column = probability of the class with the greater label.
        scores = model.predict_proba(patients_test)[:, 1]
    else:
        scores = model.predict(patients_test)
    logit_roc_auc = roc_auc_score(survived_test, scores)
    print(name+" AUC = %2.2f"% logit_roc_auc)
    return logit_roc_auc

In [ ]:
# Class-weighted variant of the refit: class_weight="balanced" and a larger
# iteration budget. This rebinds `model` from the earlier cell.
# C is passed as-is, matching the earlier refit: truncating it with int()
# turns any value below 1 into 0, which scikit-learn rejects, and silently
# changes the regularisation strength for other non-integer values.
model = LogisticRegression(
    penalty='l2',
    C=best_val,
    class_weight="balanced",
    dual=True,
    solver='liblinear',
    random_state=0,
    max_iter=51200,
)
model.fit(patients_train,survived_train)
print("Logistic Regression acurracy is %2.2f" % accuracy_score(survived_test,model.predict(patients_test)))
logit_roc_auc = aucModel("Logistic",survived_test,model,patients_test)

In [ ]:
# Display the model object returned by ajustLogisticRegression.
logitmodelInitSAPS

In [ ]:
# Accuracy and AUC of the model returned by ajustLogisticRegression.
# Both metrics are computed for logitmodelInitSAPS; passing `model` to
# aucModel here would only repeat the AUC already reported for the refit.
print("Logistic Regression acurracy is %2.2f" % accuracy_score(survived_test,logitmodelInitSAPS.predict(patients_test)))
logit_roc_auc = aucModel("Logistic",survived_test,logitmodelInitSAPS,patients_test)

In [ ]: