Prediction Logistic Regression

Imports


In [1]:
from time import time
import psycopg2
from collections import Counter
import gc
import pandas as pd
import numpy as np
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.metrics import classification_report
np.set_printoptions(suppress=True,precision=10)
from sklearn.model_selection import train_test_split

In [2]:
# Extend the module search path so the project-local modules below can be imported.
# NOTE(review): the directory is a hard-coded absolute path — confirm it exists
# on the machine running this notebook.
import sys 
import os
sys.path.append(os.path.abspath("/home/scidb/HeartRatePatterns/Python"))
# Project-local helpers; presumably resolved through the path appended above.
from LogisticRegresion import ajustLogisticRegression
from Matrix import convert_matrix

In [3]:
def selectMatrix(with_pearson):
    """Fit a logistic regression on the converted matrix and score a held-out split.

    The table returned by ``convert_matrix(with_pearson=..., len_words=(3, 3))``
    is split 80/20 (``random_state=42``), the training part is passed to
    ``ajustLogisticRegression`` and the fitted model is used to score the
    test part.

    Args:
        with_pearson: Forwarded unchanged to ``convert_matrix``.

    Returns:
        A tuple ``(survived_test, regresion, roc_auc)`` where
        ``survived_test`` holds the integer codes of the second index level
        for the test rows (presumably the survival flag -- confirm against
        ``convert_matrix``), ``regresion`` is column 1 of
        ``model.predict_proba`` on the test rows, and ``roc_auc`` is the
        value returned by ``ajustLogisticRegression``.
    """
    table = convert_matrix(with_pearson=with_pearson, len_words=(3, 3))
    index = table.index

    # One subject entry per row. ``index.levels[0]`` only lists the unique
    # level values, which is not guaranteed to line up with the rows.
    subjects = index.get_level_values(0).tolist()

    # ``MultiIndex.labels`` was renamed to ``codes`` in pandas 0.24 and
    # removed in 1.0; fall back to the old name on older installations.
    level_codes = index.codes if hasattr(index, "codes") else index.labels
    survived = level_codes[1].tolist()

    patients = table.values
    # The subject halves of the split are not used further; they are split
    # alongside the data so every array goes through the same shuffling.
    (patients_train, patients_test,
     _subject_train, _subject_test,
     survived_train, survived_test) = train_test_split(
        patients, subjects, survived, test_size=0.2, random_state=42)

    # The helper's second return value (an accuracy figure) is not needed here.
    model, _accuracy, roc_auc = ajustLogisticRegression(
        patients_train, survived_train, patients_test, survived_test)

    # Keep only the second column of the predicted class probabilities.
    regresion = model.predict_proba(patients_test)[:, 1]
    return survived_test, regresion, roc_auc

In [27]:
# Run selectMatrix once per setting and keep every outcome for the plotting cell.
nmfresults = []
for setting in (1, 42, 84, 168, 252, 294, 336):
    y_test, y_score, auc_value = selectMatrix(setting)
    nmfresults.append(dict(
        name=setting,
        survived_test=y_test,
        regresion=y_score,
        roc_auc=auc_value))


(155, 1)
(468, 42)
(488, 84)
(563, 168)
(575, 252)
(577, 294)
(580, 336)

In [28]:
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

In [29]:
def roc_curveplot(name,y_true,y_score,logit_roc_auc):
    """Add one ROC curve to the current matplotlib figure.

    The curve is computed with ``roc_curve(y_true, y_score)`` and labelled
    with ``name`` and ``logit_roc_auc`` formatted to three decimals.
    """
    # The thresholds returned by roc_curve are not needed for the plot.
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_score)
    auc_text = ' AUC =%0.3f' % logit_roc_auc
    curve_label = 'n= ' + str(name) + auc_text
    plt.plot(false_pos_rate, true_pos_rate, label=curve_label)

In [30]:
# Draw one ROC curve per stored result on a single figure.
plt.figure()
for entry in nmfresults:
    curve_args = [entry[key] for key in ('name', 'survived_test', 'regresion', 'roc_auc')]
    roc_curveplot(*curve_args)

# Dashed black diagonal from (0, 0) to (1, 1) as a reference line.
plt.plot([0,1],[0,1],'k--')

# Axis titles and ranges; the y range leaves a little headroom above 1.0.
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])

plt.legend(loc="lower right")
plt.show()



In [ ]: