In [1]:
from time import time
import psycopg2
from collections import Counter
import gc
import pandas as pd
import numpy as np
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.metrics import classification_report
np.set_printoptions(suppress=True,precision=10)
from sklearn.model_selection import train_test_split
In [2]:
import sys
import os

# Directory from which the project-local modules imported below are loaded.
# NOTE(review): absolute, machine-specific path -- adjust when running on
# another host.
HEART_RATE_PATTERNS_DIR = os.path.abspath("/home/scidb/HeartRatePatterns/Python")
# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if HEART_RATE_PATTERNS_DIR not in sys.path:
    sys.path.append(HEART_RATE_PATTERNS_DIR)

from LogisticRegresion import ajustLogisticRegression
from Matrix import convert_matrix
In [42]:
def selectMatrix(with_pearson,len_words=3):
    """Build the matrix, fit a logistic regression and score the hold-out split.

    The table returned by ``convert_matrix`` is split 80/20 with a fixed
    ``random_state`` of 42, the training part is handed to
    ``ajustLogisticRegression`` and the accuracy it reports is printed.

    Parameters
    ----------
    with_pearson
        Forwarded unchanged to ``convert_matrix(with_pearson=...)``.
    len_words : int, default 3
        Forwarded to ``convert_matrix`` as the pair ``(len_words, len_words)``.

    Returns
    -------
    tuple
        ``(survived_test, regresion, roc_auc)``: the hold-out labels, the
        second column of ``model.predict_proba`` on the hold-out rows, and the
        ROC AUC value returned by ``ajustLogisticRegression``.

    Notes
    -----
    The table index is treated as a two-level ``MultiIndex``.
    NOTE(review): level 0 is presumably the subject id and level 1 the
    survival flag (inferred from the variable names) -- confirm against
    ``convert_matrix``.
    """
    table = convert_matrix(with_pearson=with_pearson, len_words=(len_words, len_words))
    index = table.index

    # One entry per row. `index.levels[0]` only holds the *unique* values of the
    # level, so its length can differ from the number of rows and make
    # train_test_split fail on inconsistent sample counts.
    subjects = index.get_level_values(0).tolist()

    # `MultiIndex.labels` was renamed to `codes` (pandas 0.24) and later
    # removed; prefer `codes` and fall back to `labels` on older installations.
    level_codes = index.codes if hasattr(index, 'codes') else index.labels
    survived = level_codes[1].tolist()

    patients = table.values
    # The subject split is not used afterwards; it is kept in the call so the
    # three arrays are validated to have the same number of rows.
    (patients_train, patients_test,
     _subject_train, _subject_test,
     survived_train, survived_test) = train_test_split(
        patients, subjects, survived, test_size=0.2, random_state=42)

    model, accuracy, roc_auc = ajustLogisticRegression(
        patients_train, survived_train, patients_test, survived_test)
    # Second column of predict_proba.
    # NOTE(review): presumably the positive-class probability -- confirm.
    regresion = model.predict_proba(patients_test)[:, 1]
    print('accuracy_score', accuracy)
    return survived_test, regresion, roc_auc
In [35]:
def fillResults(test_values,len_words=3):
    """Run ``selectMatrix`` once per entry of ``test_values`` and collect the outcomes.

    Parameters
    ----------
    test_values : iterable
        Each element is passed to ``selectMatrix`` as its first argument and is
        stored under ``'name'`` in the corresponding result.
    len_words : int, default 3
        Passed to every ``selectMatrix`` call.

    Returns
    -------
    list of dict
        One dict per element, in input order, with the keys ``'name'``,
        ``'survived_test'``, ``'regresion'`` and ``'roc_auc'``.
    """
    def _evaluate(value):
        # Unpack the three values selectMatrix returns and label them.
        survived_test, regresion, roc_auc = selectMatrix(value, len_words)
        return {
            'name': value,
            'survived_test': survived_test,
            'regresion': regresion,
            'roc_auc': roc_auc,
        }

    return [_evaluate(value) for value in test_values]
In [28]:
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
In [29]:
def roc_curveplot(name,y_true,y_score,logit_roc_auc):
    """Add one ROC curve to the current matplotlib figure.

    Parameters
    ----------
    name
        Identifier shown after ``n= `` in the legend entry.
    y_true, y_score
        Passed straight to ``roc_curve``.
    logit_roc_auc
        Number shown in the legend entry, formatted with three decimals.
    """
    # roc_curve also returns the thresholds, which are not needed for the plot.
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_score)
    legend_label = 'n= ' + str(name) + (' AUC =%0.3f' % logit_roc_auc)
    plt.plot(false_pos_rate, true_pos_rate, label=legend_label)
In [36]:
# Default figure size (width, height) applied to every figure created afterwards.
fig_size = [6, 6]
plt.rcParams["figure.figsize"] = fig_size


def plotResult(nmfresult,size):
    """Plot the ROC curves of several results on one figure and display it.

    Parameters
    ----------
    nmfresult : iterable of dict
        Each dict must provide the keys ``'name'``, ``'survived_test'``,
        ``'regresion'`` and ``'roc_auc'`` (the structure built by
        ``fillResults``).
    size
        Only used in the figure title.
    """
    plt.figure()
    # Iterate over the argument itself. The previous loop read the global
    # `nmfresults` and rebound the parameter name, so whatever the caller
    # passed in was silently ignored.
    for result in nmfresult:
        roc_curveplot(result['name'], result['survived_test'],
                      result['regresion'], result['roc_auc'])
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Tasa de Falsos Positivos')
    plt.ylabel('Tasa de Verdaderos Positivos')
    plt.title('Curva ROC para regresiones usando '+str(size)+' clases')
    plt.legend(loc="lower right")
    plt.show()
In [43]:
# Sweep the `with_pearson` settings below for len_words = 3 and plot the ROC
# curves. The value is held in `clases`, as in the other experiment cells, so
# the sweep and the plot title cannot drift apart.
clases = 3
nmfresults = fillResults((1, 42, 84, 168, 252, 294, 336), clases)
plotResult(nmfresults, clases)
In [44]:
# Sweep the `with_pearson` settings below for len_words = 4 and plot the ROC curves.
clases = 4
nmfresults = fillResults(test_values=(50, 100, 150, 200, 250), len_words=clases)
plotResult(nmfresult=nmfresults, size=clases)
In [45]:
# Sweep the `with_pearson` settings below for len_words = 5 and plot the ROC curves.
clases = 5
nmfresults = fillResults(test_values=(50, 100, 150, 200, 250, 400), len_words=clases)
plotResult(nmfresult=nmfresults, size=clases)
In [46]:
# Sweep the `with_pearson` settings below for len_words = 6 and plot the ROC curves.
clases = 6
nmfresults = fillResults(test_values=(50, 100, 150, 200, 250, 300, 400), len_words=clases)
plotResult(nmfresult=nmfresults, size=clases)
In [47]:
# Sweep the `with_pearson` settings below for len_words = 7 and plot the ROC curves.
clases = 7
nmfresults = fillResults(test_values=(50, 100, 150, 200, 250, 300, 400), len_words=clases)
plotResult(nmfresult=nmfresults, size=clases)
In [48]:
# Sweep the `with_pearson` settings below for len_words = 8 and plot the ROC curves.
clases = 8
nmfresults = fillResults(test_values=(50, 100, 150, 200, 250, 300, 400), len_words=clases)
plotResult(nmfresult=nmfresults, size=clases)
In [ ]: