In [1]:
import sys
import os
sys.path.append(os.path.abspath("/home/scidb/HeartRatePatterns/Python"))
from LogisticRegresion import ajustLogisticRegression
from PlotWords import plot_word
from Matrix import convert_matrix
from NMF import generateNMF, find_best_NMF
In [2]:
def findpatient(message, table, beats):
    groupsize = table.shape[0]
    deads = sum(table.index.labels[1].tolist())
    percent = -1 if groupsize == 0 else deads / groupsize
    if groupsize != 0:
        print(message, "groupsize:%d" % groupsize, "deads:%d" % deads,
              "percent:{0:.2%}".format(percent), beats)
In [3]:
import psycopg2

def selectPearson(word, dbname="mimic"):
    conn = psycopg2.connect("dbname=" + dbname)
    cur = conn.cursor()
    select_statement = 'SELECT patient,deadpatient,p1 FROM wordspearson WHERE word = %s'
    # print(cur.mogrify(select_statement, (word,)))
    cur.execute(select_statement, (word,))
    select = {}
    for row in cur:
        patient = row[0]
        select = {'word': word, "pacientes": patient,
                  "porcentaje de muertos": "{0:.2%}".format(row[1] / patient), "p1": row[2]}
    cur.close()
    conn.close()
    return select
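For reference, the wordspearson lookup returns one dict per word; using the values that later surface in Out[16] for the word 'adc', a call plausibly comes back as follows (illustrative, values read off that output rather than a recorded call):

selectPearson('adc')
# {'word': 'adc', 'pacientes': 155, 'porcentaje de muertos': '89.68%', 'p1': 0.20889}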
In [4]:
def print_top_words(components, feature_names, topic_index=None):
    result = []
    for topic_idx, topic in enumerate(components):
        features = [{"word": feature_names[i], "p1": topic[i]}
                    for i in topic.argsort()[:-1] if topic[i] > 0]
        if features and (topic_index is None or topic_index == topic_idx):
            result.append({"topic": topic_idx, "features": features})
    return result
In [5]:
def addValue(key, my_dict):
    if key in my_dict:
        my_dict[key] += 1
    else:
        my_dict[key] = 1
    return my_dict
In [6]:
table = convert_matrix(with_pearson=True) #.iloc[:,:400]
survived = table.index.labels[1].tolist()
patients = table.values
table
(554, 400)
Out[6]:
subject_id | isAlive | aaadc aabadd aabaf aabafb aabaga aabbg aabbgb aabdab aabdad aabdb ... hag haj hbf hdd hddd hfa hfg hga hgd jag
135    | 1 | 0 0 0 0 0 0 0 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
151    | 1 | 0 1 0 0 2 0 0 2 0 2 ... 0 0 0 0 0 0 0 0 0 0
177    | 1 | 0 0 0 0 0 0 0 1 0 7 ... 0 0 0 0 0 0 0 0 0 0
214    | 1 | 0 0 2 0 1 0 0 0 0 2 ... 0 0 0 0 0 0 0 0 0 0
263    | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
279    | 1 | 0 0 0 0 0 0 0 1 0 6 ... 0 0 0 0 0 0 0 0 0 0
283    | 1 | 0 1 0 0 0 0 0 2 0 2 ... 0 0 0 0 0 0 0 0 0 0
368    | 1 | 0 0 1 1 1 1 1 2 0 0 ... 0 0 0 0 0 0 0 0 0 0
377    | 1 | 1 1 2 1 3 0 0 8 2 16 ... 0 0 0 1 1 1 0 0 0 0
408    | 1 | 0 0 0 0 0 0 0 2 5 2 ... 0 0 0 0 0 0 0 0 0 0
462    | 0 | 0 8 2 1 0 0 0 14 4 15 ... 0 0 0 0 0 0 0 0 0 0
618    | 1 | 0 0 0 0 0 0 0 1 0 5 ... 0 0 0 0 0 0 0 0 0 0
638    | 1 | 2 0 2 2 1 1 0 2 0 3 ... 0 1 0 1 1 0 0 1 0 0
682    | 1 | 0 0 1 0 0 0 0 0 0 1 ... 3 2 0 1 1 7 0 3 0 2
736    | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
743    | 1 | 0 0 0 0 0 0 0 2 0 6 ... 1 1 0 0 0 1 0 0 0 0
749    | 1 | 0 0 1 0 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
793    | 1 | 2 2 2 1 3 0 0 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
886    | 1 | 0 2 1 0 0 0 0 1 0 0 ... 0 0 0 0 0 1 0 0 0 0
952    | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1004   | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1075   | 1 | 1 1 2 0 1 1 0 1 6 6 ... 0 0 0 0 0 0 0 0 1 0
1144   | 0 | 0 0 0 0 0 1 0 0 0 1 ... 0 0 0 1 0 0 0 1 0 0
1160   | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1222   | 0 | 0 0 4 2 0 2 0 5 2 10 ... 0 0 0 1 0 0 0 0 0 0
1226   | 1 | 0 0 2 1 0 0 0 1 0 3 ... 0 0 0 0 0 0 0 0 0 0
1459   | 0 | 0 4 4 3 0 0 0 10 1 24 ... 0 0 0 0 0 0 0 0 0 0
1528   | 1 | 0 1 6 2 3 0 0 3 0 3 ... 0 0 0 0 0 0 0 0 0 0
1531   | 1 | 0 0 0 0 0 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 0
1569   | 1 | 0 0 6 1 0 1 0 1 2 3 ... 1 0 0 0 0 0 0 2 0 3
...    | ... | ...
23034  | 1 | 0 0 6 4 0 0 0 13 2 17 ... 1 0 0 0 0 0 0 0 1 0
23097  | 1 | 10 4 7 1 1 1 0 3 4 11 ... 0 0 0 1 1 0 0 0 0 0
23120  | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23130  | 1 | 0 0 1 0 0 0 0 2 0 3 ... 0 0 0 0 0 0 0 0 0 0
23178  | 1 | 0 0 3 3 0 2 1 2 0 14 ... 0 0 0 1 0 0 0 0 1 0
23200  | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23298  | 0 | 0 0 0 0 0 0 0 1 0 1 ... 0 0 0 0 0 0 0 0 0 0
23336  | 1 | 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23363  | 1 | 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23384  | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23401  | 1 | 0 10 2 0 0 0 0 8 2 8 ... 0 0 0 0 0 0 0 0 0 0
23451  | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23468  | 1 | 3 1 0 0 1 0 0 5 5 8 ... 1 0 0 0 0 0 0 0 0 0
23474  | 1 | 0 0 3 0 1 1 0 3 0 9 ... 0 0 0 0 0 0 0 0 1 0
23510  | 1 | 0 0 0 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23944  | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24004  | 1 | 0 0 4 0 0 0 0 4 0 2 ... 0 0 0 1 0 0 0 0 0 0
24076  | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24129  | 1 | 0 0 0 0 0 0 0 1 0 0 ... 0 0 0 0 0 0 0 1 0 0
24133  | 0 | 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24142  | 1 | 0 0 2 1 2 0 0 3 0 23 ... 0 0 0 0 0 0 0 0 0 0
24152  | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24185  | 1 | 0 0 0 0 0 0 0 5 0 7 ... 0 0 0 0 0 0 0 0 0 0
24227  | 0 | 0 0 0 0 0 0 0 0 0 3 ... 0 0 0 0 0 0 0 0 0 0
25466  | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
41962  | 1 | 0 3 6 0 0 0 0 1 2 5 ... 2 0 0 8 2 4 2 1 4 1
42261  | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
42410  | 0 | 0 0 0 0 0 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 0
42492  | 0 | 0 0 2 1 1 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 0
43459  | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
554 rows × 400 columns
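convert_matrix comes from the project's Matrix module, so its body is not shown here. Given how it is called, and the matrix/subjectwords tables that countPatients queries in In [9], it plausibly pivots per-patient word counts into a DataFrame indexed by (subject_id, isAlive). The sketch below is a guess under those assumptions; the counting column name, the sumvals handling, and the function name are hypothetical, not the module's real code:

import pandas as pd
import psycopg2

def convert_matrix_sketch(dbname="mimic", sumvals=True, filter_words=None):
    # One row per (patient, word): its count plus the survival flag.
    conn = psycopg2.connect("dbname=" + dbname)
    sql = """SELECT m.subject_id, s.isalive, m.word, m.counting
             FROM matrix m JOIN subjectwords s ON m.subject_id = s.subject_id"""
    rows = pd.read_sql(sql, conn)
    conn.close()
    if filter_words is not None:
        rows = rows[rows.word.isin(filter_words)]
    pivot = rows.pivot_table(index=["subject_id", "isalive"], columns="word",
                             values="counting", aggfunc="sum", fill_value=0)
    # sumvals=False would binarize to presence/absence, as used in In [20].
    return pivot if sumvals else (pivot > 0).astype(int)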
In [7]:
from sklearn.model_selection import train_test_split
patients_train, patients_test,survived_train, survived_test = train_test_split(patients,survived,test_size=0.2, random_state=42)
In [8]:
model, accuracy, logit_roc_auc = ajustLogisticRegression(patients_train, survived_train, patients_test, survived_test)
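ajustLogisticRegression is defined in the project's LogisticRegresion module and is not reproduced in the notebook. A minimal sketch of a fit-and-score helper with this signature, assuming scikit-learn's LogisticRegression (the solver choice and internal structure are illustrative only, not the repository's actual code):

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score

def ajust_logistic_regression_sketch(X_train, y_train, X_test, y_test):
    # Fit a plain logistic regression on the training split.
    model = LogisticRegression(solver="liblinear")
    model.fit(X_train, y_train)
    # Evaluate on the held-out split: hard-label accuracy plus ROC AUC.
    accuracy = accuracy_score(y_test, model.predict(X_test))
    roc_auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    return model, accuracy, roc_auc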
In [9]:
def countPatients(word, dbname="mimic"):
    conn = psycopg2.connect("dbname=" + dbname)
    cur = conn.cursor()
    select_statement = '''SELECT count(1),sum(isalive) FROM matrix m LEFT JOIN subjectwords s
                          ON m.subject_id=s.subject_id WHERE m.word = %s GROUP BY m.word'''
    # print(cur.mogrify(select_statement, (word,)))
    cur.execute(select_statement, (word,))
    select = {}
    for row in cur:
        select = {"patient": row[0], "deadPatient": row[1]}
    cur.close()
    conn.close()
    return select
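The shape of the return value can be checked against later output: Out[24] reports 'adc' as "89.68% de 155", so a call plausibly returns (illustrative values derived from that output, not a recorded call):

countPatients('adc')
# {'patient': 155, 'deadPatient': 139}   # 139/155 ≈ 89.68%, matching Out[24]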
In [10]:
from scipy.stats import pearsonr

columns = list(table.columns.values)
pearsonList = []
for i, word in enumerate(columns):
    pearson = pearsonr(patients[:, i], survived)
    count = countPatients(word)
    deadPatient, patient = count['deadPatient'], count['patient']
    percent = deadPatient / patient
    pearsonList.append({'word': word, 'correlación': pearson[0], 'p-valor': pearson[1],
                        'pacientes muertos': "{0:.2%}".format(percent) + " de %d" % patient})
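Since survived is a 0/1 vector, each of these Pearson coefficients is exactly the point-biserial correlation between a word's count and survival; scipy exposes that statistic directly, which makes a handy cross-check (illustrative snippet, not part of the original run):

from scipy.stats import pointbiserialr

r, p = pearsonr(patients[:, 0], survived)
r_pb, p_pb = pointbiserialr(survived, patients[:, 0])
assert abs(r - r_pb) < 1e-12  # same statistic under two names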
In [11]:
from operator import itemgetter
import pandas as pd

pd.set_option('display.float_format', lambda x: '%.5f' % x)
pearsonList = sorted(pearsonList, key=itemgetter('correlación'), reverse=True)
df = pd.DataFrame(pearsonList)
df = df.set_index('word')
df = df.sort_values(['correlación'], ascending=[False])
df
Out[11]:
word   | correlación | p-valor | pacientes muertos
babaaf | 0.14448 | 0.00065 | 90.11% de 91
babaf  | 0.13705 | 0.00122 | 84.00% de 150
ababaf | 0.13675 | 0.00125 | 90.10% de 101
abbg   | 0.13498 | 0.00145 | 86.57% de 134
fbabd  | 0.13318 | 0.00168 | 97.44% de 39
abaf   | 0.13287 | 0.00172 | 81.18% de 287
dafaab | 0.13162 | 0.00191 | 95.35% de 43
bababf | 0.13056 | 0.00207 | 87.36% de 87
fbbaba | 0.12763 | 0.00262 | 89.47% de 95
dbaaf  | 0.12759 | 0.00262 | 92.73% de 55
adbaf  | 0.12740 | 0.00266 | 90.67% de 75
bja    | 0.12554 | 0.00308 | 90.28% de 72
abafb  | 0.12473 | 0.00328 | 83.78% de 148
aabbg  | 0.12439 | 0.00336 | 88.51% de 87
abfba  | 0.12410 | 0.00344 | 83.65% de 159
fabd   | 0.12363 | 0.00356 | 90.29% de 103
dabc   | 0.12319 | 0.00368 | 90.91% de 77
abbga  | 0.12279 | 0.00380 | 88.10% de 84
bafba  | 0.12228 | 0.00395 | 83.67% de 147
dfaaba | 0.12125 | 0.00426 | 95.00% de 40
bfaf   | 0.12117 | 0.00429 | 92.59% de 54
fafab  | 0.12010 | 0.00465 | 100.00% de 25
bfba   | 0.11952 | 0.00485 | 81.85% de 248
fbda   | 0.11952 | 0.00485 | 88.35% de 103
ffdaa  | 0.11891 | 0.00507 | 95.45% de 44
abbh   | 0.11862 | 0.00518 | 91.04% de 67
fabdb  | 0.11804 | 0.00541 | 100.00% de 42
dbaafa | 0.11796 | 0.00544 | 94.87% de 39
baaag  | 0.11775 | 0.00552 | 85.71% de 119
hbf    | 0.11745 | 0.00564 | 100.00% de 24
...    | ... | ... | ...
aabadd | 0.02390 | 0.57452 | 85.22% de 115
abdbd  | 0.02309 | 0.58767 | 86.26% de 131
bdaad  | 0.02280 | 0.59234 | 85.89% de 163
aacda  | 0.02241 | 0.59857 | 88.51% de 87
bdd    | 0.02081 | 0.62508 | 82.53% de 269
dcaa   | 0.01896 | 0.65609 | 87.30% de 126
dca    | 0.01591 | 0.70863 | 84.05% de 163
acabaa | 0.01534 | 0.71856 | 82.58% de 178
bdbbd  | 0.01433 | 0.73647 | 85.47% de 117
dbabd  | 0.01384 | 0.74507 | 85.61% de 132
bdaab  | 0.01269 | 0.76564 | 79.17% de 336
bdda   | 0.01250 | 0.76905 | 81.69% de 213
dcd    | 0.01142 | 0.78846 | 88.73% de 71
bbdd   | 0.00983 | 0.81745 | 85.47% de 179
abdad  | 0.00928 | 0.82747 | 85.09% de 161
babdaa | 0.00887 | 0.83490 | 81.01% de 258
ddbaa  | 0.00725 | 0.86489 | 82.93% de 164
bdbd   | 0.00577 | 0.89214 | 85.79% de 183
aabdad | 0.00078 | 0.98546 | 84.87% de 119
dabd   | -0.00205 | 0.96162 | 83.26% de 215
ddabd  | -0.00319 | 0.94023 | 92.45% de 53
bddaa  | -0.00327 | 0.93873 | 83.54% de 164
bdaada | -0.00378 | 0.92934 | 84.21% de 133
bbdbd  | -0.00753 | 0.85966 | 86.07% de 122
abdada | -0.00925 | 0.82801 | 84.73% de 131
abdd   | -0.01762 | 0.67908 | 82.23% de 197
bdabd  | -0.01860 | 0.66230 | 85.71% de 119
bdbdb  | -0.02037 | 0.63238 | 84.68% de 124
dabda  | -0.02888 | 0.49761 | 83.24% de 173
daabd  | -0.03705 | 0.38412 | 83.53% de 170
400 rows × 3 columns
In [12]:
find_best_NMF(patients,survived)
{'pearson': 0.05376539303670632, 'diffErr': None, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 2162.3655318943565, 'n_components': 2}
{'pearson': 0.097055800137172007, 'diffErr': 440.15960104444525, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 1722.2059308499113, 'n_components': 3}
{'pearson': 0.090018400435492876, 'diffErr': 234.52210889286789, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 1487.6838219570434, 'n_components': 4}
{'pearson': 0.081563786972520536, 'diffErr': 214.44990071202324, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 1273.2339212450202, 'n_components': 5}
{'pearson': 0.079673449258838888, 'diffErr': 121.27194865084743, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 1151.9619725941727, 'n_components': 6}
{'pearson': 0.12737775530295065, 'diffErr': 71.658850984822038, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 1080.3031216093507, 'n_components': 7}
{'pearson': 0.12715761038199216, 'diffErr': 75.007581107189594, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 1005.2955405021611, 'n_components': 8}
{'pearson': 0.12364285814459262, 'diffErr': 78.577705555105695, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 926.71783494705539, 'n_components': 9}
{'pearson': 0.1222106018457856, 'diffErr': 63.967251255186284, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 862.75058369186911, 'n_components': 10}
{'pearson': 0.099045533691014515, 'diffErr': 51.181103957561277, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 811.56947973430783, 'n_components': 11}
{'pearson': 0.099084258886736207, 'diffErr': 36.475940264779297, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 775.09353946952854, 'n_components': 12}
{'pearson': 0.10246098678989352, 'diffErr': 27.017598185873226, 'accuracy': 0.7567567567567568, 'roc_auc': 0.49784482758620685, 'recostrucción error': 748.07594128365531, 'n_components': 13}
{'pearson': 0.10884052466783017, 'diffErr': 38.138504964553135, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 709.93743631910218, 'n_components': 14}
{'pearson': 0.090041511365150456, 'diffErr': 30.003798853342801, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 679.93363746575938, 'n_components': 15}
{'pearson': 0.10204645411700471, 'diffErr': 26.314215507768722, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 653.61942195799065, 'n_components': 16}
{'pearson': 0.10124549238778958, 'diffErr': 23.896084496502112, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 629.72333746148854, 'n_components': 17}
{'pearson': 0.11822625137185924, 'diffErr': 22.544328158030567, 'accuracy': 0.74774774774774777, 'roc_auc': 0.47701149425287354, 'recostrucción error': 607.17900930345797, 'n_components': 18}
{'pearson': 0.11335065884627309, 'diffErr': 24.670450717502149, 'accuracy': 0.7927927927927928, 'roc_auc': 0.53591954022988508, 'recostrucción error': 582.50855858595583, 'n_components': 19}
{'pearson': 0.11115230437067979, 'diffErr': 29.813402608102365, 'accuracy': 0.7927927927927928, 'roc_auc': 0.53591954022988508, 'recostrucción error': 552.69515597785346, 'n_components': 20}
{'pearson': 0.10890175755782711, 'diffErr': 20.929019423761247, 'accuracy': 0.74774774774774777, 'roc_auc': 0.47701149425287354, 'recostrucción error': 531.76613655409221, 'n_components': 21}
{'pearson': 0.10864733451773047, 'diffErr': 17.838291790875701, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 513.92784476321651, 'n_components': 22}
{'pearson': 0.10430678586646505, 'diffErr': 21.372749984657048, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 492.55509477855946, 'n_components': 23}
{'pearson': 0.13529780606043829, 'diffErr': 17.744502982209156, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 474.81059179635031, 'n_components': 24}
{'pearson': 0.13597996122133038, 'diffErr': 12.156754929258341, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 462.65383686709197, 'n_components': 25}
{'pearson': 0.13625247830829873, 'diffErr': 10.947475664333069, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 451.7063612027589, 'n_components': 26}
{'pearson': 0.1356389980344033, 'diffErr': 13.977371239191598, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 437.7289899635673, 'n_components': 27}
{'pearson': 0.13113745749974789, 'diffErr': 12.321043593860225, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 425.40794636970708, 'n_components': 28}
{'pearson': 0.13434291529512474, 'diffErr': 11.646934011812561, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 413.76101235789451, 'n_components': 29}
{'pearson': 0.1332552104002945, 'diffErr': 8.8676138972157901, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 404.89339846067872, 'n_components': 30}
{'pearson': 0.13310692291439358, 'diffErr': 11.33595210415649, 'accuracy': 0.7567567567567568, 'roc_auc': 0.47701149425287354, 'recostrucción error': 393.55744635652223, 'n_components': 31}
{'pearson': 0.12279439837122308, 'diffErr': 6.2225822373068809, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 387.33486411921535, 'n_components': 32}
{'pearson': 0.12582738312735586, 'diffErr': 7.5661593329539869, 'accuracy': 0.76576576576576572, 'roc_auc': 0.49784482758620685, 'recostrucción error': 379.76870478626137, 'n_components': 33}
{'pearson': 0.1233475452935988, 'diffErr': 9.8408870973310059, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 369.92781768893036, 'n_components': 34}
{'pearson': 0.12359598837132334, 'diffErr': -0.36642654632964877, 'accuracy': 0.74774774774774777, 'roc_auc': 0.47701149425287354, 'recostrucción error': 370.29424423526001, 'n_components': 35}
{'pearson': 0.13238134762429354, 'diffErr': 15.520605249025209, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 354.7736389862348, 'n_components': 36}
{'pearson': 0.13145715342181577, 'diffErr': 0.92703371833732717, 'accuracy': 0.74774774774774777, 'roc_auc': 0.47701149425287354, 'recostrucción error': 353.84660526789747, 'n_components': 37}
{'pearson': 0.1234834973805936, 'diffErr': 3.9009479982794915, 'accuracy': 0.7567567567567568, 'roc_auc': 0.49784482758620685, 'recostrucción error': 349.94565726961798, 'n_components': 38}
{'pearson': 0.12034045811701181, 'diffErr': 15.409030714449443, 'accuracy': 0.77477477477477474, 'roc_auc': 0.4942528735632184, 'recostrucción error': 334.53662655516854, 'n_components': 39}
{'pearson': 0.12768512333789622, 'diffErr': 5.8330295080371002, 'accuracy': 0.76576576576576572, 'roc_auc': 0.50359195402298851, 'recostrucción error': 328.70359704713144, 'n_components': 40}
{'pearson': 0.12818467028018282, 'diffErr': 4.422483919765682, 'accuracy': 0.7567567567567568, 'roc_auc': 0.47701149425287354, 'recostrucción error': 324.28111312736576, 'n_components': 41}
{'pearson': 0.12768209391396318, 'diffErr': 5.4266172427953734, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 318.85449588457038, 'n_components': 42}
{'pearson': 0.12649395324992271, 'diffErr': 5.6268662973392907, 'accuracy': 0.7567567567567568, 'roc_auc': 0.47701149425287354, 'recostrucción error': 313.22762958723109, 'n_components': 43}
{'pearson': 0.12301791179285991, 'diffErr': 2.3299603924428993, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 310.89766919478819, 'n_components': 44}
{'pearson': 0.12481104557606498, 'diffErr': 6.6167445011430459, 'accuracy': 0.73873873873873874, 'roc_auc': 0.47126436781609193, 'recostrucción error': 304.28092469364515, 'n_components': 45}
{'pearson': 0.11563240238329764, 'diffErr': 3.3893473240239018, 'accuracy': 0.74774774774774777, 'roc_auc': 0.47701149425287354, 'recostrucción error': 300.89157736962125, 'n_components': 46}
{'pearson': 0.13093683979132412, 'diffErr': 3.2676572390707292, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 297.62392013055052, 'n_components': 47}
{'pearson': 0.11123601632780356, 'diffErr': 0.59834155080989149, 'accuracy': 0.77477477477477474, 'roc_auc': 0.4942528735632184, 'recostrucción error': 297.02557857974062, 'n_components': 48}
{'pearson': 0.10898602879795428, 'diffErr': 6.677871572346362, 'accuracy': 0.72972972972972971, 'roc_auc': 0.46551724137931033, 'recostrucción error': 290.34770700739426, 'n_components': 49}
{'pearson': 0.11789478519734453, 'diffErr': 5.2444714106479182, 'accuracy': 0.74774774774774777, 'roc_auc': 0.47701149425287354, 'recostrucción error': 285.10323559674634, 'n_components': 50}
{'pearson': 0.11428020761902029, 'diffErr': 1.8280798713487343, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 283.27515572539761, 'n_components': 51}
{'pearson': 0.10895149035850181, 'diffErr': 2.7829809607944753, 'accuracy': 0.77477477477477474, 'roc_auc': 0.50359195402298851, 'recostrucción error': 280.49217476460313, 'n_components': 52}
{'pearson': 0.11072744402286747, 'diffErr': 4.7081903106206937, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 275.78398445398244, 'n_components': 53}
{'pearson': 0.11043090884173715, 'diffErr': 3.5916345080893279, 'accuracy': 0.7567567567567568, 'roc_auc': 0.47701149425287354, 'recostrucción error': 272.19234994589311, 'n_components': 54}
{'pearson': 0.10860303152906108, 'diffErr': 1.9462277631640177, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 270.2461221827291, 'n_components': 55}
{'pearson': 0.11042438194395338, 'diffErr': 2.2089893875227062, 'accuracy': 0.7567567567567568, 'roc_auc': 0.47701149425287354, 'recostrucción error': 268.03713279520639, 'n_components': 56}
{'pearson': 0.11056222019083801, 'diffErr': 4.2536832908987208, 'accuracy': 0.77477477477477474, 'roc_auc': 0.50933908045977017, 'recostrucción error': 263.78344950430767, 'n_components': 57}
{'pearson': 0.10855009252091609, 'diffErr': 4.4530352118738961, 'accuracy': 0.7567567567567568, 'roc_auc': 0.49784482758620685, 'recostrucción error': 259.33041429243377, 'n_components': 58}
{'pearson': 0.11413790365953326, 'diffErr': 0.063218539682168284, 'accuracy': 0.74774774774774777, 'roc_auc': 0.47126436781609193, 'recostrucción error': 259.2671957527516, 'n_components': 59}
{'pearson': 0.10783599536869071, 'diffErr': 3.5946200138592985, 'accuracy': 0.77477477477477474, 'roc_auc': 0.4942528735632184, 'recostrucción error': 255.67257573889231, 'n_components': 60}
{'pearson': 0.10786327294560029, 'diffErr': 6.1234477075406915, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 249.54912803135161, 'n_components': 61}
{'pearson': 0.11153178798310334, 'diffErr': 3.4407679892554199, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 246.10836004209619, 'n_components': 62}
{'pearson': 0.10929600918187392, 'diffErr': 1.3775251947712093, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 244.73083484732499, 'n_components': 63}
{'pearson': 0.10800171192847335, 'diffErr': 4.7805478497244849, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 239.9502869976005, 'n_components': 64}
{'pearson': 0.10919347393273963, 'diffErr': 0.57240736672630987, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 239.37787963087419, 'n_components': 65}
{'pearson': 0.11022478726297691, 'diffErr': 5.3297802997604151, 'accuracy': 0.74774774774774777, 'roc_auc': 0.47701149425287354, 'recostrucción error': 234.04809933111378, 'n_components': 66}
{'pearson': 0.10749507012292835, 'diffErr': 1.6786866891908971, 'accuracy': 0.73873873873873874, 'roc_auc': 0.46551724137931033, 'recostrucción error': 232.36941264192288, 'n_components': 67}
{'pearson': 0.10774247111437618, 'diffErr': 2.6082760488228303, 'accuracy': 0.74774774774774777, 'roc_auc': 0.47701149425287354, 'recostrucción error': 229.76113659310005, 'n_components': 68}
{'pearson': 0.10819457132593975, 'diffErr': 0.52110169060102862, 'accuracy': 0.73873873873873874, 'roc_auc': 0.47126436781609193, 'recostrucción error': 229.24003490249902, 'n_components': 69}
{'pearson': 0.10840061030599936, 'diffErr': 3.6988226741294739, 'accuracy': 0.74774774774774777, 'roc_auc': 0.49209770114942525, 'recostrucción error': 225.54121222836955, 'n_components': 70}
{'pearson': 0.10840795392254648, 'diffErr': 2.3544472234675027, 'accuracy': 0.77477477477477474, 'roc_auc': 0.4942528735632184, 'recostrucción error': 223.18676500490204, 'n_components': 71}
{'pearson': 0.1338026592380685, 'diffErr': 1.2494485104288913, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 221.93731649447315, 'n_components': 72}
{'pearson': 0.12679120690152884, 'diffErr': 2.5573378511593887, 'accuracy': 0.77477477477477474, 'roc_auc': 0.4942528735632184, 'recostrucción error': 219.37997864331376, 'n_components': 73}
{'pearson': 0.11008912759619609, 'diffErr': 3.8664836136147755, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 215.51349502969899, 'n_components': 74}
{'pearson': 0.12422596379221923, 'diffErr': 1.6610409430571451, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 213.85245408664184, 'n_components': 75}
{'pearson': 0.1156094341872473, 'diffErr': 1.6181542621540359, 'accuracy': 0.72972972972972971, 'roc_auc': 0.45977011494252873, 'recostrucción error': 212.23429982448781, 'n_components': 76}
{'pearson': 0.12336383109630233, 'diffErr': 1.825455947382153, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 210.40884387710565, 'n_components': 77}
{'pearson': 0.13002024274834093, 'diffErr': 1.32705524711821, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 209.08178862998744, 'n_components': 78}
{'pearson': 0.12034040339050936, 'diffErr': 3.1098238507635472, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 205.9719647792239, 'n_components': 79}
{'pearson': 0.13080602724505158, 'diffErr': 2.3670574227451198, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 203.60490735647878, 'n_components': 80}
{'pearson': 0.12931131892153361, 'diffErr': -3.0165629784734165, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 206.62147033495219, 'n_components': 81}
{'pearson': 0.1288994255794626, 'diffErr': 4.6211931070489811, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 202.00027722790321, 'n_components': 82}
{'pearson': 0.13298009496205385, 'diffErr': 3.4761567451215285, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 198.52412048278168, 'n_components': 83}
{'pearson': 0.12550266347444977, 'diffErr': -1.6362610138038463, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 200.16038149658553, 'n_components': 84}
{'pearson': 0.12097303851167147, 'diffErr': 4.7402196763711686, 'accuracy': 0.73873873873873874, 'roc_auc': 0.47126436781609193, 'recostrucción error': 195.42016182021436, 'n_components': 85}
{'pearson': 0.12429863387713905, 'diffErr': -1.4432919584201329, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 196.86345377863449, 'n_components': 86}
{'pearson': 0.12008172124865137, 'diffErr': -0.64783866287589831, 'accuracy': 0.77477477477477474, 'roc_auc': 0.4942528735632184, 'recostrucción error': 197.51129244151039, 'n_components': 87}
{'pearson': 0.13189348624965153, 'diffErr': 6.3788565193314639, 'accuracy': 0.68468468468468469, 'roc_auc': 0.45186781609195403, 'recostrucción error': 191.13243592217893, 'n_components': 88}
{'pearson': 0.13297653068435561, 'diffErr': -2.0232294549037988, 'accuracy': 0.7567567567567568, 'roc_auc': 0.49784482758620685, 'recostrucción error': 193.15566537708273, 'n_components': 89}
{'pearson': 0.12386758796756281, 'diffErr': 3.6598432113309798, 'accuracy': 0.77477477477477474, 'roc_auc': 0.4942528735632184, 'recostrucción error': 189.49582216575175, 'n_components': 90}
{'pearson': 0.12013908694364021, 'diffErr': 2.6203760898033295, 'accuracy': 0.69369369369369371, 'roc_auc': 0.44252873563218392, 'recostrucción error': 186.87544607594842, 'n_components': 91}
{'pearson': 0.12441114724116407, 'diffErr': 0.42183184664102669, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 186.45361422930739, 'n_components': 92}
{'pearson': 0.12481007672869607, 'diffErr': 3.1222785155133295, 'accuracy': 0.76576576576576572, 'roc_auc': 0.4885057471264368, 'recostrucción error': 183.33133571379406, 'n_components': 93}
{'pearson': 0.12991094277381543, 'diffErr': -1.5210975011825667, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 184.85243321497663, 'n_components': 94}
{'pearson': 0.12457430516869321, 'diffErr': 3.5220662426741569, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 181.33036697230247, 'n_components': 95}
{'pearson': 0.12360940563989327, 'diffErr': 1.0244246568673532, 'accuracy': 0.77477477477477474, 'roc_auc': 0.4942528735632184, 'recostrucción error': 180.30594231543512, 'n_components': 96}
{'pearson': 0.12092079275121478, 'diffErr': 2.0728136977318172, 'accuracy': 0.77477477477477474, 'roc_auc': 0.50933908045977017, 'recostrucción error': 178.2331286177033, 'n_components': 97}
{'pearson': 0.11850784026965402, 'diffErr': 1.3070625365479884, 'accuracy': 0.78378378378378377, 'roc_auc': 0.5, 'recostrucción error': 176.92606608115531, 'n_components': 98}
{'pearson': 0.12562156413760861, 'diffErr': 0.68605270609234026, 'accuracy': 0.7567567567567568, 'roc_auc': 0.48275862068965519, 'recostrucción error': 176.24001337506297, 'n_components': 99}
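find_best_NMF also lives in the project's NMF module. Judging from the printed dictionaries, it sweeps n_components, factorizes the count matrix, and reports the reconstruction error alongside classifier scores for each size. A rough sketch of that loop under those assumptions, using scikit-learn's NMF (the scoring part is elided and all names are guesses, not the module's real code):

from sklearn.decomposition import NMF

def find_best_NMF_sketch(X, max_components=100):
    prev_err = None
    for n in range(2, max_components):
        nmf = NMF(n_components=n, random_state=0)
        W = nmf.fit_transform(X)          # patients × n latent groups
        err = nmf.reconstruction_err_     # Frobenius reconstruction error
        diff = None if prev_err is None else prev_err - err
        # ... fit/evaluate a logistic regression on W here ...
        print({'diffErr': diff, 'recostrucción error': err, 'n_components': n})
        prev_err = err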
In [13]:
import numpy as np
bestScore = 12
model, nmf, patients_nmf, accuracy, roc_auc = generateNMF(patients, survived, n_components=bestScore)
compoT = np.transpose(nmf.components_)
print("components", nmf.components_.shape)
print("components Transpose", compoT.shape)
components (12, 400)
components Transpose (400, 12)
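The transpose is the key step for what follows: nmf.components_ (H) is n_components × n_words, so each row of compoT places one word in the same 12-dimensional latent space that the patient rows of patients_nmf (W) live in, which is what lets the patient-level classifier score individual words in In [14]. A quick shape check (illustrative, assuming generateNMF returns W as patients_nmf):

# W: 554 patients × 12 groups; H: 12 groups × 400 words.
assert patients_nmf.shape == (patients.shape[0], bestScore)
assert compoT.shape == (patients.shape[1], bestScore)
word_risk = model.predict_proba(compoT)[:, 1]  # one mortality score per word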
In [14]:
from operator import itemgetter

def predict_proba(compoT, table, model):
    predicts = model.predict_proba(compoT)[:, 1]
    beats = list(table)
    sortedBeats = []
    numPred = len(predicts)
    print(len(beats), numPred)
    nvals = {}
    for i in range(numPred):
        word = beats[i]
        sel = selectPearson(word)
        if sel == {}:
            print("word not found in pearson:", word)
            continue
        sortedBeats.append({'word': word, 'predict': predicts[i], 'pacientes': sel['pacientes'],
                            'porcentaje de muertos': sel['porcentaje de muertos'],
                            'correlación': sel['p1']})
        if predicts[i] > .99: addValue("99", nvals)
        elif predicts[i] > .90: addValue("90", nvals)
        elif predicts[i] > .80: addValue("80", nvals)
        elif predicts[i] > .70: addValue("70", nvals)
        else: addValue("under", nvals)
    print(nvals)
    return sorted(sortedBeats, key=itemgetter('correlación'), reverse=True)
In [15]:
sortedBeats = predict_proba(compoT,table,model)
beatKeys = []
for value in sortedBeats:
beatKeys.append(value['word'])
400 400
{'80': 70, '70': 110, '90': 88, 'under': 67, '99': 65}
In [16]:
df = pd.DataFrame(sortedBeats)
df = df.set_index('word')
df = df.sort_values(['correlación'], ascending=[False])
df
Out[16]:
word   | correlación | pacientes | porcentaje de muertos | predict
adc    | 0.20889 | 155 | 89.68% | 0.99887
fdf    | 0.20378 | 83 | 96.39% | 0.99931
dbda   | 0.18907 | 221 | 85.07% | 0.59133
fbd    | 0.18370 | 165 | 87.27% | 0.86346
dff    | 0.18014 | 88 | 93.18% | 0.99897
ffd    | 0.17763 | 103 | 91.26% | 0.99937
fff    | 0.17753 | 81 | 93.83% | 0.99999
bdbd   | 0.17495 | 183 | 85.79% | 0.02946
addaba | 0.17329 | 106 | 90.57% | 0.88175
faf    | 0.17169 | 154 | 87.01% | 1.00000
dfdd   | 0.17163 | 60 | 96.67% | 0.99951
afg    | 0.17134 | 105 | 90.48% | 0.99999
haf    | 0.17118 | 66 | 95.45% | 0.98105
bdd    | 0.17036 | 269 | 82.53% | 0.00000
bdab   | 0.16941 | 414 | 79.23% | 0.98765
ddba   | 0.16922 | 219 | 84.02% | 0.99999
fabd   | 0.16740 | 103 | 90.29% | 0.76081
bbdd   | 0.16738 | 179 | 85.47% | 0.05326
gga    | 0.16714 | 70 | 94.29% | 0.99989
aadc   | 0.16713 | 123 | 88.62% | 0.98968
dbbd   | 0.16695 | 192 | 84.90% | 0.01009
gah    | 0.16649 | 44 | 100.00% | 0.96022
daf    | 0.16616 | 230 | 83.48% | 1.00000
afd    | 0.16499 | 221 | 83.71% | 0.99997
faff   | 0.16444 | 43 | 100.00% | 0.98875
adaf   | 0.16388 | 186 | 84.95% | 0.99992
ababaf | 0.16343 | 101 | 90.10% | 0.72496
adca   | 0.16318 | 106 | 89.62% | 0.99019
agg    | 0.16298 | 62 | 95.16% | 0.99994
bdaad  | 0.16258 | 163 | 85.89% | 0.66851
...    | ... | ... | ... | ...
bdbaad | 0.12117 | 76 | 88.16% | 0.65504
dbbbad | 0.12117 | 76 | 88.16% | 0.74107
addba  | 0.12114 | 164 | 82.93% | 0.99819
ddbaa  | 0.12114 | 164 | 82.93% | 0.90549
adaac  | 0.12110 | 81 | 87.65% | 0.85661
aabaga | 0.12110 | 81 | 87.65% | 0.72104
daad   | 0.12106 | 307 | 79.48% | 0.95275
ddfddd | 0.12077 | 24 | 100.00% | 0.92977
hbf    | 0.12077 | 24 | 100.00% | 0.69919
fgag   | 0.12077 | 24 | 100.00% | 0.89833
faffa  | 0.12077 | 24 | 100.00% | 0.92578
dgadaa | 0.12077 | 24 | 100.00% | 0.73167
hddd   | 0.12077 | 24 | 100.00% | 0.94726
dafga  | 0.12077 | 24 | 100.00% | 0.76656
adgada | 0.12077 | 24 | 100.00% | 0.74002
aadgad | 0.12077 | 24 | 100.00% | 0.73676
aadadg | 0.12077 | 24 | 100.00% | 0.70743
afdadd | 0.12077 | 24 | 100.00% | 0.73075
ddd    | 0.12074 | 223 | 81.17% | 1.00000
dgada  | 0.12070 | 31 | 96.77% | 0.79627
jag    | 0.12070 | 31 | 96.77% | 0.90347
gaadd  | 0.12070 | 31 | 96.77% | 0.73365
fdff   | 0.12070 | 31 | 96.77% | 0.88115
baabh  | 0.12070 | 31 | 96.77% | 0.68832
dddaf  | 0.12070 | 31 | 96.77% | 0.78696
bdabc  | 0.12070 | 31 | 96.77% | 0.69108
agha   | 0.12070 | 31 | 96.77% | 0.87282
aabbgb | 0.12070 | 31 | 96.77% | 0.68132
aabdad | 0.12057 | 119 | 84.87% | 0.70271
baabd  | 0.12056 | 326 | 79.14% | 0.96409
400 rows × 4 columns
In [17]:
print("Top 5 HeartBeats con mayor probabilidad de muerte según la regresión logística")
plot_word(sortedBeats[:5])
Top 5 HeartBeats con mayor probabilidad de muerte según la regresión logística
In [18]:
from operator import itemgetter
from scipy.stats import pearsonr
import pandas as pd

pd.set_option('display.float_format', lambda x: '%.5f' % x)

def find_pearson(value, patient, survived):
    pearsonList = []
    for i in range(value):
        pearson = pearsonr(patient[:, i], survived)
        pearsonList.append({'grupo': i, 'correlación': pearson[0], 'p-valor': pearson[1]})
    return sorted(pearsonList, key=itemgetter('correlación'), reverse=True)

sortedList = find_pearson(bestScore, patients_nmf, survived)
df = pd.DataFrame(sortedList)
df = df.set_index('grupo')
df = df.sort_values(['correlación'], ascending=[False])
df
Out[18]:
grupo | correlación | p-valor
4  | 0.09908 | 0.01967
6  | 0.09041 | 0.03337
3  | 0.08380 | 0.04868
11 | 0.07882 | 0.06374
2  | 0.05747 | 0.17680
5  | 0.05044 | 0.23595
0  | 0.04251 | 0.31791
7  | 0.03834 | 0.36779
9  | 0.03613 | 0.39604
10 | 0.02537 | 0.55127
1  | 0.01667 | 0.69548
8  | -0.02997 | 0.48152
In [19]:
columns = list(table)
components = nmf.components_
topword = print_top_words(components, columns, topic_index=sortedList[0]['grupo'])[0]['features']
subwords = []
for subword in topword:
    if subword['p1'] > 0:
        subwords.append(subword['word'])
print(str(subwords[:10]))
['fffa', 'hafa', 'agf', 'dabadb', 'fdfa', 'hfa', 'aaffd', 'ffda', 'fgh', 'gaj']
In [20]:
table = convert_matrix(sumvals=False,filter_words=tuple(subwords))
survived = table.index.labels[1].tolist()
patients = table.values
table
(553, 276)
Out[20]:
subject_id | isAlive | aabadd aabaf aabafb aabaga aabbg aabbgb aabdab aabdad aabdb aabdbb ... gha haf hafa hag haj hbf hfa hfg hga hgd
135    | 1 | 0 0 0 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
151    | 1 | 1 0 0 1 0 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
177    | 1 | 0 0 0 0 0 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
214    | 1 | 0 1 0 1 0 0 0 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
263    | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
279    | 1 | 0 0 0 0 0 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
283    | 1 | 1 0 0 0 0 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
368    | 1 | 0 1 1 1 1 1 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
377    | 1 | 1 1 1 1 0 0 1 1 1 1 ... 0 1 1 0 0 0 1 0 0 0
408    | 1 | 0 0 0 0 0 0 1 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
462    | 0 | 1 1 1 0 0 0 1 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
618    | 1 | 0 0 0 0 0 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
638    | 1 | 0 1 1 1 1 0 1 0 1 1 ... 0 0 0 0 1 0 0 0 1 0
682    | 1 | 0 1 0 0 0 0 0 0 1 0 ... 1 1 1 1 1 0 1 0 1 0
736    | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
743    | 1 | 0 0 0 0 0 0 1 0 1 1 ... 0 1 0 1 1 0 1 0 0 0
749    | 1 | 0 1 0 0 0 0 0 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
793    | 1 | 1 1 1 1 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
886    | 1 | 1 1 0 0 0 0 1 0 0 0 ... 1 1 1 0 0 0 1 0 0 0
1004   | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1075   | 1 | 1 1 0 1 1 0 1 1 1 1 ... 0 0 0 0 0 0 0 0 0 1
1144   | 0 | 0 0 0 0 1 0 0 0 1 0 ... 1 0 0 0 0 0 0 0 1 0
1160   | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1222   | 0 | 0 1 1 0 1 0 1 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
1226   | 1 | 0 1 1 0 0 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
1459   | 0 | 1 1 1 0 0 0 1 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
1528   | 1 | 1 1 1 1 0 0 1 0 1 1 ... 1 0 0 0 0 0 0 0 0 0
1531   | 1 | 0 0 0 0 0 0 0 0 1 0 ... 0 1 1 0 0 0 0 0 0 0
1569   | 1 | 0 1 1 0 1 0 1 1 1 1 ... 1 1 0 1 0 0 0 0 1 0
1924   | 1 | 0 1 1 0 0 0 1 0 1 1 ... 1 0 0 1 0 0 0 0 0 0
...    | ... | ...
23034  | 1 | 0 1 1 0 0 0 1 1 1 1 ... 1 1 1 1 0 0 0 0 0 1
23097  | 1 | 1 1 1 1 1 0 1 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
23120  | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23130  | 1 | 0 1 0 0 0 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
23178  | 1 | 0 1 1 0 1 1 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 1
23200  | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23298  | 0 | 0 0 0 0 0 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
23336  | 1 | 0 1 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23363  | 1 | 0 1 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23384  | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23401  | 1 | 1 1 0 0 0 0 1 1 1 1 ... 0 0 0 0 0 0 0 0 0 0
23451  | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23468  | 1 | 1 0 0 1 0 0 1 1 1 1 ... 1 0 0 1 0 0 0 0 0 0
23474  | 1 | 0 1 0 1 1 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 1
23510  | 1 | 0 0 0 1 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23944  | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24004  | 1 | 0 1 0 0 0 0 1 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
24076  | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24129  | 1 | 0 0 0 0 0 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
24133  | 0 | 0 1 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24142  | 1 | 0 1 1 1 0 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
24152  | 1 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24185  | 1 | 0 0 0 0 0 0 1 0 1 1 ... 0 0 0 0 0 0 0 0 0 0
24227  | 0 | 0 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
25466  | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
41962  | 1 | 1 1 0 0 0 0 1 1 1 0 ... 1 1 1 1 0 0 1 1 1 1
42261  | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
42410  | 0 | 0 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
42492  | 0 | 0 1 1 1 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
43459  | 0 | 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
553 rows × 276 columns
In [21]:
patients_train, patients_test,survived_train, survived_test = train_test_split(patients,survived,test_size=0.2, random_state=42)
print(table.shape)
print(patients_train.shape)
print(patients_test.shape)
(553, 276)
(442, 276)
(111, 276)
In [22]:
model, accuracy, roc_auc = ajustLogisticRegression(patients_train, survived_train, patients_test, survived_test)
In [23]:
columns = list(table.columns.values)
pearsonList = []
for i, word in enumerate(columns):
    pearson = pearsonr(patients[:, i], survived)
    count = countPatients(word)
    deadPatient, patient = count['deadPatient'], count['patient']
    percent = deadPatient / patient
    pearsonList.append({'word': word, 'correlación': pearson[0], 'p-valor': pearson[1],
                        'pacientes muertos': "{0:.2%}".format(percent) + " de %d" % patient})
In [24]:
df = pd.DataFrame(pearsonList)
df = df.set_index('word')
df = df.sort_values(['correlación'], ascending=[False])
df
Out[24]:
word   | correlación | p-valor | pacientes muertos
adc    | 0.20680 | 0.00000 | 89.68% de 155
fdf    | 0.20472 | 0.00000 | 96.39% de 83
dbda   | 0.18304 | 0.00001 | 85.07% de 221
fbd    | 0.17969 | 0.00002 | 87.27% de 165
dff    | 0.17956 | 0.00002 | 93.18% de 88
fff    | 0.17720 | 0.00003 | 93.83% de 81
ffd    | 0.17615 | 0.00003 | 91.26% de 103
addaba | 0.17142 | 0.00005 | 90.57% de 106
haf    | 0.17138 | 0.00005 | 95.45% de 66
bdbd   | 0.16961 | 0.00006 | 85.79% de 183
fabd   | 0.16536 | 0.00009 | 90.29% de 103
fabdb  | 0.16373 | 0.00011 | 100.00% de 42
ddba   | 0.16192 | 0.00013 | 84.02% de 219
bbdd   | 0.16174 | 0.00013 | 85.47% de 179
ffdd   | 0.16161 | 0.00014 | 100.00% de 41
ababaf | 0.16127 | 0.00014 | 90.10% de 101
dgf    | 0.16115 | 0.00014 | 97.92% de 48
bdd    | 0.16093 | 0.00014 | 82.53% de 269
dbbd   | 0.16070 | 0.00015 | 84.90% de 192
ajd    | 0.15947 | 0.00017 | 100.00% de 40
fbf    | 0.15922 | 0.00017 | 90.00% de 100
daf    | 0.15816 | 0.00019 | 83.48% de 230
bdaad  | 0.15737 | 0.00020 | 85.89% de 163
afd    | 0.15732 | 0.00020 | 83.71% de 221
ddaba  | 0.15706 | 0.00021 | 86.81% de 144
ffh    | 0.15513 | 0.00025 | 100.00% de 38
addab  | 0.15506 | 0.00025 | 86.71% de 143
dfd    | 0.15466 | 0.00026 | 86.96% de 138
afdf   | 0.15300 | 0.00030 | 96.08% de 51
cafa   | 0.15191 | 0.00034 | 93.65% de 63
...    | ... | ... | ...
bdbaad | 0.11819 | 0.00539 | 88.16% de 76
dbbbad | 0.11819 | 0.00539 | 88.16% de 76
abfbab | 0.11819 | 0.00539 | 88.16% de 76
abdd   | 0.11792 | 0.00549 | 82.23% de 197
aabaga | 0.11782 | 0.00554 | 87.65% de 81
bacd   | 0.11780 | 0.00554 | 86.14% de 101
adbbda | 0.11780 | 0.00554 | 86.14% de 101
abdab  | 0.11778 | 0.00555 | 79.36% de 344
abafb  | 0.11759 | 0.00563 | 83.78% de 148
aabaf  | 0.11758 | 0.00563 | 81.39% de 231
fbbd   | 0.11755 | 0.00565 | 86.81% de 91
bbdda  | 0.11727 | 0.00576 | 85.00% de 120
aabadd | 0.11673 | 0.00599 | 85.22% de 115
dadaba | 0.11642 | 0.00613 | 84.50% de 129
badbd  | 0.11626 | 0.00620 | 85.45% de 110
bdbdb  | 0.11574 | 0.00644 | 84.68% de 124
bafba  | 0.11551 | 0.00654 | 83.67% de 147
bdda   | 0.11548 | 0.00655 | 81.69% de 213
aabdad | 0.11512 | 0.00673 | 84.87% de 119
bdaada | 0.11504 | 0.00677 | 84.21% de 133
acabaa | 0.11483 | 0.00687 | 82.58% de 178
agd    | 0.11438 | 0.00709 | 83.44% de 151
ddbaa  | 0.11338 | 0.00761 | 82.93% de 164
addba  | 0.11338 | 0.00761 | 82.93% de 164
dbabab | 0.11273 | 0.00797 | 81.01% de 237
afbba  | 0.11253 | 0.00808 | 82.11% de 190
aabdb  | 0.10912 | 0.01023 | 79.07% de 344
abdbbb | 0.10867 | 0.01055 | 80.30% de 264
bdaab  | 0.10864 | 0.01057 | 79.17% de 336
baabd  | 0.10392 | 0.01449 | 79.14% de 326
276 rows × 3 columns
In [ ]: