NMF

Imports


In [1]:
from time import time
import psycopg2
from collections import Counter
import gc
import pandas as pd
import numpy as np
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.metrics import classification_report
np.set_printoptions(suppress=True,precision=10)

In [2]:
import sys 
import os
sys.path.append(os.path.abspath("/home/scidb/HeartRatePatterns/Python"))
from LogisticRegresion import ajustLogisticRegression
from Matrix import convert_matrix
from NMF import find_best_NMF,find_pearson

In [3]:
t0=time()
table = convert_matrix(sumvals=False)
#sortColumns =['abbac','adadd']
#table = table.sort_values(sortColumns, ascending=False)
print("converMatrix done in %0.3fs." % (time() - t0))
table


(590, 58840)
converMatrix done in 60.389s.
Out[3]:
aaaaaaa aaaaaaaa aaaaaaab aaaaaaac aaaaaaad aaaaaaae aaaaaaaf aaaaaaag aaaaaaah aaaaaaai ... kkb kke lab lbb lbbb lbbc lcc leb lib libb
subject_id isAlive
20 0 1 1 1 0 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
135 1 1 1 1 0 1 0 1 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
151 1 1 1 1 1 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
177 1 1 1 1 0 1 0 1 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
214 1 1 1 1 1 1 0 1 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
263 1 1 1 1 0 1 0 0 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
279 1 1 1 1 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
283 1 1 1 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
368 1 1 1 1 1 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
377 1 1 1 1 1 1 0 1 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
408 1 1 1 1 0 1 0 1 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
462 0 1 1 1 0 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
618 1 1 1 1 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
638 1 1 1 1 1 1 0 1 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
682 1 1 1 1 1 1 1 1 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
736 0 1 1 1 0 0 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
743 1 1 1 1 1 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
749 1 1 1 1 1 1 0 1 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
793 1 1 1 1 1 1 0 1 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
886 1 1 1 1 0 1 0 1 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
952 1 1 1 1 1 1 0 1 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
974 0 1 1 1 1 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1004 1 1 1 1 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1075 1 1 1 1 1 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1144 0 1 1 1 1 1 0 1 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
1160 0 1 1 1 0 1 0 1 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
1222 0 1 1 1 0 1 0 1 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
1226 1 1 1 1 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1459 0 1 1 1 0 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1528 1 1 1 1 1 1 0 1 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
23178 1 1 1 1 1 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23193 0 1 1 1 0 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23200 0 1 1 1 1 0 0 0 0 0 0 ... 0 1 0 1 1 0 0 1 0 0
23298 0 1 1 1 1 1 1 1 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
23336 1 1 1 1 1 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23339 0 1 1 1 0 1 0 0 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
23363 1 1 1 1 1 1 0 1 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
23384 0 1 1 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23401 1 1 1 1 0 1 0 0 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
23451 1 1 1 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23468 1 1 1 1 1 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23474 1 1 1 1 1 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23510 1 1 1 1 0 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23944 1 1 1 1 1 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24004 1 1 1 1 1 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24030 0 1 1 1 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24076 1 1 1 1 0 1 0 1 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24129 1 1 1 1 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24133 0 1 1 1 1 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24142 1 1 1 1 0 1 0 1 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
24152 1 1 1 1 0 1 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24185 1 1 1 1 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24227 0 1 1 1 1 1 0 1 1 0 0 ... 0 0 0 0 0 0 0 0 0 0
25466 0 1 1 1 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
41962 1 1 1 1 0 1 0 1 1 1 0 ... 0 0 0 0 0 0 0 0 0 0
42255 1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
42261 0 1 1 1 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
42410 0 1 1 1 1 1 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
42492 0 1 1 1 1 1 0 1 1 0 1 ... 0 0 0 0 0 0 0 0 0 0
43459 0 1 1 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

590 rows × 58840 columns


In [4]:
from sklearn.model_selection import train_test_split
survived = table.index.labels[1].tolist()
patients = table.values
patients_train, patients_test,survived_train, survived_test = train_test_split(patients,survived,test_size=0.2, random_state=42)

In [5]:
print(table.shape)
print(patients_train.shape)
print(patients_test.shape)


(590, 58840)
(472, 58840)
(118, 58840)

NMF + Logistic Regression accurancy


In [6]:
find_best_NMF(patients,survived)


2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,

In [ ]: