In [2]:
import numpy as np
import logging
import matplotlib.pyplot as plt

import sys
sys.path.append('..')

# Load the configuration module under the name `conf`: try
# `user_project_config` first (presumably a per-user override -- confirm),
# then fall back to `project_config`.
try:
  import user_project_config as conf
except ImportError:
  # Fall back only when the override module cannot be imported. A bare
  # `except:` would also hide real errors raised inside an existing
  # user_project_config (syntax errors, bad paths, KeyboardInterrupt) and
  # silently switch to a different configuration.
  import project_config as conf

from IO import data_loading as dl
from utils import logg 
from utils import data_processing as dp
from models_utils import models_utils as mu

import pandas as pd

In [4]:
# Read 'selected.csv' from the location prefix stored in conf.path_to_dta.
# The prefix is concatenated as-is, so it must already end with a path
# separator.
# NOTE(review): the rendered SelfHealth2 values show replacement characters
# around 'Poor', which suggests the file is not in the default encoding --
# confirm and pass an explicit `encoding=` if so.
selected_csv_path = conf.path_to_dta+'selected.csv'
data = pd.read_csv(selected_csv_path)

In [5]:
# Show the loaded frame; the bare expression is rendered as the cell output.
data


Out[5]:
GIDN M1_3 M6_1 M6_2 M6_3 M6_3a M6_4 M6_5 M6_6 M6_7 ... GH PCS MCS SelfHealth2 Angina MyoInfar SBP DBP GRIP MetabSyn
0 110270 males Ex-smoker 12 65 No 10 NaN 0 Yes ... 30 60.695800 23.818325 Good or fair 0 0 129.0 74.0 25 0
1 110280 males Ex-smoker 15 80 Yes 20 NaN 0 Yes ... 20 NaN NaN Good or fair 0 0 162.0 66.0 19 0
2 110307 males Ex-smoker 18 73 No NaN NaN 0 Yes ... 50 31.822480 71.938405 Good or fair 0 0 146.0 71.0 30 1
3 110318 males Never smoked NaN NaN NaN NaN NaN 0 No I have given up drinking ... 40 38.419545 38.142345 �Poor� 1 0 170.0 75.5 13 0
4 110357 males Never smoked NaN NaN NaN NaN NaN NaN Yes ... 45 NaN NaN Good or fair 0 0 150.0 64.0 39 0
5 110477 males Ex-smoker 35 70 No 20 NaN 0 No I have given up drinking ... 60 55.156880 43.633830 Good or fair 1 1 91.0 61.0 35 0
6 110552 males Current smoker 17 NaN No 10 10 2 Yes ... 65 64.290890 63.625740 Good or fair 1 0 158.0 74.0 37 1
7 110579 males Ex-smoker 14 55 No 10 NaN NaN Yes ... 30 47.954470 54.691520 Good or fair 0 0 161.0 77.0 47 0
8 110664 males Ex-smoker 14 28 NaN 20 0 0 Yes ... 30 33.360510 25.167710 �Poor� 1 0 137.0 83.5 30 0
9 110760 males Ex-smoker 16 41 NaN 30 NaN 0 Yes ... 70 62.955745 62.720620 Good or fair 0 0 121.0 63.0 34 0
10 110784 males Ex-smoker 20 70 Yes 20 NaN 0 Yes ... 45 35.217035 43.932910 Good or fair NaN NaN 99.0 62.0 19 0
11 110788 males Never smoked NaN NaN NaN NaN NaN 0 Yes ... 40 48.617470 53.330970 Good or fair 1 0 138.0 79.0 25 0
12 110797 males Never smoked NaN NaN NaN NaN NaN 0 Yes ... 60 75.218357 36.563033 Good or fair 0 0 181.0 105.0 38 1
13 110831 males Current smoker 16 NaN No 10 20 0 Yes ... 30 17.172360 59.668385 Good or fair 1 1 160.0 80.0 25 0
14 110840 males Never smoked NaN NaN NaN NaN NaN NaN Yes ... 45 14.834825 54.786075 Good or fair 0 0 95.0 60.0 22 0
15 110863 males Never smoked NaN NaN NaN NaN NaN 0 No I have never drunk ... 30 NaN NaN Good or fair 0 0 134.0 82.0 33 0
16 110869 males Never smoked NaN NaN NaN NaN NaN 0 Yes ... 45 62.919947 40.307872 Good or fair 0 0 123.0 76.0 39 0
17 110891 males Ex-smoker 18 60 No 8 NaN 1 Yes ... 20 27.189128 48.489703 �Poor� 0 0 141.0 76.0 39 0
18 110899 males Never smoked NaN NaN NaN NaN NaN 0 No I have never drunk ... 60 36.717728 55.787228 Good or fair 0 0 188.0 82.0 31 1
19 110902 males Ex-smoker 18 73 No 10 NaN 1 Yes ... 70 79.075660 39.761410 Good or fair 0 0 171.0 90.0 51 1
20 110903 males Never smoked NaN NaN NaN NaN NaN 1 Yes ... 50 49.312500 35.074725 Good or fair 0 0 152.0 86.0 36 0
21 110953 males Current smoker 13 NaN No 10 13 1 No I have never drunk ... 20 36.533720 22.614470 �Poor� 1 1 153.0 71.0 32 1
22 111042 males Never smoked NaN NaN NaN NaN NaN 0 Yes ... 60 59.003110 45.874685 Good or fair 0 0 135.0 77.0 40 0
23 111082 males Never smoked NaN NaN NaN NaN NaN 0 Yes ... 45 65.871710 54.550410 Good or fair 0 0 140.0 66.0 41 1
24 111083 males Ex-smoker 23 72 No 5 NaN 0 Yes ... 55 71.645080 40.892180 Good or fair 0 0 143.0 83.0 45 0
25 111117 males Never smoked NaN NaN NaN NaN NaN 0 Yes ... 45 57.076700 56.152875 Good or fair 1 0 165.0 109.0 20 0
26 111121 males Ex-smoker 25 32 No 15 NaN 0 Yes ... 30 49.280738 30.552038 Good or fair 0 0 147.0 76.0 24 0
27 111122 males Ex-smoker 21 25 No 7 NaN 2 Yes ... 55 28.415370 51.786820 �Poor� 0 0 157.0 90.0 30 1
28 111178 males Ex-smoker 12 45 No 20 NaN 0 Yes ... 35 NaN NaN Good or fair 0 0 145.0 84.0 35 0
29 111202 males Never smoked NaN NaN NaN NaN NaN 0 Yes ... 40 37.780490 47.451490 Good or fair 1 0 215.0 82.0 41 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1770 810382 males Never smoked NaN NaN NaN NaN NaN NaN Yes ... 35 NaN NaN Good or fair 0 0 126.0 71.0 42 0
1771 810383 males Ex-smoker 21 26 No NaN NaN NaN Yes ... 40 69.536240 46.472590 Good or fair 0 0 130.0 75.5 40 0
1772 810395 males Never smoked NaN NaN NaN NaN NaN 1 Yes ... 45 69.038080 55.397480 Good or fair 0 0 131.0 95.0 54 0
1773 810399 males Ex-smoker 20 40 No 20 NaN 0 Yes ... 40 NaN NaN Good or fair 0 0 115.0 63.0 45 0
1774 820019 females Never smoked NaN NaN NaN NaN NaN NaN No I have never drunk ... 35 56.452832 44.861583 �Poor� 0 0 125.0 73.0 30 1
1775 820028 females Never smoked NaN NaN NaN NaN NaN 0 Yes ... 45 59.594412 23.960888 Good or fair 0 0 130.0 77.0 29 1
1776 820047 females Never smoked NaN NaN NaN NaN NaN NaN Yes ... 25 36.744170 40.160170 �Poor� 0 0 135.0 81.0 27 0
1777 820050 females Ex-smoker 30 32 No 1 NaN 0 Yes ... 65 67.428100 60.859500 Good or fair 0 0 118.0 77.0 28 1
1778 820086 females Never smoked NaN NaN NaN NaN NaN 0 No I have never drunk ... 45 49.634690 45.472990 Good or fair 1 0 165.0 78.0 28 1
1779 820148 females Never smoked NaN NaN NaN NaN NaN NaN Yes ... 45 57.392267 42.913517 Good or fair 0 0 151.0 88.0 27 0
1780 820162 females Never smoked NaN NaN NaN NaN NaN NaN Yes ... 45 56.113745 54.670470 Good or fair 0 0 161.0 84.0 27 1
1781 820212 females Ex-smoker 40 42 No 2 NaN 0 Yes ... 10 55.082090 41.929040 �Poor� 1 1 128.0 72.0 22 0
1782 820261 females Never smoked NaN NaN NaN NaN NaN 0 Yes ... 50 45.096840 40.649615 Good or fair 1 0 150.0 80.0 25 0
1783 820274 females Ex-smoker 24 34 No 7 NaN NaN No I have never drunk ... 20 15.243285 41.806360 �Poor� 0 0 126.0 87.0 9 0
1784 820276 females Never smoked NaN NaN NaN NaN NaN NaN No I have never drunk ... 30 -1.670710 70.919115 �Poor� 0 0 153.0 93.0 NaN 0
1785 820278 females Current smoker 23 NaN No 15 10 2 Yes ... 40 57.949837 41.218488 Good or fair 1 0 131.0 70.0 24 1
1786 820303 females Never smoked NaN NaN NaN NaN NaN 0 Yes ... 30 37.573060 27.140235 �Poor� 1 0 149.5 80.0 16 1
1787 820312 females Never smoked NaN NaN NaN NaN NaN NaN Yes ... 50 NaN NaN Good or fair 1 0 154.0 82.0 21 1
1788 820321 females Never smoked NaN NaN NaN NaN NaN 0 Yes ... 45 NaN NaN Good or fair 0 0 110.0 73.0 32 0
1789 820334 females Never smoked NaN NaN NaN NaN NaN 1 Yes ... 45 33.424270 56.268820 Good or fair 0 0 145.0 82.0 27 0
1790 820350 females Never smoked NaN NaN NaN 0 0 0 Yes ... 30 NaN NaN Good or fair 0 0 180.0 90.0 30 1
1791 820408 females Never smoked NaN NaN NaN NaN NaN NaN Yes ... 80 76.648740 59.117240 Good or fair 0 0 110.0 81.0 25 0
1792 820424 females Never smoked NaN NaN NaN NaN NaN 0 Yes ... 55 69.157240 39.644140 Good or fair 0 0 153.0 87.0 30 1
1793 820444 females Never smoked NaN NaN NaN NaN NaN NaN Yes ... 70 78.180310 33.650110 Good or fair 0 0 121.0 64.5 22 0
1794 820494 females Never smoked NaN NaN NaN NaN NaN NaN No I have never drunk ... 15 3.173860 50.900985 �Poor� 1 0 180.0 103.0 16 0
1795 820540 females Never smoked NaN NaN NaN NaN NaN NaN Yes ... 30 33.737570 38.536470 Good or fair 0 0 128.0 80.0 16 0
1796 820549 females Never smoked NaN NaN NaN NaN NaN NaN No I have given up drinking ... 35 46.295940 56.556490 Good or fair 0 0 116.0 76.5 19 1
1797 820589 females Never smoked NaN NaN NaN NaN NaN 0 Yes ... 60 71.723830 40.900780 Good or fair 0 0 128.0 78.0 29 0
1798 820980 females Never smoked NaN NaN NaN NaN NaN 0 Yes ... 65 57.554860 61.971760 Good or fair 0 0 116.0 62.0 30 0
1799 821064 females Ex-smoker 20 58 No 8 NaN 0 Yes ... 35 NaN NaN Good or fair 1 0 117.0 66.0 31 0

1800 rows × 111 columns


In [72]:
# Inspect how one respondent's answer is stored in the objective column and
# check that a missing answer in that column is represented as NaN.

# Respondent id to look up in the 'GIDN' column (110357 was tried earlier).
GIDN = 110280

# Objective name -> [table name, column name inside that table].
features_code = {'stenocard':  ['selected', 'M8_4_19'],
                'kidney1':     ['selected', 'M8_4_28'],
                'kidney2':     ['selected', 'M8_4_29']}

# Switch the key to 'kidney1' / 'kidney2' to inspect another objective.
table_name, objective_in_table = features_code['stenocard']

# One .loc lookup (row mask + column label) instead of chained indexing.
ans = data.loc[data['GIDN'] == GIDN, objective_in_table]
# Prints the same text as the Python 2 statement `print 'ans', ans`, but is
# also valid Python 3 syntax.
print('ans %s' % (ans,))

# Raw values of the objective column (object dtype, see the printed Series).
# Distinct values seen while exploring: ['No' 'Have now' 'Have had' nan]
d = data[objective_in_table].values

# Row position probed for a missing answer; the recorded output is True.
probe_row = 87

# pd.isnull accepts any scalar, whereas np.isnan raises TypeError when the
# probed element of this object column is a string.
pd.isnull(d[probe_row])


ans 1    Have now
Name: M8_4_19, dtype: object
Out[72]:
True

In [74]:
# ---------------------------------------------------------------
# Settings for the sample to load
SEED = 0  # not referenced anywhere else in this cell
OBJECTIVE_NAME = 'some_diseases_ver1'
sample_name = OBJECTIVE_NAME + '_1'  # name of the train/test sample file

# ---------------------------------------------------------------
# Load the prepared train and test samples plus their metadata
(trainX, trainY,
 testX, testY,
 sample_info) = dl.load_hdf5_sample(sample_name)

In [82]:
# Count the NaN entries of testY along axis 0.
np.isnan(testY).sum(axis=0)


Out[82]:
array([0, 0, 0])