In [2]:
import numpy as np
import logging
import matplotlib.pyplot as plt
import sys
sys.path.append('..')
# Prefer a user-specific configuration module when one is importable and
# fall back to the default project configuration otherwise.
# Only ImportError triggers the fallback: a bare `except:` would also hide
# real errors raised while importing user_project_config (e.g. a syntax
# error in that file) and silently load the wrong configuration.
try:
    import user_project_config as conf
except ImportError:
    import project_config as conf
from IO import data_loading as dl
from utils import logg
from utils import data_processing as dp
from models_utils import models_utils as mu
import pandas as pd
In [4]:
# Load the 'selected.csv' table; its location is the prefix stored in
# conf.path_to_dta.
# NOTE(review): the prefix is joined by plain string concatenation, so it
# presumably already ends with a path separator -- confirm in the config.
# NOTE(review): the rendered frame shows values such as '\ufffdPoor\ufffd' in
# SelfHealth2, which suggests the file is not UTF-8 encoded -- TODO confirm
# and consider passing an explicit `encoding=` to read_csv.
selected_csv_path = conf.path_to_dta + 'selected.csv'
data = pd.read_csv(selected_csv_path)
In [5]:
# Rich display of the loaded frame; the rendered preview is truncated
# (elided rows/columns are shown as '...').
data
Out[5]:
GIDN
M1_3
M6_1
M6_2
M6_3
M6_3a
M6_4
M6_5
M6_6
M6_7
...
GH
PCS
MCS
SelfHealth2
Angina
MyoInfar
SBP
DBP
GRIP
MetabSyn
0
110270
males
Ex-smoker
12
65
No
10
NaN
0
Yes
...
30
60.695800
23.818325
Good or fair
0
0
129.0
74.0
25
0
1
110280
males
Ex-smoker
15
80
Yes
20
NaN
0
Yes
...
20
NaN
NaN
Good or fair
0
0
162.0
66.0
19
0
2
110307
males
Ex-smoker
18
73
No
NaN
NaN
0
Yes
...
50
31.822480
71.938405
Good or fair
0
0
146.0
71.0
30
1
3
110318
males
Never smoked
NaN
NaN
NaN
NaN
NaN
0
No I have given up drinking
...
40
38.419545
38.142345
�Poor�
1
0
170.0
75.5
13
0
4
110357
males
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
Yes
...
45
NaN
NaN
Good or fair
0
0
150.0
64.0
39
0
5
110477
males
Ex-smoker
35
70
No
20
NaN
0
No I have given up drinking
...
60
55.156880
43.633830
Good or fair
1
1
91.0
61.0
35
0
6
110552
males
Current smoker
17
NaN
No
10
10
2
Yes
...
65
64.290890
63.625740
Good or fair
1
0
158.0
74.0
37
1
7
110579
males
Ex-smoker
14
55
No
10
NaN
NaN
Yes
...
30
47.954470
54.691520
Good or fair
0
0
161.0
77.0
47
0
8
110664
males
Ex-smoker
14
28
NaN
20
0
0
Yes
...
30
33.360510
25.167710
�Poor�
1
0
137.0
83.5
30
0
9
110760
males
Ex-smoker
16
41
NaN
30
NaN
0
Yes
...
70
62.955745
62.720620
Good or fair
0
0
121.0
63.0
34
0
10
110784
males
Ex-smoker
20
70
Yes
20
NaN
0
Yes
...
45
35.217035
43.932910
Good or fair
NaN
NaN
99.0
62.0
19
0
11
110788
males
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
40
48.617470
53.330970
Good or fair
1
0
138.0
79.0
25
0
12
110797
males
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
60
75.218357
36.563033
Good or fair
0
0
181.0
105.0
38
1
13
110831
males
Current smoker
16
NaN
No
10
20
0
Yes
...
30
17.172360
59.668385
Good or fair
1
1
160.0
80.0
25
0
14
110840
males
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
Yes
...
45
14.834825
54.786075
Good or fair
0
0
95.0
60.0
22
0
15
110863
males
Never smoked
NaN
NaN
NaN
NaN
NaN
0
No I have never drunk
...
30
NaN
NaN
Good or fair
0
0
134.0
82.0
33
0
16
110869
males
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
45
62.919947
40.307872
Good or fair
0
0
123.0
76.0
39
0
17
110891
males
Ex-smoker
18
60
No
8
NaN
1
Yes
...
20
27.189128
48.489703
�Poor�
0
0
141.0
76.0
39
0
18
110899
males
Never smoked
NaN
NaN
NaN
NaN
NaN
0
No I have never drunk
...
60
36.717728
55.787228
Good or fair
0
0
188.0
82.0
31
1
19
110902
males
Ex-smoker
18
73
No
10
NaN
1
Yes
...
70
79.075660
39.761410
Good or fair
0
0
171.0
90.0
51
1
20
110903
males
Never smoked
NaN
NaN
NaN
NaN
NaN
1
Yes
...
50
49.312500
35.074725
Good or fair
0
0
152.0
86.0
36
0
21
110953
males
Current smoker
13
NaN
No
10
13
1
No I have never drunk
...
20
36.533720
22.614470
�Poor�
1
1
153.0
71.0
32
1
22
111042
males
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
60
59.003110
45.874685
Good or fair
0
0
135.0
77.0
40
0
23
111082
males
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
45
65.871710
54.550410
Good or fair
0
0
140.0
66.0
41
1
24
111083
males
Ex-smoker
23
72
No
5
NaN
0
Yes
...
55
71.645080
40.892180
Good or fair
0
0
143.0
83.0
45
0
25
111117
males
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
45
57.076700
56.152875
Good or fair
1
0
165.0
109.0
20
0
26
111121
males
Ex-smoker
25
32
No
15
NaN
0
Yes
...
30
49.280738
30.552038
Good or fair
0
0
147.0
76.0
24
0
27
111122
males
Ex-smoker
21
25
No
7
NaN
2
Yes
...
55
28.415370
51.786820
�Poor�
0
0
157.0
90.0
30
1
28
111178
males
Ex-smoker
12
45
No
20
NaN
0
Yes
...
35
NaN
NaN
Good or fair
0
0
145.0
84.0
35
0
29
111202
males
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
40
37.780490
47.451490
Good or fair
1
0
215.0
82.0
41
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1770
810382
males
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
Yes
...
35
NaN
NaN
Good or fair
0
0
126.0
71.0
42
0
1771
810383
males
Ex-smoker
21
26
No
NaN
NaN
NaN
Yes
...
40
69.536240
46.472590
Good or fair
0
0
130.0
75.5
40
0
1772
810395
males
Never smoked
NaN
NaN
NaN
NaN
NaN
1
Yes
...
45
69.038080
55.397480
Good or fair
0
0
131.0
95.0
54
0
1773
810399
males
Ex-smoker
20
40
No
20
NaN
0
Yes
...
40
NaN
NaN
Good or fair
0
0
115.0
63.0
45
0
1774
820019
females
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
No I have never drunk
...
35
56.452832
44.861583
�Poor�
0
0
125.0
73.0
30
1
1775
820028
females
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
45
59.594412
23.960888
Good or fair
0
0
130.0
77.0
29
1
1776
820047
females
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
Yes
...
25
36.744170
40.160170
�Poor�
0
0
135.0
81.0
27
0
1777
820050
females
Ex-smoker
30
32
No
1
NaN
0
Yes
...
65
67.428100
60.859500
Good or fair
0
0
118.0
77.0
28
1
1778
820086
females
Never smoked
NaN
NaN
NaN
NaN
NaN
0
No I have never drunk
...
45
49.634690
45.472990
Good or fair
1
0
165.0
78.0
28
1
1779
820148
females
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
Yes
...
45
57.392267
42.913517
Good or fair
0
0
151.0
88.0
27
0
1780
820162
females
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
Yes
...
45
56.113745
54.670470
Good or fair
0
0
161.0
84.0
27
1
1781
820212
females
Ex-smoker
40
42
No
2
NaN
0
Yes
...
10
55.082090
41.929040
�Poor�
1
1
128.0
72.0
22
0
1782
820261
females
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
50
45.096840
40.649615
Good or fair
1
0
150.0
80.0
25
0
1783
820274
females
Ex-smoker
24
34
No
7
NaN
NaN
No I have never drunk
...
20
15.243285
41.806360
�Poor�
0
0
126.0
87.0
9
0
1784
820276
females
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
No I have never drunk
...
30
-1.670710
70.919115
�Poor�
0
0
153.0
93.0
NaN
0
1785
820278
females
Current smoker
23
NaN
No
15
10
2
Yes
...
40
57.949837
41.218488
Good or fair
1
0
131.0
70.0
24
1
1786
820303
females
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
30
37.573060
27.140235
�Poor�
1
0
149.5
80.0
16
1
1787
820312
females
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
Yes
...
50
NaN
NaN
Good or fair
1
0
154.0
82.0
21
1
1788
820321
females
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
45
NaN
NaN
Good or fair
0
0
110.0
73.0
32
0
1789
820334
females
Never smoked
NaN
NaN
NaN
NaN
NaN
1
Yes
...
45
33.424270
56.268820
Good or fair
0
0
145.0
82.0
27
0
1790
820350
females
Never smoked
NaN
NaN
NaN
0
0
0
Yes
...
30
NaN
NaN
Good or fair
0
0
180.0
90.0
30
1
1791
820408
females
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
Yes
...
80
76.648740
59.117240
Good or fair
0
0
110.0
81.0
25
0
1792
820424
females
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
55
69.157240
39.644140
Good or fair
0
0
153.0
87.0
30
1
1793
820444
females
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
Yes
...
70
78.180310
33.650110
Good or fair
0
0
121.0
64.5
22
0
1794
820494
females
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
No I have never drunk
...
15
3.173860
50.900985
�Poor�
1
0
180.0
103.0
16
0
1795
820540
females
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
Yes
...
30
33.737570
38.536470
Good or fair
0
0
128.0
80.0
16
0
1796
820549
females
Never smoked
NaN
NaN
NaN
NaN
NaN
NaN
No I have given up drinking
...
35
46.295940
56.556490
Good or fair
0
0
116.0
76.5
19
1
1797
820589
females
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
60
71.723830
40.900780
Good or fair
0
0
128.0
78.0
29
0
1798
820980
females
Never smoked
NaN
NaN
NaN
NaN
NaN
0
Yes
...
65
57.554860
61.971760
Good or fair
0
0
116.0
62.0
30
0
1799
821064
females
Ex-smoker
20
58
No
8
NaN
0
Yes
...
35
NaN
NaN
Good or fair
1
0
117.0
66.0
31
0
1800 rows × 111 columns
In [72]:
# Scratch inspection of one objective column: look up a single participant's
# answer and check whether a given row of the column is missing.

# Participant id to inspect (110357 was another id tried before this one).
GIDN = 110280

# objective name -> [table name, column name inside that table]
features_code = {'stenocard': ['selected', 'M8_4_19'],
                 'kidney1': ['selected', 'M8_4_28'],
                 'kidney2': ['selected', 'M8_4_29']}
table_name, objective_in_table = features_code['stenocard']

# Row mask and column label in one .loc call instead of chained indexing.
ans = data.loc[data['GIDN'] == GIDN, objective_in_table]
# Single pre-formatted argument: prints the same text under Python 2 and 3.
print('ans {}'.format(ans))

# Raw values of the objective column; per the author's earlier note the
# answers seen here are 'No', 'Have now', 'Have had' and NaN.
d = data[objective_in_table].values

# Row whose value is checked for missingness.
NAN_CHECK_ROW = 87
# pd.isnull accepts both strings and float NaN, whereas np.isnan raises
# TypeError when the element of this object-dtype array is a string.
# (Comparing with `== np.nan` would never match: NaN is not equal to itself.)
pd.isnull(d[NAN_CHECK_ROW])
ans 1 Have now
Name: M8_4_19, dtype: object
Out[72]:
True
In [74]:
# Name of the objective and of the stored train/test split built for it.
OBJECTIVE_NAME = 'some_diseases_ver1'
sample_name = ''.join([OBJECTIVE_NAME, '_1'])  # train-test filename
SEED = 0

# ----------------------------------------------------------------
# Load the prepared train and test samples together with their metadata.
loaded_sample = dl.load_hdf5_sample(sample_name)
trainX, trainY, testX, testY, sample_info = loaded_sample
In [82]:
# Number of NaN entries along axis 0 of the test targets.
np.isnan(testY).sum(axis=0)
Out[82]:
array([0, 0, 0])
Content source: prikhodkop/ECG_project
Similar notebooks: