calculates the feature scores for each split
In [1]:
from __future__ import print_function
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import RandomizedLasso
import sys
import os
In [2]:
# load lb, test and CV CIDs
# load LB CIDs
with open(os.path.abspath('__file__' + "/../../../../data/CID_leaderboard.txt")) as f:
content = f.readlines()
lb_CIDs = list(content)
lb_CIDs = [int(x) for x in lb_CIDs]
# load test CIDs
with open(os.path.abspath('__file__' + "/../../../../data/CID_testset.txt")) as f:
content = f.readlines()
test_CIDs = list(content)
test_CIDs = [int(x) for x in test_CIDs]
In [3]:
#features = pd.read_csv('../../../data/linear_scores_and_features/features_dragon_morgan.csv')
features = pd.read_csv('features.csv')
features.head()
Out[3]:
CID
complexity from pubmed
MW
AMW
Sv
Se
Sp
Si
Mv
Me
...
91305518_2
91411526_2
91541756_2
91552833_2
91563027_2
91595028_2
91614181_2
91617014_2
91617930_2
91618238_2
0
126
0.181128
0.270753
0.030587
0.262264
0.219126
0.253846
0.214989
0.216981
0.425532
...
0.000013
0.000331
0.014024
0.000296
0.021098
0.000186
0.003159
0.002299
0.000138
0.011080
1
176
0.060311
0.109331
0.025411
0.096943
0.105579
0.090940
0.107335
0.125214
0.659574
...
0.000124
0.000205
0.008391
0.000930
0.001442
0.000094
0.000607
0.001362
0.000229
0.004162
2
177
0.020039
0.067721
0.015501
0.075556
0.083688
0.078074
0.089782
0.106346
0.382979
...
0.000014
0.000092
0.000961
0.000339
0.000657
0.000008
0.000098
0.000221
0.000037
0.001932
3
180
0.051167
0.104208
0.011542
0.121231
0.131248
0.127898
0.139362
0.099485
0.269504
...
0.000124
0.000205
0.003729
0.000930
0.000641
0.000094
0.000607
0.001961
0.000229
0.001850
4
196
0.221790
0.333247
0.023779
0.306622
0.308572
0.294339
0.305729
0.138079
0.539007
...
0.001029
0.000737
0.013662
0.009383
0.001954
0.000820
0.003130
0.005600
0.002189
0.010702
5 rows × 14613 columns
In [4]:
#trainsplits = pd.read_csv('/media/gabor/H/python_from_C/final_ofaction_for_paper_2/data/cv_splits_train_big.csv',header=None)
#testsplits = pd.read_csv('/media/gabor/H/python_from_C/final_ofaction_for_paper_2/data/cv_splits_test_big.csv',header=None)
In [5]:
trainsplits = pd.read_csv('../../../data/cv_splits_train_bigger.csv',header=None)
testsplits = pd.read_csv('../../../data/cv_splits_test_bigger.csv',header=None)
In [6]:
descriptor = {}
for idx, desc in enumerate([u'INTENSITY/STRENGTH', u'VALENCE/PLEASANTNESS', u'BAKERY',
u'SWEET', u'FRUIT', u'FISH', u'GARLIC', u'SPICES', u'COLD', u'SOUR', u'BURNT',
u'ACID', u'WARM', u'MUSKY', u'SWEATY', u'AMMONIA/URINOUS', u'DECAYED', u'WOOD',
u'GRASS', u'FLOWER', u'CHEMICAL']):
descriptor[idx] = desc
In [7]:
targets = pd.read_csv('targets_for_feature_selection.csv')
targets.columns = ['CID'] + list(targets.columns.values[1:])
targets.head()
Out[7]:
CID
INTENSITY/STRENGTH
VALENCE/PLEASANTNESS
BAKERY
SWEET
FRUIT
FISH
GARLIC
SPICES
COLD
...
ACID
WARM
MUSKY
SWEATY
AMMONIA/URINOUS
DECAYED
WOOD
GRASS
FLOWER
CHEMICAL
0
126
37.102041
50.081081
0.500000
21.959459
7.405405
0.175676
2.162162
4.554054
4.662162
...
4.094595
2.486486
7.216216
1.391892
2.554054
4.675676
0.891892
1.662162
8.094595
15.283784
1
176
8.051020
45.344828
2.275862
5.103448
1.137931
0.000000
6.448276
5.965517
4.793103
...
3.896552
5.448276
6.448276
3.551724
3.275862
4.275862
2.413793
2.482759
6.724138
7.724138
2
177
22.387755
48.418182
9.363636
19.781818
3.000000
0.763636
1.254545
2.472727
6.709091
...
3.563636
3.218182
6.218182
1.945455
2.727273
3.872727
0.727273
3.454545
4.090909
14.200000
3
196
14.530612
44.304348
1.304348
9.804348
0.913043
0.500000
3.239130
7.108696
2.152174
...
5.543478
6.695652
9.043478
7.304348
2.152174
4.217391
1.195652
1.543478
6.695652
7.847826
4
239
24.683673
51.724138
1.362069
13.500000
4.293103
1.482759
4.534483
6.189655
4.965517
...
3.241379
5.068966
6.534483
0.793103
0.931034
5.413793
3.120690
5.775862
9.396552
10.862069
5 rows × 22 columns
In [8]:
import datetime
In [9]:
features.shape
Out[9]:
(476, 14613)
In [13]:
import sys
sys.path.append('/Users/rgerkin/Dropbox/science/olfaction-prediction')
sys.path.append('/Users/rgerkin/Dropbox/science/olfaction-prediction/opc_python/utils')
import loading
training_leaderboard_CIDs = sorted(loading.get_CIDs('training')+loading.get_CIDs('leaderboard'))
In [17]:
from sklearn.cross_validation import ShuffleSplit
test_size = 0.17
splits = ShuffleSplit(len(training_leaderboard_CIDs),n_iter=250,test_size=test_size,random_state=0)
i = 0
for train,test in splits:
print([training_leaderboard_CIDs[i] for i in test])
i+=1
if i==5:
break
[23235, 7824, 6276, 14104, 8030, 61185, 13216, 14514, 12810, 7519, 170833, 8091, 93375, 62572, 325, 12580, 6501, 22386, 61918, 7151, 8063, 159055, 62374, 8078, 62444, 8918, 7991, 61138, 379, 6114390, 8077, 556940, 8892, 6826, 2346, 6943, 61199, 8697, 637776, 6057, 14228, 89440, 61337, 8193, 241, 176, 61204, 61177, 6213, 31249, 8093, 165675, 61192, 519539, 565690, 7463, 8857, 5363491, 61151, 6050, 263, 1001, 5541, 6997, 440967, 239, 61130, 10882, 78925, 18635]
[7127, 18635, 1549026, 7632, 10882, 6106, 2969, 311, 6213, 8078, 356, 8051, 8615, 7047, 31210, 7409, 61331, 1030, 61945, 2879, 5366244, 23642, 13436, 15606, 61177, 8174, 6276, 5363233, 7820, 11980, 8077, 454, 7761, 1068, 8205, 6137, 31260, 778574, 2345, 14257, 18827, 31252, 1060, 326, 14525, 7341, 13187, 8180, 61809, 61052, 6920, 7824, 10364, 6998, 6054, 8363, 8093, 244, 1550470, 61209, 8082, 11428, 7969, 61138, 8857, 12206, 31249, 7136, 8375, 176]
[10895, 62444, 5541, 24020, 14328, 8048, 9609, 2345, 887, 8125, 61386, 7136, 61204, 5363491, 520108, 6374, 8892, 11525, 6050, 33032, 89440, 18827, 6054, 5367706, 6569, 2758, 104721, 11419, 7122, 7969, 240, 650, 643820, 2214, 5610, 7194, 12748, 8467, 31260, 126, 6386, 9261, 7768, 7894, 325, 61252, 61199, 6137, 8914, 1549778, 638024, 8785, 31210, 7803, 7915, 1551246, 244, 61130, 165675, 638014, 356, 7921, 93375, 14296, 2346, 7410, 7695, 6287, 7916, 6549]
[11980, 650, 89440, 7601, 5363388, 444539, 6054, 62374, 235414, 78925, 6549, 7194, 8857, 1136, 8467, 61209, 5365027, 22873, 637776, 7795, 565690, 24834, 5364729, 7916, 8038, 62572, 6386, 6826, 8175, 8452, 241, 4133, 8294, 18827, 1183, 61252, 7151, 61155, 7583, 10430, 8785, 1549778, 6140, 61659, 61177, 69963, 9016, 12587, 2969, 8375, 15717, 18635, 526618, 660, 679, 24197, 6057, 196, 7635, 6998, 311, 7047, 5368076, 261, 61138, 7894, 170833, 228769, 5960, 5315892]
[2879, 16255, 15606, 11527, 10430, 7151, 5367698, 641423, 6943, 61204, 6137, 660, 8193, 10882, 11124, 8363, 10448, 8375, 8785, 31404, 5780, 7991, 61653, 6997, 24197, 12580, 7997, 7601, 650, 6569, 22386, 14104, 14296, 244, 24473, 11525, 61027, 15380, 228769, 62902, 326, 8180, 5280450, 8163, 9256, 101010, 27458, 24834, 2682, 520191, 16324, 10400, 8294, 8918, 14286, 31272, 10285, 7463, 11980, 6584, 61024, 62725, 61527, 8186, 31249, 4133, 519539, 6998, 1060, 5365027]
In [22]:
testsplits.head()
Out[22]:
0
1
2
3
4
5
6
7
8
9
...
59
60
61
62
63
64
65
66
67
68
0
23235
7824
6276
14104
8030
61185
13216
14514
12810
7519
...
6050
263
1001
5541
6997
440967
239
61130
10882
78925
1
7127
18635
1549026
7632
10882
6106
2969
311
6213
8078
...
61209
8082
11428
7969
61138
8857
12206
31249
7136
8375
2
10895
62444
5541
24020
14328
8048
9609
2345
887
8125
...
638014
356
7921
93375
14296
2346
7410
7695
6287
7916
3
11980
650
89440
7601
5363388
444539
6054
62374
235414
78925
...
6998
311
7047
5368076
261
61138
7894
170833
228769
5960
4
2879
16255
15606
11527
10430
7151
5367698
641423
6943
61204
...
6584
61024
62725
61527
8186
31249
4133
519539
6998
1060
5 rows × 69 columns
In [30]:
[int(x) if not np.isnan(x) else x for x in sorted(list(trainsplits.loc[0]))]
Out[30]:
[126,
177,
196,
240,
243,
244,
261,
311,
326,
356,
454,
460,
650,
660,
679,
807,
875,
887,
957,
994,
1030,
1032,
1049,
1060,
1068,
1110,
1136,
1183,
2214,
2345,
2682,
2758,
2879,
2969,
3776,
3893,
4133,
5610,
5779,
5780,
5950,
5960,
5961,
6054,
6106,
6137,
6140,
6184,
6287,
6374,
6386,
6448,
6505,
6549,
6560,
6569,
6584,
6590,
6658,
6753,
6920,
6989,
6998,
7047,
7059,
7095,
7119,
7122,
7127,
7136,
7144,
7147,
7150,
7165,
7194,
7288,
7335,
7341,
7360,
7361,
7409,
7410,
7500,
7583,
7593,
7601,
7632,
7635,
7654,
7695,
7720,
7731,
7749,
7761,
7762,
7765,
7768,
7792,
7795,
7799,
7803,
7820,
7826,
7894,
7915,
7916,
7921,
7937,
7967,
7969,
7983,
7997,
8007,
8038,
8042,
8048,
8051,
8082,
8103,
8118,
8122,
8125,
8129,
8137,
8159,
8163,
8174,
8175,
8180,
8184,
8186,
8205,
8294,
8363,
8375,
8452,
8456,
8467,
8615,
8635,
8658,
8712,
8723,
8785,
8797,
8908,
8914,
9016,
9024,
9025,
9256,
9261,
9589,
9609,
9862,
10285,
10364,
10400,
10430,
10448,
10722,
10748,
10797,
10890,
10895,
10925,
11086,
11124,
11419,
11428,
11525,
11527,
11529,
11569,
11583,
11614,
11617,
11902,
11980,
12097,
12178,
12180,
12206,
12297,
12327,
12348,
12506,
12587,
12741,
12748,
12813,
13187,
13204,
13436,
13561,
14257,
14286,
14296,
14328,
14491,
14525,
15037,
15380,
15510,
15606,
15717,
16255,
16324,
16741,
17121,
17525,
17617,
18554,
18827,
19310,
21057,
21648,
22310,
22873,
23642,
24020,
24116,
24197,
24473,
24513,
24834,
26331,
27457,
27458,
31209,
31210,
31225,
31234,
31244,
31246,
31252,
31260,
31265,
31266,
31268,
31272,
31404,
32594,
33032,
36822,
60999,
61005,
61024,
61027,
61048,
61052,
61155,
61209,
61229,
61252,
61293,
61325,
61331,
61386,
61408,
61527,
61641,
61653,
61659,
61670,
61771,
61809,
61945,
62087,
62144,
62332,
62375,
62378,
62580,
62725,
62835,
62900,
62902,
69963,
82227,
85522,
88454,
92979,
98330,
101010,
101604,
104721,
106997,
145742,
220674,
228769,
235414,
246728,
439570,
444539,
444683,
444972,
520108,
520191,
520296,
521238,
526618,
595928,
637563,
637566,
637796,
638014,
638024,
641256,
641423,
643820,
778574,
1549026,
1549778,
1550470,
1551246,
3578033,
5273467,
5280450,
5315892,
5352539,
5355850,
5362588,
5362798,
5362814,
5363233,
5363388,
5364729,
5365027,
5365049,
5366244,
5367698,
5367706,
5368076,
6429333,
6999977,
16220109,
nan]
In [64]:
for k in range(0,5): #set the range of splits here
# set a cv split as holdout data.
lb_CIDs = testsplits.ix[k,:].values
features = features[~features.CID.isin(test_CIDs)] # # remove the final test data features - this seems to be reduntant after the first iteration
#print(targets.shape,features.shape)
train_targets = targets[~targets['CID'].isin(lb_CIDs)] # remove lb_data and use the rest
train_features = features[~features.CID.isin(lb_CIDs)] # remove lb_data and use the rest
train,test = list(splits)[k]
#should_use = [training_leaderboard_CIDs[i] for i in train]
#used = list(train_features['CID'])
#should_use = [int(x) for x in sorted(list(trainsplits.loc[k])) if not np.isnan(x)]
#print(" ")
#print("k=%d" % k)
#print("Used: n=%d" % len(used),used)
#print(" ")
#print("Should use: n=%d" % len(should_use),should_use)
#print("Equal?",used == should_use)
#print(set(used).difference(should_use))
#print(" ")
#print(" ")
#print(" ")
#print(" ")
#print(" ")
#feature selection
#if not os.path.exists('scores/LB_scores_morgan' + str(k)):
# os.makedirs('scores/LB_scores_morgan' + str(k))
for idx in range(21):
if k < 0 and idx < 0: # in case the selection stops at a point, set the right numbers to continue (selection takes time)
pass
else:
print(datetime.datetime.now())
print('split ' + str(k))
print('selection for descriptor: ' + descriptor[idx])
sys.stdout.flush()
Y = train_targets[descriptor[idx]]
X = train_features.ix[:,1:]
selector = RandomizedLasso(alpha=0.025,selection_threshold=0.001,verbose=1,n_resampling=10,
random_state=12).fit(X,Y)
Y = targets[descriptor[idx]]
X = train_features.ix[:,1:]
selector2 = RandomizedLasso(alpha=0.025,selection_threshold=0.001,verbose=1,n_resampling=10,
random_state=12).fit(X,Y)
scores = pd.DataFrame(selector.scores_,index=X.columns)
print(selector.scores_.shape,X.columns)
print(scores.head())
#scores.to_csv('scores/LB_scores_morgan' + str(k) + '/scores_'+str(idx)+'.csv')
"""
k=0
Used: n=338 [126, 177, 196, 240, 243, 244, 261, 311, 326, 356, 454, 460, 650, 660, 679, 807, 875, 887, 957, 994, 1030, 1032, 1049, 1060, 1068, 1110, 1136, 1183, 2214, 2345, 2682, 2758, 2879, 2969, 3776, 3893, 4133, 5610, 5779, 5780, 5950, 5960, 5961, 6054, 6106, 6137, 6140, 6184, 6287, 6374, 6386, 6448, 6505, 6549, 6560, 6569, 6584, 6590, 6658, 6753, 6920, 6989, 6998, 7047, 7059, 7095, 7119, 7122, 7127, 7136, 7144, 7147, 7150, 7165, 7194, 7288, 7335, 7341, 7360, 7361, 7409, 7410, 7500, 7583, 7593, 7601, 7632, 7635, 7654, 7695, 7720, 7731, 7749, 7761, 7762, 7765, 7768, 7792, 7795, 7799, 7803, 7820, 7826, 7894, 7915, 7916, 7921, 7937, 7967, 7969, 7983, 7997, 8007, 8038, 8042, 8048, 8051, 8082, 8103, 8118, 8122, 8125, 8129, 8137, 8159, 8163, 8174, 8175, 8180, 8184, 8186, 8205, 8294, 8363, 8375, 8452, 8456, 8467, 8615, 8635, 8658, 8712, 8723, 8785, 8797, 8908, 8914, 9016, 9024, 9025, 9256, 9261, 9589, 9609, 9862, 10285, 10364, 10400, 10430, 10448, 10722, 10748, 10797, 10890, 10895, 10925, 11086, 11124, 11419, 11428, 11525, 11527, 11529, 11569, 11583, 11614, 11617, 11902, 11980, 12097, 12178, 12180, 12206, 12297, 12327, 12348, 12506, 12587, 12741, 12748, 12813, 13187, 13204, 13436, 13561, 14257, 14286, 14296, 14328, 14491, 14525, 15037, 15380, 15510, 15606, 15717, 16255, 16324, 16741, 17121, 17525, 17617, 18554, 18635, 18827, 19310, 21057, 21648, 22310, 22873, 23642, 24020, 24116, 24197, 24473, 24513, 24834, 26331, 27457, 27458, 31209, 31210, 31225, 31234, 31244, 31246, 31252, 31260, 31265, 31266, 31268, 31272, 31404, 32594, 33032, 36822, 60999, 61005, 61024, 61027, 61048, 61052, 61155, 61209, 61229, 61252, 61293, 61325, 61331, 61386, 61408, 61527, 61641, 61653, 61659, 61670, 61771, 61809, 61945, 62087, 62144, 62332, 62375, 62378, 62580, 62725, 62835, 62900, 62902, 69963, 82227, 85522, 88454, 92979, 98330, 101010, 101604, 104721, 106997, 145742, 220674, 228769, 235414, 246728, 439570, 444539, 444683, 444972, 520108, 520191, 520296, 521238, 526618, 595928, 637563, 637566, 637796, 638014, 638024, 641256, 641423, 643820, 778574, 1549026, 1549778, 1550470, 1551246, 3578033, 5273467, 5280450, 5315892, 5352539, 5355850, 5362588, 5362798, 5362814, 5363233, 5363388, 5364729, 5365027, 5365049, 5366244, 5367698, 5367706, 5368076, 6429333, 6999977, 16220109]
Should use: n=337 [240, 8174, 16220109, 19310, 243, 875, 6560, 679, 643820, 356, 5364729, 11529, 8129, 61252, 6920, 6448, 6106, 7761, 8007, 14296, 32594, 14286, 1549026, 88454, 5367706, 5315892, 7894, 8186, 6137, 244, 5367698, 18827, 24116, 12297, 26331, 14257, 7632, 101010, 6386, 24020, 10748, 60999, 106997, 7059, 7635, 439570, 14525, 8452, 7360, 8103, 7150, 9609, 807, 14491, 12741, 521238, 5950, 15717, 61048, 7194, 13436, 17525, 8712, 8163, 11086, 6505, 16324, 61670, 7500, 61771, 10925, 1549778, 460, 62835, 10430, 1060, 7762, 8175, 7583, 10400, 18554, 61659, 7768, 3578033, 7803, 5365027, 7749, 61155, 8038, 31265, 7916, 8635, 61809, 33032, 7997, 1136, 5273467, 8363, 1183, 12506, 7361, 7127, 6287, 8615, 8785, 10895, 11527, 5365049, 62580, 12178, 7654, 61386, 5610, 62378, 12180, 13561, 6753, 8042, 5352539, 11980, 12348, 1049, 9024, 61052, 31244, 5961, 145742, 5366244, 1030, 177, 12748, 8180, 12327, 1551246, 15037, 8048, 637566, 24834, 31260, 31272, 5362588, 16255, 2214, 11525, 92979, 11617, 6549, 957, 31209, 12206, 4133, 526618, 7967, 7288, 7731, 8467, 31268, 98330, 62087, 2879, 650, 246728, 6998, 61408, 27458, 6590, 326, 101604, 12097, 5362814, 17121, 13204, 8205, 7593, 8294, 7119, 10364, 6429333, 7795, 62902, 10285, 11583, 1550470, 7720, 8125, 8184, 8159, 454, 31225, 7695, 637563, 8051, 14328, 22873, 62375, 16741, 61653, 660, 12813, 61527, 196, 31266, 7915, 6989, 9261, 61293, 5280450, 31234, 228769, 9025, 520191, 7047, 22310, 311, 61331, 7136, 24473, 2345, 5355850, 6184, 2969, 27457, 5960, 11614, 62725, 5368076, 638014, 7826, 12587, 6374, 61325, 7969, 9862, 887, 3893, 126, 11124, 104721, 7409, 61209, 5363388, 8914, 21057, 7335, 641256, 7341, 444539, 8658, 6569, 61229, 5780, 11569, 31252, 13187, 8122, 8082, 9016, 7983, 520296, 6999977, 23642, 62332, 11428, 7122, 7820, 220674, 637796, 7165, 7095, 24513, 7799, 61024, 6140, 61945, 21648, 520108, 3776, 7601, 638024, 31210, 444972, 2682, 6054, 235414, 7937, 61005, 1032, 9256, 8375, 8137, 15606, 11419, 1068, 1110, 7921, 9589, 778574, 36822, 595928, 8118, 31404, 641423, 8908, 7410, 85522, 69963, 10890, 15510, 7765, 24197, 6658, 62900, 994, 8456, 82227, 5362798, 8797, 5363233, 2758, 10722, 61641, 7147, 6584, 7144, 61027, 15380, 31246, 11902, 261, 444683, 10797, 17617, 62144, 10448, 7792, 5779, 8723]
Equal? False
{18635}
k=1
Used: n=338 [126, 176, 177, 196, 239, 240, 241, 243, 261, 263, 325, 379, 460, 650, 660, 679, 807, 875, 887, 957, 994, 1001, 1032, 1049, 1110, 1136, 1183, 2214, 2346, 2682, 2758, 3776, 3893, 4133, 5541, 5610, 5779, 5780, 5950, 5960, 5961, 6050, 6057, 6140, 6184, 6287, 6374, 6386, 6448, 6501, 6505, 6549, 6560, 6569, 6584, 6590, 6658, 6753, 6826, 6943, 6989, 6997, 7059, 7095, 7119, 7122, 7144, 7147, 7150, 7151, 7165, 7194, 7288, 7335, 7360, 7361, 7410, 7463, 7500, 7519, 7583, 7593, 7601, 7635, 7654, 7695, 7720, 7731, 7749, 7762, 7765, 7768, 7792, 7795, 7799, 7803, 7826, 7894, 7915, 7916, 7921, 7937, 7967, 7983, 7991, 7997, 8007, 8030, 8038, 8042, 8048, 8063, 8091, 8103, 8118, 8122, 8125, 8129, 8137, 8159, 8163, 8175, 8184, 8186, 8193, 8294, 8452, 8456, 8467, 8635, 8658, 8697, 8712, 8723, 8785, 8797, 8892, 8908, 8914, 8918, 9016, 9024, 9025, 9256, 9261, 9589, 9609, 9862, 10285, 10400, 10430, 10448, 10722, 10748, 10797, 10890, 10895, 10925, 11086, 11124, 11419, 11525, 11527, 11529, 11569, 11583, 11614, 11617, 11902, 12097, 12178, 12180, 12297, 12327, 12348, 12506, 12580, 12587, 12741, 12748, 12810, 12813, 13204, 13216, 13561, 14104, 14228, 14286, 14296, 14328, 14491, 14514, 15037, 15380, 15510, 15717, 16255, 16324, 16741, 17121, 17525, 17617, 18554, 19310, 21057, 21648, 22310, 22386, 22873, 23235, 24020, 24116, 24197, 24473, 24513, 24834, 26331, 27457, 27458, 31209, 31225, 31234, 31244, 31246, 31265, 31266, 31268, 31272, 31404, 32594, 33032, 36822, 60999, 61005, 61024, 61027, 61048, 61130, 61151, 61155, 61185, 61192, 61199, 61204, 61229, 61252, 61293, 61325, 61337, 61386, 61408, 61527, 61641, 61653, 61659, 61670, 61771, 61918, 62087, 62144, 62332, 62374, 62375, 62378, 62444, 62572, 62580, 62725, 62835, 62900, 62902, 69963, 78925, 82227, 85522, 88454, 89440, 92979, 93375, 98330, 101010, 101604, 104721, 106997, 145742, 159055, 165675, 170833, 220674, 228769, 235414, 246728, 439570, 440967, 444539, 444683, 444972, 519539, 520108, 520191, 520296, 521238, 526618, 556940, 565690, 595928, 637563, 637566, 637776, 637796, 638014, 638024, 641256, 641423, 643820, 1549778, 1551246, 3578033, 5273467, 5280450, 5315892, 5352539, 5355850, 5362588, 5362798, 5362814, 5363388, 5363491, 5364729, 5365027, 5365049, 5367698, 5367706, 5368076, 6114390, 6429333, 6999977, 16220109]
Should use: n=337 [62375, 240, 8137, 887, 24116, 33032, 8186, 36822, 8118, 1551246, 22873, 520296, 21648, 660, 17617, 220674, 61005, 12297, 11419, 62444, 6057, 556940, 82227, 4133, 6549, 15037, 8892, 7583, 31209, 526618, 637563, 650, 11617, 85522, 10448, 6114390, 14514, 18554, 5960, 5363388, 7410, 61185, 1136, 61918, 2758, 8129, 9016, 61048, 61204, 263, 61325, 6560, 26331, 14296, 7792, 31244, 11086, 6429333, 92979, 27457, 177, 261, 15380, 17525, 31404, 228769, 3776, 5362798, 12587, 5362814, 7122, 637776, 22310, 1049, 12180, 6590, 5950, 69963, 23235, 7768, 2346, 14104, 5780, 61252, 641423, 61229, 9024, 5280450, 11525, 6943, 6448, 7720, 6050, 62900, 78925, 7894, 7144, 16324, 1183, 61771, 8723, 31234, 7795, 325, 8184, 13561, 16220109, 15510, 62572, 101010, 7361, 8103, 27458, 10722, 9256, 8908, 6184, 61293, 444539, 6569, 5365049, 8159, 61024, 10430, 460, 61386, 521238, 440967, 24473, 637566, 520108, 6140, 1549778, 98330, 8697, 9862, 5779, 24020, 5541, 10925, 5367706, 6374, 6997, 14286, 439570, 61151, 8063, 10895, 62902, 7967, 7921, 19310, 7799, 8163, 8091, 12178, 60999, 807, 994, 8038, 2214, 7059, 8193, 62725, 6989, 5368076, 31246, 61199, 7695, 5363491, 62087, 7500, 643820, 31268, 7194, 24513, 17121, 61670, 1032, 519539, 5365027, 6584, 7147, 11902, 638014, 8456, 14491, 10285, 5273467, 170833, 7803, 9261, 8175, 61192, 5352539, 5315892, 159055, 6505, 61408, 235414, 24834, 8294, 31265, 61337, 8797, 638024, 12580, 7463, 104721, 12348, 13216, 7762, 32594, 12506, 7095, 8467, 8658, 379, 15717, 61659, 241, 145742, 565690, 7593, 6287, 11583, 5961, 7151, 11569, 875, 520191, 12327, 6753, 101604, 11527, 239, 10400, 8452, 9589, 10748, 7288, 7991, 14228, 8785, 8914, 61527, 61653, 7765, 1110, 11529, 31266, 7360, 7826, 444683, 6826, 7150, 2682, 8918, 126, 5610, 8712, 8125, 6999977, 7635, 7997, 21057, 12741, 7119, 62378, 7519, 62144, 7654, 12097, 10797, 165675, 89440, 16255, 6658, 7335, 7915, 8030, 10890, 11124, 16741, 12748, 1001, 5355850, 31272, 8635, 5364729, 196, 8007, 62374, 246728, 5362588, 9609, 957, 61130, 7937, 61641, 13204, 7983, 12810, 12813, 62580, 595928, 31225, 62835, 5367698, 7731, 7165, 24197, 6386, 3893, 106997, 444972, 14328, 8048, 7749, 62332, 61027, 679, 6501, 61155, 7916, 22386, 637796, 8122, 11614, 641256, 88454, 3578033, 9025, 8042, 7601, 243, 93375]
Equal? False
{176}
k=2
Used: n=338 [176, 177, 196, 239, 241, 243, 261, 263, 311, 326, 379, 454, 460, 660, 679, 807, 875, 957, 994, 1001, 1030, 1032, 1049, 1060, 1068, 1110, 1136, 1183, 2682, 2879, 2969, 3776, 3893, 4133, 5779, 5780, 5950, 5960, 5961, 6057, 6106, 6140, 6184, 6213, 6276, 6448, 6501, 6505, 6549, 6560, 6584, 6590, 6658, 6753, 6826, 6920, 6943, 6989, 6997, 6998, 7047, 7059, 7095, 7119, 7127, 7144, 7147, 7150, 7151, 7165, 7288, 7335, 7341, 7360, 7361, 7409, 7463, 7500, 7519, 7583, 7593, 7601, 7632, 7635, 7654, 7720, 7731, 7749, 7761, 7762, 7765, 7792, 7795, 7799, 7820, 7824, 7826, 7937, 7967, 7983, 7991, 7997, 8007, 8030, 8038, 8042, 8051, 8063, 8077, 8078, 8082, 8091, 8093, 8103, 8118, 8122, 8129, 8137, 8159, 8163, 8174, 8175, 8180, 8184, 8186, 8193, 8205, 8294, 8363, 8375, 8452, 8456, 8615, 8635, 8658, 8697, 8712, 8723, 8797, 8857, 8908, 8918, 9016, 9024, 9025, 9256, 9589, 9862, 10285, 10364, 10400, 10430, 10448, 10722, 10748, 10797, 10882, 10890, 10925, 11086, 11124, 11428, 11527, 11529, 11569, 11583, 11614, 11617, 11902, 11980, 12097, 12178, 12180, 12206, 12297, 12327, 12348, 12506, 12580, 12587, 12741, 12810, 12813, 13187, 13204, 13216, 13436, 13561, 14104, 14228, 14257, 14286, 14491, 14514, 14525, 15037, 15380, 15510, 15606, 15717, 16255, 16324, 16741, 17121, 17525, 17617, 18554, 18635, 19310, 21057, 21648, 22310, 22386, 22873, 23235, 23642, 24116, 24197, 24473, 24513, 24834, 26331, 27457, 27458, 31209, 31225, 31234, 31244, 31246, 31249, 31252, 31265, 31266, 31268, 31272, 31404, 32594, 36822, 60999, 61005, 61024, 61027, 61048, 61052, 61138, 61151, 61155, 61177, 61185, 61192, 61209, 61229, 61293, 61325, 61331, 61337, 61408, 61527, 61641, 61653, 61659, 61670, 61771, 61809, 61918, 61945, 62087, 62144, 62332, 62374, 62375, 62378, 62572, 62580, 62725, 62835, 62900, 62902, 69963, 78925, 82227, 85522, 88454, 92979, 98330, 101010, 101604, 106997, 145742, 159055, 170833, 220674, 228769, 235414, 246728, 439570, 440967, 444539, 444683, 444972, 519539, 520191, 520296, 521238, 526618, 556940, 565690, 595928, 637563, 637566, 637776, 637796, 641256, 641423, 778574, 1549026, 1550470, 3578033, 5273467, 5280450, 5315892, 5352539, 5355850, 5362588, 5362798, 5362814, 5363233, 5363388, 5364729, 5365027, 5365049, 5366244, 5367698, 5368076, 6114390, 6429333, 6999977, 16220109]
Should use: n=337 [14286, 62332, 15510, 101604, 31265, 36822, 1001, 31244, 439570, 61918, 7937, 11428, 61527, 16324, 13561, 14104, 6920, 5315892, 12506, 228769, 12741, 15037, 8635, 5280450, 61331, 8078, 16741, 21057, 3776, 1110, 7593, 61337, 5367698, 8797, 1183, 159055, 17617, 875, 7654, 241, 8159, 61177, 8042, 460, 1550470, 6505, 595928, 444972, 2969, 8163, 8077, 61293, 12097, 5950, 10364, 101010, 239, 8118, 637563, 7341, 31404, 61209, 11583, 12580, 7165, 4133, 31246, 679, 11902, 556940, 26331, 6140, 1030, 637776, 10722, 8082, 8294, 778574, 7635, 61229, 5960, 641256, 12587, 8180, 62374, 6584, 3578033, 7601, 263, 8063, 170833, 62572, 8712, 5366244, 1068, 6448, 246728, 8193, 14257, 8137, 8205, 7119, 10748, 6658, 92979, 8723, 8908, 1136, 12180, 176, 8091, 88454, 8363, 641423, 11617, 11980, 60999, 61809, 6943, 62375, 27458, 12348, 6184, 27457, 7632, 7983, 444539, 15380, 31268, 637566, 14228, 13187, 8038, 6753, 7127, 10448, 1032, 9024, 8103, 7967, 9862, 10430, 17121, 145742, 6276, 22386, 22310, 11124, 61192, 18554, 69963, 8030, 62725, 196, 61945, 7095, 5365027, 14525, 15717, 98330, 9589, 5362798, 7765, 61670, 565690, 61024, 6106, 21648, 11527, 10797, 8051, 7761, 31225, 5363388, 5273467, 7519, 85522, 5355850, 7795, 7583, 9016, 61027, 6997, 379, 61325, 7820, 7997, 7824, 12327, 454, 5961, 62087, 7150, 23642, 24834, 994, 16220109, 62835, 7361, 31272, 12813, 7731, 10882, 61155, 9256, 15606, 5363233, 6114390, 8697, 807, 7409, 8857, 8174, 7059, 24513, 6989, 62900, 957, 7991, 24197, 11569, 7147, 5368076, 2682, 61185, 8452, 78925, 7792, 444683, 61052, 23235, 24473, 520296, 177, 62144, 7360, 9025, 61641, 10925, 6429333, 31266, 7799, 1549026, 5364729, 14514, 7720, 3893, 61151, 5362588, 62580, 8007, 12297, 61048, 7144, 11086, 5362814, 6501, 440967, 8615, 31209, 12206, 8456, 14491, 7335, 12810, 1049, 6213, 61138, 8918, 519539, 10285, 2879, 8658, 7288, 17525, 13216, 61659, 12178, 7826, 6826, 8122, 22873, 660, 19310, 1060, 8129, 5365049, 8184, 18635, 7463, 10890, 11614, 220674, 24116, 520191, 31234, 7500, 326, 6560, 7749, 61653, 521238, 6590, 8375, 82227, 6057, 61005, 7151, 32594, 62902, 311, 7762, 5779, 8175, 7047, 16255, 8186, 235414, 31249, 243, 526618, 106997, 31252, 6999977, 62378, 13204, 5352539, 11529, 13436, 6998, 10400, 261, 637796, 8093, 5780, 61771, 61408]
Equal? False
{6549}
k=3
Used: n=338 [126, 176, 177, 239, 240, 243, 244, 263, 325, 326, 356, 379, 454, 460, 807, 875, 887, 957, 994, 1001, 1030, 1032, 1049, 1060, 1068, 1110, 2214, 2345, 2346, 2682, 2758, 2879, 3776, 3893, 5541, 5610, 5779, 5780, 5950, 5961, 6050, 6106, 6137, 6184, 6213, 6276, 6287, 6374, 6448, 6501, 6505, 6560, 6569, 6584, 6590, 6658, 6753, 6920, 6943, 6989, 6997, 7059, 7095, 7119, 7122, 7127, 7136, 7144, 7147, 7150, 7165, 7288, 7335, 7341, 7360, 7361, 7409, 7410, 7463, 7500, 7519, 7593, 7632, 7654, 7695, 7720, 7731, 7749, 7761, 7762, 7765, 7768, 7792, 7799, 7803, 7820, 7824, 7826, 7915, 7921, 7937, 7967, 7969, 7983, 7991, 7997, 8007, 8030, 8042, 8048, 8051, 8063, 8077, 8078, 8082, 8091, 8093, 8103, 8118, 8122, 8125, 8129, 8137, 8159, 8163, 8174, 8180, 8184, 8186, 8193, 8205, 8363, 8456, 8615, 8635, 8658, 8697, 8712, 8723, 8797, 8892, 8908, 8914, 8918, 9024, 9025, 9256, 9261, 9589, 9609, 9862, 10285, 10364, 10400, 10448, 10722, 10748, 10797, 10882, 10890, 10895, 10925, 11086, 11124, 11419, 11428, 11525, 11527, 11529, 11569, 11583, 11614, 11617, 11902, 12097, 12178, 12180, 12206, 12297, 12327, 12348, 12506, 12580, 12741, 12748, 12810, 12813, 13187, 13204, 13216, 13436, 13561, 14104, 14228, 14257, 14286, 14296, 14328, 14491, 14514, 14525, 15037, 15380, 15510, 15606, 16255, 16324, 16741, 17121, 17525, 17617, 18554, 19310, 21057, 21648, 22310, 22386, 23235, 23642, 24020, 24116, 24473, 24513, 26331, 27457, 27458, 31209, 31210, 31225, 31234, 31244, 31246, 31249, 31252, 31260, 31265, 31266, 31268, 31272, 31404, 32594, 33032, 36822, 60999, 61005, 61024, 61027, 61048, 61052, 61130, 61151, 61185, 61192, 61199, 61204, 61229, 61293, 61325, 61331, 61337, 61386, 61408, 61527, 61641, 61653, 61670, 61771, 61809, 61918, 61945, 62087, 62144, 62332, 62375, 62378, 62444, 62580, 62725, 62835, 62900, 62902, 82227, 85522, 88454, 92979, 93375, 98330, 101010, 101604, 104721, 106997, 145742, 159055, 165675, 220674, 246728, 439570, 440967, 444683, 444972, 519539, 520108, 520191, 520296, 521238, 556940, 595928, 637563, 637566, 637796, 638014, 638024, 641256, 641423, 643820, 778574, 1549026, 1550470, 1551246, 3578033, 5273467, 5280450, 5315892, 5352539, 5355850, 5362588, 5362798, 5362814, 5363233, 5363491, 5365049, 5366244, 5367698, 5367706, 6114390, 6429333, 6999977, 16220109]
Should use: n=337 [6505, 61945, 7720, 637563, 16741, 62725, 8007, 101010, 1551246, 12178, 8635, 12297, 7765, 11614, 8125, 7792, 7937, 8051, 12580, 14286, 8077, 32594, 93375, 8048, 165675, 82227, 8908, 8797, 5352539, 31210, 7095, 6584, 520191, 61293, 126, 5363491, 8658, 8918, 13436, 61918, 641423, 8122, 7341, 6920, 994, 98330, 61052, 11525, 444972, 13187, 62378, 5362798, 8137, 8159, 24513, 21057, 7761, 9025, 92979, 11124, 8093, 61204, 62332, 10797, 444683, 22386, 12741, 61151, 7409, 14296, 325, 1068, 5950, 106997, 104721, 7921, 19310, 2345, 7165, 61771, 7820, 8103, 6997, 61653, 27458, 10285, 7150, 643820, 556940, 1001, 61527, 243, 8129, 23642, 7500, 7288, 7127, 31265, 145742, 31249, 62144, 85522, 10400, 9589, 5610, 7361, 8712, 8180, 61386, 13216, 11569, 14525, 356, 11086, 12348, 6114390, 10364, 61809, 8193, 6753, 6658, 14491, 62444, 12206, 16255, 62902, 7824, 14228, 8174, 5362814, 10448, 239, 12813, 5961, 14328, 8363, 11529, 7144, 1110, 6050, 61641, 61670, 27457, 8030, 62375, 10722, 6429333, 7915, 7803, 8892, 15380, 17121, 3578033, 9862, 33032, 263, 62900, 7136, 60999, 9256, 7122, 641256, 6999977, 7997, 11419, 1032, 638024, 5366244, 101604, 440967, 5355850, 7695, 8078, 61048, 5367698, 10882, 460, 6213, 8091, 16324, 8697, 23235, 6943, 240, 519539, 7519, 24116, 875, 6374, 5541, 26331, 6287, 5273467, 159055, 61325, 62835, 14104, 1030, 246728, 7632, 10890, 7731, 807, 11617, 61229, 15510, 11428, 61005, 177, 16220109, 6448, 11902, 244, 5367706, 7749, 61027, 220674, 3893, 7360, 8914, 7654, 61337, 7463, 8163, 520296, 7967, 176, 6590, 61185, 6569, 7410, 5780, 18554, 12097, 6560, 595928, 15606, 637796, 12810, 13561, 1550470, 12180, 61192, 6106, 7799, 7119, 8723, 8082, 31252, 7335, 12748, 61199, 11583, 12327, 454, 7768, 7593, 61130, 8205, 957, 6276, 61408, 5280450, 14514, 7147, 24020, 1060, 61024, 8456, 7991, 31268, 2879, 2346, 5363233, 2682, 521238, 7983, 31244, 1549026, 9261, 778574, 887, 31234, 2214, 31404, 7969, 3776, 9024, 8184, 31225, 379, 8042, 10895, 61331, 10925, 7059, 5779, 88454, 11527, 6501, 12506, 10748, 9609, 31272, 36822, 520108, 24473, 5362588, 5365049, 8063, 637566, 31266, 6989, 2758, 8186, 17525, 8118, 62087, 22310, 17617, 21648, 326, 7826, 439570, 31209, 7762, 31260, 6184, 15037, 1049, 62580, 14257, 8615, 31246, 6137, 13204, 638014]
Equal? False
{5315892}
k=4
Used: n=338 [126, 176, 177, 196, 239, 240, 241, 243, 261, 263, 311, 325, 356, 379, 454, 460, 679, 807, 875, 887, 957, 994, 1001, 1030, 1032, 1049, 1068, 1110, 1136, 1183, 2214, 2345, 2346, 2758, 2969, 3776, 3893, 5541, 5610, 5779, 5950, 5960, 5961, 6050, 6054, 6057, 6106, 6140, 6184, 6213, 6276, 6287, 6374, 6386, 6448, 6501, 6505, 6549, 6560, 6590, 6658, 6753, 6826, 6920, 6989, 7047, 7059, 7095, 7119, 7122, 7127, 7136, 7144, 7147, 7150, 7165, 7194, 7288, 7335, 7341, 7360, 7361, 7409, 7410, 7500, 7519, 7583, 7593, 7632, 7635, 7654, 7695, 7720, 7731, 7749, 7761, 7762, 7765, 7768, 7792, 7795, 7799, 7803, 7820, 7824, 7826, 7894, 7915, 7916, 7921, 7937, 7967, 7969, 7983, 8007, 8030, 8038, 8042, 8048, 8051, 8063, 8077, 8078, 8082, 8091, 8093, 8103, 8118, 8122, 8125, 8129, 8137, 8159, 8174, 8175, 8184, 8205, 8452, 8456, 8467, 8615, 8635, 8658, 8697, 8712, 8723, 8797, 8857, 8892, 8908, 8914, 9016, 9024, 9025, 9261, 9589, 9609, 9862, 10364, 10722, 10748, 10797, 10890, 10895, 10925, 11086, 11419, 11428, 11529, 11569, 11583, 11614, 11617, 11902, 12097, 12178, 12180, 12206, 12297, 12327, 12348, 12506, 12587, 12741, 12748, 12810, 12813, 13187, 13204, 13216, 13436, 13561, 14228, 14257, 14328, 14491, 14514, 14525, 15037, 15510, 15717, 16741, 17121, 17525, 17617, 18554, 18635, 18827, 19310, 21057, 21648, 22310, 22873, 23235, 23642, 24020, 24116, 24513, 26331, 27457, 31209, 31210, 31225, 31234, 31244, 31246, 31252, 31260, 31265, 31266, 31268, 32594, 33032, 36822, 60999, 61005, 61048, 61052, 61130, 61138, 61151, 61155, 61177, 61185, 61192, 61199, 61209, 61229, 61252, 61293, 61325, 61331, 61337, 61386, 61408, 61641, 61659, 61670, 61771, 61809, 61918, 61945, 62087, 62144, 62332, 62374, 62375, 62378, 62444, 62572, 62580, 62835, 62900, 69963, 78925, 82227, 85522, 88454, 89440, 92979, 93375, 98330, 101604, 104721, 106997, 145742, 159055, 165675, 170833, 220674, 235414, 246728, 439570, 440967, 444539, 444683, 444972, 520108, 520296, 521238, 526618, 556940, 565690, 595928, 637563, 637566, 637776, 637796, 638014, 638024, 641256, 643820, 778574, 1549026, 1549778, 1550470, 1551246, 3578033, 5273467, 5315892, 5352539, 5355850, 5362588, 5362798, 5362814, 5363233, 5363388, 5363491, 5364729, 5365027, 5365049, 5366244, 5367706, 5368076, 6114390, 6429333, 6999977, 16220109]
Should use: n=337 [31225, 8063, 61155, 85522, 1550470, 61005, 31246, 165675, 12327, 7341, 14491, 8697, 1030, 62087, 61771, 12180, 88454, 8048, 7826, 7695, 11902, 8030, 11529, 6213, 10748, 31266, 7749, 13436, 62332, 643820, 13561, 6753, 8125, 7361, 8184, 2758, 7593, 18554, 13216, 7409, 14228, 5368076, 9589, 93375, 69963, 62444, 9025, 1136, 2346, 7937, 12206, 7122, 7768, 13204, 24116, 31234, 8093, 444972, 11583, 27457, 6057, 8857, 62375, 6184, 6590, 23642, 11569, 61659, 12748, 9016, 5355850, 8038, 460, 7136, 957, 5541, 6560, 5367706, 875, 1549026, 1110, 21648, 61293, 8122, 7519, 8175, 18827, 31244, 638014, 8205, 220674, 7799, 679, 5365049, 5362814, 7915, 5362588, 23235, 62572, 17617, 61138, 454, 638024, 11086, 6549, 8007, 126, 12178, 526618, 637776, 8082, 5779, 61185, 521238, 61151, 7824, 887, 61670, 106997, 8658, 7410, 7635, 7765, 2969, 104721, 778574, 6140, 565690, 7916, 33032, 1068, 8467, 31210, 61641, 15717, 14328, 62144, 6374, 31265, 61331, 7921, 12506, 5363388, 5363233, 5352539, 637563, 7761, 1001, 6054, 159055, 14514, 6999977, 61337, 3578033, 31268, 7047, 12813, 18635, 61252, 6114390, 12587, 7144, 10925, 60999, 177, 5362798, 32594, 61199, 6448, 16741, 8452, 92979, 261, 6505, 12097, 17121, 62900, 6989, 1032, 8042, 8892, 241, 10895, 7731, 14525, 145742, 8723, 61918, 61177, 98330, 9609, 61229, 21057, 7335, 7632, 89440, 7127, 61809, 379, 8712, 7795, 176, 10364, 7360, 556940, 520296, 641256, 7803, 8174, 7147, 10890, 7119, 31252, 78925, 62374, 9862, 7894, 61192, 24020, 8456, 11428, 15510, 239, 12810, 6050, 61325, 12297, 62835, 7969, 12348, 6276, 8129, 595928, 1549778, 8091, 7059, 196, 22873, 61052, 26331, 31209, 7983, 7165, 5961, 1551246, 22310, 5364729, 61130, 5366244, 5363491, 62580, 7288, 62378, 6826, 61209, 637566, 8908, 8051, 7654, 311, 1183, 8914, 444683, 9261, 7500, 7150, 7095, 6501, 14257, 439570, 8615, 7720, 235414, 5273467, 9024, 7792, 6287, 8137, 11614, 2214, 13187, 5610, 356, 7194, 243, 10797, 8118, 325, 5960, 6429333, 994, 17525, 2345, 61945, 7820, 10722, 36822, 8078, 61408, 24513, 5315892, 12741, 520108, 8635, 61048, 1049, 101604, 6386, 8103, 807, 82227, 8159, 637796, 6920, 444539, 440967, 5950, 7583, 3776, 240, 6106, 8077, 3893, 170833, 31260, 11419, 263, 61386, 8797, 15037, 7762, 246728, 7967, 16220109, 6658, 11617, 19310]
Equal? False
{5365027}
In [66]:
len(list(splits)[k][0])
Out[66]:
337
In [67]:
len(lb_CIDs)
Out[67]:
69
In [60]:
X.columns.shape
Out[60]:
(14612,)
In [61]:
train_features
Out[61]:
CID
complexity from pubmed
MW
AMW
Sv
Se
Sp
Si
Mv
Me
...
91305518_2
91411526_2
91541756_2
91552833_2
91563027_2
91595028_2
91614181_2
91617014_2
91617930_2
91618238_2
0
126
0.181128
0.270753
0.030587
0.262264
0.219126
0.253846
0.214989
0.216981
0.425532
...
0.000013
0.000331
0.014024
0.000296
0.021098
0.000186
0.003159
0.002299
0.000138
0.011080
2
177
0.020039
0.067721
0.015501
0.075556
0.083688
0.078074
0.089782
0.106346
0.382979
...
0.000014
0.000092
0.000961
0.000339
0.000657
0.000008
0.000098
0.000221
0.000037
0.001932
4
196
0.221790
0.333247
0.023779
0.306622
0.308572
0.294339
0.305729
0.138079
0.539007
...
0.001029
0.000737
0.013662
0.009383
0.001954
0.000820
0.003130
0.005600
0.002189
0.010702
6
240
0.141051
0.229143
0.026007
0.240877
0.197235
0.241008
0.197436
0.214408
0.269504
...
0.000000
0.000084
0.021801
0.000134
0.008264
0.000479
0.005696
0.002347
0.000035
0.005194
8
243
0.202335
0.270753
0.030587
0.262264
0.219126
0.253846
0.214989
0.216981
0.425532
...
0.000051
0.000187
0.044910
0.000528
0.010226
0.000747
0.007957
0.003826
0.000216
0.007831
9
244
0.107782
0.234396
0.019297
0.256640
0.228315
0.262527
0.232498
0.171527
0.219858
...
0.000116
0.000189
0.025945
0.000134
0.012913
0.000906
0.009812
0.003065
0.000078
0.008116
10
261
0.048249
0.140695
0.009412
0.166906
0.178825
0.177695
0.188943
0.095197
0.205674
...
0.000217
0.000556
0.003460
0.000902
0.001355
0.000048
0.000379
0.001313
0.000325
0.006664
14
311
0.441634
0.452824
0.038815
0.355019
0.343198
0.311362
0.323340
0.180103
0.900709
...
0.000614
0.001629
0.010255
0.005364
0.007480
0.000739
0.003606
0.004504
0.001656
0.006944
17
326
0.235409
0.338604
0.016726
0.377901
0.339949
0.390425
0.346192
0.162093
0.170213
...
0.000313
0.000503
0.012979
0.001143
0.016830
0.000362
0.004182
0.003648
0.000541
0.013611
18
356
0.048638
0.250286
0.000000
0.343952
0.378270
0.385618
0.404788
0.065180
0.000000
...
0.002901
0.000751
0.002844
0.005614
0.000511
0.000420
0.001417
0.002331
0.005848
0.005102
20
454
0.115953
0.286643
0.006000
0.349575
0.369098
0.376937
0.387279
0.088336
0.113475
...
0.002473
0.000507
0.004109
0.006292
0.001075
0.000417
0.001378
0.002237
0.003400
0.007182
21
460
0.161479
0.276006
0.023738
0.278027
0.250206
0.275393
0.250065
0.176672
0.361702
...
0.000115
0.000744
0.033934
0.000296
0.021098
0.000419
0.004936
0.003800
0.000215
0.011080
22
650
0.139105
0.177052
0.022693
0.172529
0.169636
0.169014
0.171434
0.145798
0.460993
...
0.000122
0.000356
0.005407
0.002309
0.000602
0.000191
0.001516
0.004253
0.000577
0.001666
23
660
0.321012
0.338474
0.027778
0.337850
0.283200
0.331948
0.279102
0.210978
0.354610
...
0.000000
0.001248
0.024830
0.000031
0.015625
0.000469
0.004649
0.003483
0.000671
0.012152
24
679
0.056420
0.156377
0.027917
0.126825
0.132518
0.146194
0.138201
0.114923
0.319149
...
0.000055
0.000205
0.000932
0.000335
0.000641
0.000017
0.000218
0.000490
0.000082
0.001850
28
807
0.000000
0.613180
1.000000
0.057370
0.000000
0.137541
0.000000
1.000000
0.361702
...
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
29
875
0.260700
0.343493
0.040700
0.258046
0.257234
0.220422
0.241674
0.174099
1.000000
...
0.000831
0.000758
0.006574
0.002722
0.003265
0.000480
0.003219
0.001736
0.001053
0.016198
30
887
0.003891
0.036487
0.007730
0.045645
0.067191
0.049796
0.075264
0.040309
0.397163
...
0.000056
0.000093
0.001008
0.000345
0.000683
0.000008
0.000100
0.000057
0.000037
0.002066
31
957
0.085214
0.291896
0.003502
0.365339
0.400178
0.398456
0.422341
0.072041
0.092199
...
0.005048
0.000507
0.004109
0.007223
0.001911
0.000601
0.001744
0.001644
0.005312
0.007182
33
994
0.297665
0.382789
0.022758
0.384153
0.348939
0.381914
0.350431
0.169811
0.340426
...
0.000197
0.000315
0.043509
0.001512
0.011080
0.001652
0.015200
0.005272
0.000408
0.012544
35
1030
0.040661
0.151071
0.011918
0.158381
0.184219
0.162284
0.191992
0.076329
0.390071
...
0.001107
0.000359
0.002005
0.001314
0.003858
0.000155
0.000861
0.000213
0.000327
0.015625
37
1032
0.078210
0.145818
0.019101
0.142618
0.153139
0.140736
0.156915
0.113208
0.482270
...
0.000342
0.000562
0.010916
0.001314
0.002469
0.000155
0.000861
0.001918
0.000445
0.006944
38
1049
0.060117
0.158873
0.022832
0.174474
0.145947
0.178515
0.152051
0.196398
0.198582
...
0.000000
0.000000
0.019837
0.000000
0.002241
0.000189
0.001863
0.001257
0.000000
0.003328
39
1060
0.163424
0.182175
0.036015
0.148241
0.143967
0.132055
0.139420
0.176672
0.815603
...
0.000122
0.000356
0.010597
0.002309
0.001355
0.000191
0.001516
0.003361
0.000577
0.003748
40
1068
0.005447
0.114766
0.020497
0.105468
0.110611
0.133356
0.120648
0.099485
0.070922
...
0.000125
0.000092
0.000240
0.000151
0.000164
0.000008
0.000098
0.000221
0.000037
0.004347
41
1110
0.180156
0.260273
0.032986
0.215303
0.213435
0.194718
0.206554
0.162093
0.773050
...
0.000329
0.000772
0.012346
0.004954
0.002189
0.000545
0.002278
0.004017
0.001268
0.008900
42
1136
0.173346
0.325627
0.029076
0.292803
0.264245
0.309269
0.267618
0.170669
0.297872
...
0.000320
0.000744
0.014024
0.000525
0.006117
0.000186
0.001404
0.003002
0.000421
0.024931
43
1183
0.262646
0.348851
0.029493
0.329325
0.288594
0.316508
0.282137
0.198113
0.475177
...
0.000112
0.001276
0.022500
0.000785
0.022420
0.000223
0.003044
0.005378
0.000412
0.017200
44
2214
0.324903
0.385338
0.025787
0.374970
0.336171
0.366305
0.331717
0.181818
0.411348
...
0.000309
0.001600
0.029873
0.001521
0.021948
0.000414
0.004713
0.011228
0.000835
0.016649
45
2345
0.414397
0.505149
0.026007
0.518904
0.427827
0.516373
0.421818
0.214408
0.269504
...
0.000101
0.000440
0.032362
0.000671
0.014489
0.001293
0.037623
0.007243
0.000379
0.006316
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
434
638024
0.801556
0.695282
0.022358
0.707855
0.627107
0.707103
0.624394
0.179245
0.290780
...
0.000010
0.002223
0.008190
0.000366
0.007869
0.000979
0.002748
0.003714
0.001390
0.004300
435
641256
0.140467
0.208416
0.011257
0.242492
0.242882
0.255797
0.253042
0.116638
0.177305
...
0.000210
0.000192
0.001712
0.000547
0.002163
0.000030
0.000204
0.000197
0.000079
0.003144
436
641423
0.412451
0.442682
0.023224
0.459081
0.394833
0.459818
0.392781
0.192110
0.269504
...
0.000104
0.000292
0.027778
0.001013
0.009335
0.000718
0.008521
0.004727
0.000286
0.004959
437
643820
0.291829
0.354364
0.007551
0.425162
0.433172
0.455011
0.451378
0.102058
0.106383
...
0.001008
0.000717
0.005625
0.003139
0.001830
0.000361
0.001712
0.004444
0.000840
0.006719
438
778574
0.414397
0.484422
0.017918
0.520519
0.473473
0.531162
0.477424
0.160377
0.219858
...
0.000934
0.000652
0.030914
0.002785
0.011080
0.001119
0.011032
0.006520
0.001136
0.009452
441
1549026
0.453307
0.463695
0.011257
0.522134
0.519136
0.545951
0.533044
0.116638
0.177305
...
0.000756
0.001194
0.008359
0.004885
0.001568
0.000585
0.003872
0.007901
0.001577
0.005328
442
1549778
0.447471
0.458572
0.008196
0.546423
0.544805
0.582909
0.565072
0.109777
0.099291
...
0.000756
0.000914
0.006400
0.005665
0.000882
0.000652
0.003338
0.007901
0.001811
0.005328
443
1550470
0.282101
0.317747
0.016146
0.339435
0.328846
0.346709
0.334708
0.135506
0.269504
...
0.000451
0.000724
0.005768
0.002031
0.001870
0.000118
0.001365
0.002883
0.000541
0.002500
444
1551246
0.404669
0.341309
0.010987
0.402818
0.383797
0.427525
0.399968
0.130360
0.092199
...
0.000448
0.000498
0.002500
0.002009
0.000458
0.000223
0.001036
0.003600
0.000538
0.002419
446
3578033
0.260700
0.348981
0.018146
0.369347
0.345343
0.374986
0.349226
0.151801
0.269504
...
0.000312
0.000981
0.010126
0.001547
0.011562
0.000312
0.003589
0.003624
0.000843
0.017485
447
5273467
0.437743
0.520909
0.016538
0.566194
0.521050
0.580986
0.527019
0.153516
0.198582
...
0.001367
0.000865
0.032707
0.004561
0.010471
0.001392
0.011662
0.007287
0.001746
0.011378
448
5280450
0.519455
0.682617
0.008155
0.796123
0.804547
0.844786
0.830541
0.104631
0.120567
...
0.007441
0.000560
0.006755
0.015625
0.001058
0.001534
0.002770
0.003104
0.007765
0.004395
451
5315892
0.196498
0.302117
0.018897
0.332227
0.292372
0.340601
0.296597
0.174099
0.191489
...
0.000112
0.000180
0.022784
0.000284
0.011562
0.000598
0.006137
0.002192
0.000076
0.006830
453
5352539
0.287938
0.395974
0.010457
0.446548
0.455063
0.467877
0.468945
0.107204
0.191489
...
0.002059
0.001242
0.009290
0.005130
0.001718
0.000411
0.002962
0.005169
0.001618
0.008789
455
5355850
0.371595
0.447935
0.019591
0.474844
0.425913
0.481365
0.427843
0.168954
0.241135
...
0.000418
0.000895
0.035679
0.001801
0.011295
0.000949
0.010221
0.008789
0.000643
0.009720
456
5362588
0.221790
0.281390
0.008922
0.333812
0.338035
0.355390
0.352217
0.107204
0.134752
...
0.001048
0.000754
0.004504
0.003341
0.001162
0.000269
0.001424
0.003065
0.000868
0.005194
457
5362798
0.169066
0.359617
0.005281
0.440925
0.464235
0.476558
0.486440
0.087479
0.092199
...
0.002363
0.000479
0.003501
0.005046
0.001666
0.000357
0.001644
0.001508
0.002096
0.005827
458
5362814
0.144942
0.244903
0.009918
0.288167
0.290458
0.305593
0.302637
0.111492
0.148936
...
0.000322
0.000521
0.004444
0.001196
0.001149
0.000067
0.000554
0.001712
0.000311
0.007972
459
5363233
0.243191
0.354364
0.007551
0.425162
0.433172
0.455011
0.451378
0.102058
0.106383
...
0.002073
0.000705
0.007287
0.006906
0.000987
0.000469
0.002076
0.005237
0.002998
0.006299
460
5363388
0.225681
0.323000
0.012497
0.355199
0.359926
0.368256
0.369770
0.114065
0.234043
...
0.001015
0.000985
0.010255
0.003841
0.001870
0.000266
0.002133
0.005450
0.000845
0.006944
463
5364729
0.305447
0.317747
0.016146
0.339435
0.328846
0.346709
0.334708
0.135506
0.269504
...
0.000318
0.000512
0.006072
0.002630
0.001954
0.000151
0.001760
0.003748
0.000547
0.002675
464
5365027
0.149416
0.323130
0.005600
0.395250
0.416675
0.426733
0.436859
0.087479
0.099291
...
0.002422
0.000494
0.003810
0.005245
0.001792
0.000360
0.001341
0.001579
0.002414
0.006504
465
5365049
0.249027
0.359487
0.011363
0.400873
0.407486
0.418052
0.419350
0.109777
0.212766
...
0.001227
0.001253
0.009518
0.004420
0.001754
0.000264
0.002511
0.006233
0.001196
0.006299
466
5366244
0.496109
0.724488
0.003812
0.897613
0.939935
0.974635
0.982273
0.084048
0.049645
...
0.009365
0.001020
0.003174
0.011517
0.001111
0.001759
0.005442
0.004622
0.029679
0.006400
467
5367698
0.418288
0.520909
0.016538
0.566194
0.521050
0.580986
0.527019
0.153516
0.198582
...
0.000903
0.000851
0.035156
0.003198
0.010092
0.001589
0.015425
0.011758
0.000928
0.010794
468
5367706
0.394942
0.484422
0.017918
0.520519
0.473473
0.531162
0.477424
0.160377
0.219858
...
0.000728
0.000872
0.037461
0.002727
0.010668
0.001113
0.010841
0.008469
0.000780
0.011687
469
5368076
0.270428
0.395974
0.010457
0.446548
0.455063
0.467877
0.468945
0.107204
0.191489
...
0.002347
0.001215
0.011080
0.007561
0.002551
0.000465
0.002433
0.006989
0.003256
0.005653
472
6429333
0.295720
0.333351
0.020954
0.362138
0.308869
0.368878
0.311115
0.190395
0.198582
...
0.000050
0.000180
0.026739
0.000505
0.009365
0.000598
0.006881
0.003624
0.000135
0.006830
473
6999977
0.196498
0.333377
0.013852
0.346674
0.365321
0.352816
0.372819
0.103774
0.333333
...
0.002491
0.001310
0.013662
0.006364
0.003052
0.000476
0.003673
0.004628
0.002189
0.019025
475
16220109
0.392996
0.427208
0.012122
0.476460
0.471560
0.496154
0.483464
0.120069
0.191489
...
0.001189
0.001205
0.008554
0.004207
0.001600
0.000355
0.002870
0.006900
0.001165
0.005487
338 rows × 14613 columns
In [ ]:
Content source: dream-olfaction/olfaction-prediction
Similar notebooks: