Calculates the feature-selection scores (randomized Lasso) for each CV split.


In [1]:
from __future__ import print_function
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import RandomizedLasso
import sys
import os

In [2]:
# load lb, test and CV CIDs


def _read_cid_file(relative_path):
    """Return the CIDs stored one per line in a text file, as a list of ints.

    relative_path is appended to the literal string '__file__' and the result
    is normalised with os.path.abspath: the first '/..' cancels that dummy
    path component and the remainder is resolved against the current working
    directory.

    Blank lines (for example an empty last line) are skipped instead of
    making int() raise ValueError.
    """
    with open(os.path.abspath('__file__' + relative_path)) as f:
        return [int(line) for line in f if line.strip()]


# load LB CIDs
lb_CIDs = _read_cid_file("/../../../../data/CID_leaderboard.txt")

# load test CIDs
test_CIDs = _read_cid_file("/../../../../data/CID_testset.txt")

In [3]:
# Load the feature table (one row per molecule; the first column is the CID,
# the remaining columns are the features) and preview its first rows.
# The commented-out line below reads the same kind of table from another path.
#features = pd.read_csv('../../../data/linear_scores_and_features/features_dragon_morgan.csv')
features = pd.read_csv('features.csv')
features.head()


Out[3]:
CID complexity from pubmed MW AMW Sv Se Sp Si Mv Me ... 91305518_2 91411526_2 91541756_2 91552833_2 91563027_2 91595028_2 91614181_2 91617014_2 91617930_2 91618238_2
0 126 0.181128 0.270753 0.030587 0.262264 0.219126 0.253846 0.214989 0.216981 0.425532 ... 0.000013 0.000331 0.014024 0.000296 0.021098 0.000186 0.003159 0.002299 0.000138 0.011080
1 176 0.060311 0.109331 0.025411 0.096943 0.105579 0.090940 0.107335 0.125214 0.659574 ... 0.000124 0.000205 0.008391 0.000930 0.001442 0.000094 0.000607 0.001362 0.000229 0.004162
2 177 0.020039 0.067721 0.015501 0.075556 0.083688 0.078074 0.089782 0.106346 0.382979 ... 0.000014 0.000092 0.000961 0.000339 0.000657 0.000008 0.000098 0.000221 0.000037 0.001932
3 180 0.051167 0.104208 0.011542 0.121231 0.131248 0.127898 0.139362 0.099485 0.269504 ... 0.000124 0.000205 0.003729 0.000930 0.000641 0.000094 0.000607 0.001961 0.000229 0.001850
4 196 0.221790 0.333247 0.023779 0.306622 0.308572 0.294339 0.305729 0.138079 0.539007 ... 0.001029 0.000737 0.013662 0.009383 0.001954 0.000820 0.003130 0.005600 0.002189 0.010702

5 rows × 14613 columns


In [4]:
#trainsplits = pd.read_csv('/media/gabor/H/python_from_C/final_ofaction_for_paper_2/data/cv_splits_train_big.csv',header=None)
#testsplits = pd.read_csv('/media/gabor/H/python_from_C/final_ofaction_for_paper_2/data/cv_splits_test_big.csv',header=None)

In [5]:
# Read the CV split tables. header=None makes pandas treat the first row as
# data, so the columns are simply numbered 0, 1, 2, ...
# Row k of each table is used further down as the CID list of split k.
trainsplits = pd.read_csv('../../../data/cv_splits_train_bigger.csv',header=None)
testsplits = pd.read_csv('../../../data/cv_splits_test_bigger.csv',header=None)

In [6]:
# Lookup table: position of a descriptor (0..20) -> descriptor name.
# The names double as column labels of the targets table.
descriptor = dict(enumerate([
    u'INTENSITY/STRENGTH', u'VALENCE/PLEASANTNESS', u'BAKERY',
    u'SWEET', u'FRUIT', u'FISH', u'GARLIC', u'SPICES', u'COLD', u'SOUR', u'BURNT',
    u'ACID', u'WARM', u'MUSKY', u'SWEATY', u'AMMONIA/URINOUS', u'DECAYED', u'WOOD',
    u'GRASS', u'FLOWER', u'CHEMICAL',
]))

In [7]:
# Load the target table and label its first column 'CID'; all other column
# names are kept exactly as read from the file.
targets = pd.read_csv('targets_for_feature_selection.csv')
target_columns = list(targets.columns)
target_columns[0] = 'CID'
targets.columns = target_columns
targets.head()


Out[7]:
CID INTENSITY/STRENGTH VALENCE/PLEASANTNESS BAKERY SWEET FRUIT FISH GARLIC SPICES COLD ... ACID WARM MUSKY SWEATY AMMONIA/URINOUS DECAYED WOOD GRASS FLOWER CHEMICAL
0 126 37.102041 50.081081 0.500000 21.959459 7.405405 0.175676 2.162162 4.554054 4.662162 ... 4.094595 2.486486 7.216216 1.391892 2.554054 4.675676 0.891892 1.662162 8.094595 15.283784
1 176 8.051020 45.344828 2.275862 5.103448 1.137931 0.000000 6.448276 5.965517 4.793103 ... 3.896552 5.448276 6.448276 3.551724 3.275862 4.275862 2.413793 2.482759 6.724138 7.724138
2 177 22.387755 48.418182 9.363636 19.781818 3.000000 0.763636 1.254545 2.472727 6.709091 ... 3.563636 3.218182 6.218182 1.945455 2.727273 3.872727 0.727273 3.454545 4.090909 14.200000
3 196 14.530612 44.304348 1.304348 9.804348 0.913043 0.500000 3.239130 7.108696 2.152174 ... 5.543478 6.695652 9.043478 7.304348 2.152174 4.217391 1.195652 1.543478 6.695652 7.847826
4 239 24.683673 51.724138 1.362069 13.500000 4.293103 1.482759 4.534483 6.189655 4.965517 ... 3.241379 5.068966 6.534483 0.793103 0.931034 5.413793 3.120690 5.775862 9.396552 10.862069

5 rows × 22 columns


In [8]:
import datetime

In [9]:
# (number of rows, number of columns) of the feature table
features.shape


Out[9]:
(476, 14613)

In [13]:
# Make the project's helper modules importable, then build the ascending list
# of CIDs belonging to the training and leaderboard sets combined.
import os
import sys

# The checkout location was hard-coded to one user's machine. It can now be
# overridden with the OLFACTION_PREDICTION_ROOT environment variable; when the
# variable is unset the original path is used, so existing setups keep working.
opc_root = os.environ.get('OLFACTION_PREDICTION_ROOT',
                          '/Users/rgerkin/Dropbox/science/olfaction-prediction')
for module_dir in (opc_root, os.path.join(opc_root, 'opc_python', 'utils')):
    if module_dir not in sys.path:  # do not pile up duplicates when the cell is re-run
        sys.path.append(module_dir)

import loading  # project-local module, expected to be found via the entries added above
training_leaderboard_CIDs = sorted(loading.get_CIDs('training')+loading.get_CIDs('leaderboard'))

In [17]:
# Build the shuffle splits over the training + leaderboard CIDs and print the
# held-out CIDs of the first few splits.
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20, so
# this cell needs an older release. Its replacement,
# sklearn.model_selection.ShuffleSplit, has a different call signature, and the
# feature-selection cell relies on iterating `splits` directly.
from sklearn.cross_validation import ShuffleSplit
test_size = 0.17
splits = ShuffleSplit(len(training_leaderboard_CIDs),n_iter=250,test_size=test_size,random_state=0)
n_preview = 5  # how many splits to print
for n_shown, (train, test) in enumerate(splits, start=1):
    # `test` holds positions into training_leaderboard_CIDs; translate to CIDs.
    # The comprehension variable is deliberately distinct from the split
    # counter: under Python 2 a list comprehension leaks its variable, which
    # would overwrite a counter of the same name and break the stop condition.
    print([training_leaderboard_CIDs[pos] for pos in test])
    if n_shown == n_preview:
        break


[23235, 7824, 6276, 14104, 8030, 61185, 13216, 14514, 12810, 7519, 170833, 8091, 93375, 62572, 325, 12580, 6501, 22386, 61918, 7151, 8063, 159055, 62374, 8078, 62444, 8918, 7991, 61138, 379, 6114390, 8077, 556940, 8892, 6826, 2346, 6943, 61199, 8697, 637776, 6057, 14228, 89440, 61337, 8193, 241, 176, 61204, 61177, 6213, 31249, 8093, 165675, 61192, 519539, 565690, 7463, 8857, 5363491, 61151, 6050, 263, 1001, 5541, 6997, 440967, 239, 61130, 10882, 78925, 18635]
[7127, 18635, 1549026, 7632, 10882, 6106, 2969, 311, 6213, 8078, 356, 8051, 8615, 7047, 31210, 7409, 61331, 1030, 61945, 2879, 5366244, 23642, 13436, 15606, 61177, 8174, 6276, 5363233, 7820, 11980, 8077, 454, 7761, 1068, 8205, 6137, 31260, 778574, 2345, 14257, 18827, 31252, 1060, 326, 14525, 7341, 13187, 8180, 61809, 61052, 6920, 7824, 10364, 6998, 6054, 8363, 8093, 244, 1550470, 61209, 8082, 11428, 7969, 61138, 8857, 12206, 31249, 7136, 8375, 176]
[10895, 62444, 5541, 24020, 14328, 8048, 9609, 2345, 887, 8125, 61386, 7136, 61204, 5363491, 520108, 6374, 8892, 11525, 6050, 33032, 89440, 18827, 6054, 5367706, 6569, 2758, 104721, 11419, 7122, 7969, 240, 650, 643820, 2214, 5610, 7194, 12748, 8467, 31260, 126, 6386, 9261, 7768, 7894, 325, 61252, 61199, 6137, 8914, 1549778, 638024, 8785, 31210, 7803, 7915, 1551246, 244, 61130, 165675, 638014, 356, 7921, 93375, 14296, 2346, 7410, 7695, 6287, 7916, 6549]
[11980, 650, 89440, 7601, 5363388, 444539, 6054, 62374, 235414, 78925, 6549, 7194, 8857, 1136, 8467, 61209, 5365027, 22873, 637776, 7795, 565690, 24834, 5364729, 7916, 8038, 62572, 6386, 6826, 8175, 8452, 241, 4133, 8294, 18827, 1183, 61252, 7151, 61155, 7583, 10430, 8785, 1549778, 6140, 61659, 61177, 69963, 9016, 12587, 2969, 8375, 15717, 18635, 526618, 660, 679, 24197, 6057, 196, 7635, 6998, 311, 7047, 5368076, 261, 61138, 7894, 170833, 228769, 5960, 5315892]
[2879, 16255, 15606, 11527, 10430, 7151, 5367698, 641423, 6943, 61204, 6137, 660, 8193, 10882, 11124, 8363, 10448, 8375, 8785, 31404, 5780, 7991, 61653, 6997, 24197, 12580, 7997, 7601, 650, 6569, 22386, 14104, 14296, 244, 24473, 11525, 61027, 15380, 228769, 62902, 326, 8180, 5280450, 8163, 9256, 101010, 27458, 24834, 2682, 520191, 16324, 10400, 8294, 8918, 14286, 31272, 10285, 7463, 11980, 6584, 61024, 62725, 61527, 8186, 31249, 4133, 519539, 6998, 1060, 5365027]

In [22]:
# First rows of the test-split table read from CSV (one row per split)
testsplits.head()


Out[22]:
0 1 2 3 4 5 6 7 8 9 ... 59 60 61 62 63 64 65 66 67 68
0 23235 7824 6276 14104 8030 61185 13216 14514 12810 7519 ... 6050 263 1001 5541 6997 440967 239 61130 10882 78925
1 7127 18635 1549026 7632 10882 6106 2969 311 6213 8078 ... 61209 8082 11428 7969 61138 8857 12206 31249 7136 8375
2 10895 62444 5541 24020 14328 8048 9609 2345 887 8125 ... 638014 356 7921 93375 14296 2346 7410 7695 6287 7916
3 11980 650 89440 7601 5363388 444539 6054 62374 235414 78925 ... 6998 311 7047 5368076 261 61138 7894 170833 228769 5960
4 2879 16255 15606 11527 10430 7151 5367698 641423 6943 61204 ... 6584 61024 62725 61527 8186 31249 4133 519539 6998 1060

5 rows × 69 columns


In [30]:
# Show the first row of the train-split table in sorted order; real entries
# are converted to int, NaN entries are passed through unchanged.
first_train_row = sorted(trainsplits.loc[0])
[value if np.isnan(value) else int(value) for value in first_train_row]


Out[30]:
[126,
 177,
 196,
 240,
 243,
 244,
 261,
 311,
 326,
 356,
 454,
 460,
 650,
 660,
 679,
 807,
 875,
 887,
 957,
 994,
 1030,
 1032,
 1049,
 1060,
 1068,
 1110,
 1136,
 1183,
 2214,
 2345,
 2682,
 2758,
 2879,
 2969,
 3776,
 3893,
 4133,
 5610,
 5779,
 5780,
 5950,
 5960,
 5961,
 6054,
 6106,
 6137,
 6140,
 6184,
 6287,
 6374,
 6386,
 6448,
 6505,
 6549,
 6560,
 6569,
 6584,
 6590,
 6658,
 6753,
 6920,
 6989,
 6998,
 7047,
 7059,
 7095,
 7119,
 7122,
 7127,
 7136,
 7144,
 7147,
 7150,
 7165,
 7194,
 7288,
 7335,
 7341,
 7360,
 7361,
 7409,
 7410,
 7500,
 7583,
 7593,
 7601,
 7632,
 7635,
 7654,
 7695,
 7720,
 7731,
 7749,
 7761,
 7762,
 7765,
 7768,
 7792,
 7795,
 7799,
 7803,
 7820,
 7826,
 7894,
 7915,
 7916,
 7921,
 7937,
 7967,
 7969,
 7983,
 7997,
 8007,
 8038,
 8042,
 8048,
 8051,
 8082,
 8103,
 8118,
 8122,
 8125,
 8129,
 8137,
 8159,
 8163,
 8174,
 8175,
 8180,
 8184,
 8186,
 8205,
 8294,
 8363,
 8375,
 8452,
 8456,
 8467,
 8615,
 8635,
 8658,
 8712,
 8723,
 8785,
 8797,
 8908,
 8914,
 9016,
 9024,
 9025,
 9256,
 9261,
 9589,
 9609,
 9862,
 10285,
 10364,
 10400,
 10430,
 10448,
 10722,
 10748,
 10797,
 10890,
 10895,
 10925,
 11086,
 11124,
 11419,
 11428,
 11525,
 11527,
 11529,
 11569,
 11583,
 11614,
 11617,
 11902,
 11980,
 12097,
 12178,
 12180,
 12206,
 12297,
 12327,
 12348,
 12506,
 12587,
 12741,
 12748,
 12813,
 13187,
 13204,
 13436,
 13561,
 14257,
 14286,
 14296,
 14328,
 14491,
 14525,
 15037,
 15380,
 15510,
 15606,
 15717,
 16255,
 16324,
 16741,
 17121,
 17525,
 17617,
 18554,
 18827,
 19310,
 21057,
 21648,
 22310,
 22873,
 23642,
 24020,
 24116,
 24197,
 24473,
 24513,
 24834,
 26331,
 27457,
 27458,
 31209,
 31210,
 31225,
 31234,
 31244,
 31246,
 31252,
 31260,
 31265,
 31266,
 31268,
 31272,
 31404,
 32594,
 33032,
 36822,
 60999,
 61005,
 61024,
 61027,
 61048,
 61052,
 61155,
 61209,
 61229,
 61252,
 61293,
 61325,
 61331,
 61386,
 61408,
 61527,
 61641,
 61653,
 61659,
 61670,
 61771,
 61809,
 61945,
 62087,
 62144,
 62332,
 62375,
 62378,
 62580,
 62725,
 62835,
 62900,
 62902,
 69963,
 82227,
 85522,
 88454,
 92979,
 98330,
 101010,
 101604,
 104721,
 106997,
 145742,
 220674,
 228769,
 235414,
 246728,
 439570,
 444539,
 444683,
 444972,
 520108,
 520191,
 520296,
 521238,
 526618,
 595928,
 637563,
 637566,
 637796,
 638014,
 638024,
 641256,
 641423,
 643820,
 778574,
 1549026,
 1549778,
 1550470,
 1551246,
 3578033,
 5273467,
 5280450,
 5315892,
 5352539,
 5355850,
 5362588,
 5362798,
 5362814,
 5363233,
 5363388,
 5364729,
 5365027,
 5365049,
 5366244,
 5367698,
 5367706,
 5368076,
 6429333,
 6999977,
 16220109,
 nan]

In [64]:
# Randomized-Lasso feature selection: for every CV split, hold out that split's
# test CIDs and score each feature column against each descriptor.

# Remove the final test-set molecules from the feature table. The filter does
# not depend on the split, so it is applied once here instead of on every pass
# through the loop.
features = features[~features.CID.isin(test_CIDs)]

# Materialise the (train, test) index pairs once rather than once per split.
all_splits = list(splits)

for k in range(0,5): #set the range of splits here
    # set a cv split as holdout data.
    # Row k of testsplits holds the held-out CIDs of split k. The row labels
    # are the default integers, so .loc selects the same row the deprecated
    # .ix indexer did.
    lb_CIDs = testsplits.loc[k,:].values
    #print(targets.shape,features.shape)

    train_targets = targets[~targets['CID'].isin(lb_CIDs)]  # remove lb_data and use the rest
    train_features = features[~features.CID.isin(lb_CIDs)] # remove lb_data and use the rest
    train,test = all_splits[k]  # only consumed by the commented-out consistency check below

    # Commented-out consistency check: compares the CIDs actually used for
    # training with the CIDs the ShuffleSplit / trainsplits table say should be used.
    #should_use = [training_leaderboard_CIDs[i] for i in train]
    #used = list(train_features['CID'])
    #should_use = [int(x) for x in sorted(list(trainsplits.loc[k])) if not np.isnan(x)]
    #print("   ")
    #print("k=%d" % k)
    #print("Used: n=%d" % len(used),used)
    #print("   ")
    #print("Should use: n=%d" % len(should_use),should_use)
    #print("Equal?",used == should_use)
    #print(set(used).difference(should_use))
    #print("   ")
    #print("   ")
    #print("   ")
    #print("   ")
    #print("   ")

    #feature selection
    #if not os.path.exists('scores/LB_scores_morgan' + str(k)):
    #    os.makedirs('scores/LB_scores_morgan' + str(k))

    # Every column except the first (the CID) is a feature. The selection is
    # positional, hence .iloc (replaces the deprecated .ix). X is the same for
    # all descriptors of a split, so it is built once per split.
    # NOTE(review): X and Y are paired by row position, which assumes
    # train_features and train_targets list the same CIDs in the same order - TODO confirm.
    X = train_features.iloc[:,1:]

    for idx in range(len(descriptor)):
        # in case the selection stops at a point, set the right numbers to continue (selection takes time)
        # NOTE(review): because of the `and`, only (k, idx) pairs with BOTH
        # values below their thresholds are skipped; with the thresholds at 0
        # nothing is skipped. Confirm this is the intended resume behaviour.
        if k < 0 and idx < 0:
            continue

        print(datetime.datetime.now())
        print('split ' + str(k))
        print('selection for descriptor: ' + descriptor[idx])
        sys.stdout.flush()
        Y = train_targets[descriptor[idx]]

        selector = RandomizedLasso(alpha=0.025,selection_threshold=0.001,verbose=1,n_resampling=10,
                                  random_state=12).fit(X,Y)

        # A second fit of the same model against the unfiltered `targets`
        # column used to follow here. Its result was never read, and
        # `targets` still contains the held-out rows that X lacks, so the
        # sample counts did not match; it has been removed.

        # One score per feature column, labelled with the feature name.
        scores = pd.DataFrame(selector.scores_,index=X.columns)
        print(selector.scores_.shape,X.columns)
        print(scores.head())
        #scores.to_csv('scores/LB_scores_morgan' + str(k) + '/scores_'+str(idx)+'.csv')
    """


   
k=0
Used: n=338 [126, 177, 196, 240, 243, 244, 261, 311, 326, 356, 454, 460, 650, 660, 679, 807, 875, 887, 957, 994, 1030, 1032, 1049, 1060, 1068, 1110, 1136, 1183, 2214, 2345, 2682, 2758, 2879, 2969, 3776, 3893, 4133, 5610, 5779, 5780, 5950, 5960, 5961, 6054, 6106, 6137, 6140, 6184, 6287, 6374, 6386, 6448, 6505, 6549, 6560, 6569, 6584, 6590, 6658, 6753, 6920, 6989, 6998, 7047, 7059, 7095, 7119, 7122, 7127, 7136, 7144, 7147, 7150, 7165, 7194, 7288, 7335, 7341, 7360, 7361, 7409, 7410, 7500, 7583, 7593, 7601, 7632, 7635, 7654, 7695, 7720, 7731, 7749, 7761, 7762, 7765, 7768, 7792, 7795, 7799, 7803, 7820, 7826, 7894, 7915, 7916, 7921, 7937, 7967, 7969, 7983, 7997, 8007, 8038, 8042, 8048, 8051, 8082, 8103, 8118, 8122, 8125, 8129, 8137, 8159, 8163, 8174, 8175, 8180, 8184, 8186, 8205, 8294, 8363, 8375, 8452, 8456, 8467, 8615, 8635, 8658, 8712, 8723, 8785, 8797, 8908, 8914, 9016, 9024, 9025, 9256, 9261, 9589, 9609, 9862, 10285, 10364, 10400, 10430, 10448, 10722, 10748, 10797, 10890, 10895, 10925, 11086, 11124, 11419, 11428, 11525, 11527, 11529, 11569, 11583, 11614, 11617, 11902, 11980, 12097, 12178, 12180, 12206, 12297, 12327, 12348, 12506, 12587, 12741, 12748, 12813, 13187, 13204, 13436, 13561, 14257, 14286, 14296, 14328, 14491, 14525, 15037, 15380, 15510, 15606, 15717, 16255, 16324, 16741, 17121, 17525, 17617, 18554, 18635, 18827, 19310, 21057, 21648, 22310, 22873, 23642, 24020, 24116, 24197, 24473, 24513, 24834, 26331, 27457, 27458, 31209, 31210, 31225, 31234, 31244, 31246, 31252, 31260, 31265, 31266, 31268, 31272, 31404, 32594, 33032, 36822, 60999, 61005, 61024, 61027, 61048, 61052, 61155, 61209, 61229, 61252, 61293, 61325, 61331, 61386, 61408, 61527, 61641, 61653, 61659, 61670, 61771, 61809, 61945, 62087, 62144, 62332, 62375, 62378, 62580, 62725, 62835, 62900, 62902, 69963, 82227, 85522, 88454, 92979, 98330, 101010, 101604, 104721, 106997, 145742, 220674, 228769, 235414, 246728, 439570, 444539, 444683, 444972, 520108, 520191, 520296, 521238, 526618, 595928, 637563, 
637566, 637796, 638014, 638024, 641256, 641423, 643820, 778574, 1549026, 1549778, 1550470, 1551246, 3578033, 5273467, 5280450, 5315892, 5352539, 5355850, 5362588, 5362798, 5362814, 5363233, 5363388, 5364729, 5365027, 5365049, 5366244, 5367698, 5367706, 5368076, 6429333, 6999977, 16220109]
   
Should use: n=337 [240, 8174, 16220109, 19310, 243, 875, 6560, 679, 643820, 356, 5364729, 11529, 8129, 61252, 6920, 6448, 6106, 7761, 8007, 14296, 32594, 14286, 1549026, 88454, 5367706, 5315892, 7894, 8186, 6137, 244, 5367698, 18827, 24116, 12297, 26331, 14257, 7632, 101010, 6386, 24020, 10748, 60999, 106997, 7059, 7635, 439570, 14525, 8452, 7360, 8103, 7150, 9609, 807, 14491, 12741, 521238, 5950, 15717, 61048, 7194, 13436, 17525, 8712, 8163, 11086, 6505, 16324, 61670, 7500, 61771, 10925, 1549778, 460, 62835, 10430, 1060, 7762, 8175, 7583, 10400, 18554, 61659, 7768, 3578033, 7803, 5365027, 7749, 61155, 8038, 31265, 7916, 8635, 61809, 33032, 7997, 1136, 5273467, 8363, 1183, 12506, 7361, 7127, 6287, 8615, 8785, 10895, 11527, 5365049, 62580, 12178, 7654, 61386, 5610, 62378, 12180, 13561, 6753, 8042, 5352539, 11980, 12348, 1049, 9024, 61052, 31244, 5961, 145742, 5366244, 1030, 177, 12748, 8180, 12327, 1551246, 15037, 8048, 637566, 24834, 31260, 31272, 5362588, 16255, 2214, 11525, 92979, 11617, 6549, 957, 31209, 12206, 4133, 526618, 7967, 7288, 7731, 8467, 31268, 98330, 62087, 2879, 650, 246728, 6998, 61408, 27458, 6590, 326, 101604, 12097, 5362814, 17121, 13204, 8205, 7593, 8294, 7119, 10364, 6429333, 7795, 62902, 10285, 11583, 1550470, 7720, 8125, 8184, 8159, 454, 31225, 7695, 637563, 8051, 14328, 22873, 62375, 16741, 61653, 660, 12813, 61527, 196, 31266, 7915, 6989, 9261, 61293, 5280450, 31234, 228769, 9025, 520191, 7047, 22310, 311, 61331, 7136, 24473, 2345, 5355850, 6184, 2969, 27457, 5960, 11614, 62725, 5368076, 638014, 7826, 12587, 6374, 61325, 7969, 9862, 887, 3893, 126, 11124, 104721, 7409, 61209, 5363388, 8914, 21057, 7335, 641256, 7341, 444539, 8658, 6569, 61229, 5780, 11569, 31252, 13187, 8122, 8082, 9016, 7983, 520296, 6999977, 23642, 62332, 11428, 7122, 7820, 220674, 637796, 7165, 7095, 24513, 7799, 61024, 6140, 61945, 21648, 520108, 3776, 7601, 638024, 31210, 444972, 2682, 6054, 235414, 7937, 61005, 1032, 9256, 8375, 8137, 15606, 11419, 1068, 1110, 7921, 
9589, 778574, 36822, 595928, 8118, 31404, 641423, 8908, 7410, 85522, 69963, 10890, 15510, 7765, 24197, 6658, 62900, 994, 8456, 82227, 5362798, 8797, 5363233, 2758, 10722, 61641, 7147, 6584, 7144, 61027, 15380, 31246, 11902, 261, 444683, 10797, 17617, 62144, 10448, 7792, 5779, 8723]
Equal? False
{18635}
   
   
   
   
   
   
k=1
Used: n=338 [126, 176, 177, 196, 239, 240, 241, 243, 261, 263, 325, 379, 460, 650, 660, 679, 807, 875, 887, 957, 994, 1001, 1032, 1049, 1110, 1136, 1183, 2214, 2346, 2682, 2758, 3776, 3893, 4133, 5541, 5610, 5779, 5780, 5950, 5960, 5961, 6050, 6057, 6140, 6184, 6287, 6374, 6386, 6448, 6501, 6505, 6549, 6560, 6569, 6584, 6590, 6658, 6753, 6826, 6943, 6989, 6997, 7059, 7095, 7119, 7122, 7144, 7147, 7150, 7151, 7165, 7194, 7288, 7335, 7360, 7361, 7410, 7463, 7500, 7519, 7583, 7593, 7601, 7635, 7654, 7695, 7720, 7731, 7749, 7762, 7765, 7768, 7792, 7795, 7799, 7803, 7826, 7894, 7915, 7916, 7921, 7937, 7967, 7983, 7991, 7997, 8007, 8030, 8038, 8042, 8048, 8063, 8091, 8103, 8118, 8122, 8125, 8129, 8137, 8159, 8163, 8175, 8184, 8186, 8193, 8294, 8452, 8456, 8467, 8635, 8658, 8697, 8712, 8723, 8785, 8797, 8892, 8908, 8914, 8918, 9016, 9024, 9025, 9256, 9261, 9589, 9609, 9862, 10285, 10400, 10430, 10448, 10722, 10748, 10797, 10890, 10895, 10925, 11086, 11124, 11419, 11525, 11527, 11529, 11569, 11583, 11614, 11617, 11902, 12097, 12178, 12180, 12297, 12327, 12348, 12506, 12580, 12587, 12741, 12748, 12810, 12813, 13204, 13216, 13561, 14104, 14228, 14286, 14296, 14328, 14491, 14514, 15037, 15380, 15510, 15717, 16255, 16324, 16741, 17121, 17525, 17617, 18554, 19310, 21057, 21648, 22310, 22386, 22873, 23235, 24020, 24116, 24197, 24473, 24513, 24834, 26331, 27457, 27458, 31209, 31225, 31234, 31244, 31246, 31265, 31266, 31268, 31272, 31404, 32594, 33032, 36822, 60999, 61005, 61024, 61027, 61048, 61130, 61151, 61155, 61185, 61192, 61199, 61204, 61229, 61252, 61293, 61325, 61337, 61386, 61408, 61527, 61641, 61653, 61659, 61670, 61771, 61918, 62087, 62144, 62332, 62374, 62375, 62378, 62444, 62572, 62580, 62725, 62835, 62900, 62902, 69963, 78925, 82227, 85522, 88454, 89440, 92979, 93375, 98330, 101010, 101604, 104721, 106997, 145742, 159055, 165675, 170833, 220674, 228769, 235414, 246728, 439570, 440967, 444539, 444683, 444972, 519539, 520108, 520191, 520296, 521238, 526618, 556940, 
565690, 595928, 637563, 637566, 637776, 637796, 638014, 638024, 641256, 641423, 643820, 1549778, 1551246, 3578033, 5273467, 5280450, 5315892, 5352539, 5355850, 5362588, 5362798, 5362814, 5363388, 5363491, 5364729, 5365027, 5365049, 5367698, 5367706, 5368076, 6114390, 6429333, 6999977, 16220109]
   
Should use: n=337 [62375, 240, 8137, 887, 24116, 33032, 8186, 36822, 8118, 1551246, 22873, 520296, 21648, 660, 17617, 220674, 61005, 12297, 11419, 62444, 6057, 556940, 82227, 4133, 6549, 15037, 8892, 7583, 31209, 526618, 637563, 650, 11617, 85522, 10448, 6114390, 14514, 18554, 5960, 5363388, 7410, 61185, 1136, 61918, 2758, 8129, 9016, 61048, 61204, 263, 61325, 6560, 26331, 14296, 7792, 31244, 11086, 6429333, 92979, 27457, 177, 261, 15380, 17525, 31404, 228769, 3776, 5362798, 12587, 5362814, 7122, 637776, 22310, 1049, 12180, 6590, 5950, 69963, 23235, 7768, 2346, 14104, 5780, 61252, 641423, 61229, 9024, 5280450, 11525, 6943, 6448, 7720, 6050, 62900, 78925, 7894, 7144, 16324, 1183, 61771, 8723, 31234, 7795, 325, 8184, 13561, 16220109, 15510, 62572, 101010, 7361, 8103, 27458, 10722, 9256, 8908, 6184, 61293, 444539, 6569, 5365049, 8159, 61024, 10430, 460, 61386, 521238, 440967, 24473, 637566, 520108, 6140, 1549778, 98330, 8697, 9862, 5779, 24020, 5541, 10925, 5367706, 6374, 6997, 14286, 439570, 61151, 8063, 10895, 62902, 7967, 7921, 19310, 7799, 8163, 8091, 12178, 60999, 807, 994, 8038, 2214, 7059, 8193, 62725, 6989, 5368076, 31246, 61199, 7695, 5363491, 62087, 7500, 643820, 31268, 7194, 24513, 17121, 61670, 1032, 519539, 5365027, 6584, 7147, 11902, 638014, 8456, 14491, 10285, 5273467, 170833, 7803, 9261, 8175, 61192, 5352539, 5315892, 159055, 6505, 61408, 235414, 24834, 8294, 31265, 61337, 8797, 638024, 12580, 7463, 104721, 12348, 13216, 7762, 32594, 12506, 7095, 8467, 8658, 379, 15717, 61659, 241, 145742, 565690, 7593, 6287, 11583, 5961, 7151, 11569, 875, 520191, 12327, 6753, 101604, 11527, 239, 10400, 8452, 9589, 10748, 7288, 7991, 14228, 8785, 8914, 61527, 61653, 7765, 1110, 11529, 31266, 7360, 7826, 444683, 6826, 7150, 2682, 8918, 126, 5610, 8712, 8125, 6999977, 7635, 7997, 21057, 12741, 7119, 62378, 7519, 62144, 7654, 12097, 10797, 165675, 89440, 16255, 6658, 7335, 7915, 8030, 10890, 11124, 16741, 12748, 1001, 5355850, 31272, 8635, 5364729, 196, 8007, 62374, 
246728, 5362588, 9609, 957, 61130, 7937, 61641, 13204, 7983, 12810, 12813, 62580, 595928, 31225, 62835, 5367698, 7731, 7165, 24197, 6386, 3893, 106997, 444972, 14328, 8048, 7749, 62332, 61027, 679, 6501, 61155, 7916, 22386, 637796, 8122, 11614, 641256, 88454, 3578033, 9025, 8042, 7601, 243, 93375]
Equal? False
{176}
   
   
   
   
   
   
k=2
Used: n=338 [176, 177, 196, 239, 241, 243, 261, 263, 311, 326, 379, 454, 460, 660, 679, 807, 875, 957, 994, 1001, 1030, 1032, 1049, 1060, 1068, 1110, 1136, 1183, 2682, 2879, 2969, 3776, 3893, 4133, 5779, 5780, 5950, 5960, 5961, 6057, 6106, 6140, 6184, 6213, 6276, 6448, 6501, 6505, 6549, 6560, 6584, 6590, 6658, 6753, 6826, 6920, 6943, 6989, 6997, 6998, 7047, 7059, 7095, 7119, 7127, 7144, 7147, 7150, 7151, 7165, 7288, 7335, 7341, 7360, 7361, 7409, 7463, 7500, 7519, 7583, 7593, 7601, 7632, 7635, 7654, 7720, 7731, 7749, 7761, 7762, 7765, 7792, 7795, 7799, 7820, 7824, 7826, 7937, 7967, 7983, 7991, 7997, 8007, 8030, 8038, 8042, 8051, 8063, 8077, 8078, 8082, 8091, 8093, 8103, 8118, 8122, 8129, 8137, 8159, 8163, 8174, 8175, 8180, 8184, 8186, 8193, 8205, 8294, 8363, 8375, 8452, 8456, 8615, 8635, 8658, 8697, 8712, 8723, 8797, 8857, 8908, 8918, 9016, 9024, 9025, 9256, 9589, 9862, 10285, 10364, 10400, 10430, 10448, 10722, 10748, 10797, 10882, 10890, 10925, 11086, 11124, 11428, 11527, 11529, 11569, 11583, 11614, 11617, 11902, 11980, 12097, 12178, 12180, 12206, 12297, 12327, 12348, 12506, 12580, 12587, 12741, 12810, 12813, 13187, 13204, 13216, 13436, 13561, 14104, 14228, 14257, 14286, 14491, 14514, 14525, 15037, 15380, 15510, 15606, 15717, 16255, 16324, 16741, 17121, 17525, 17617, 18554, 18635, 19310, 21057, 21648, 22310, 22386, 22873, 23235, 23642, 24116, 24197, 24473, 24513, 24834, 26331, 27457, 27458, 31209, 31225, 31234, 31244, 31246, 31249, 31252, 31265, 31266, 31268, 31272, 31404, 32594, 36822, 60999, 61005, 61024, 61027, 61048, 61052, 61138, 61151, 61155, 61177, 61185, 61192, 61209, 61229, 61293, 61325, 61331, 61337, 61408, 61527, 61641, 61653, 61659, 61670, 61771, 61809, 61918, 61945, 62087, 62144, 62332, 62374, 62375, 62378, 62572, 62580, 62725, 62835, 62900, 62902, 69963, 78925, 82227, 85522, 88454, 92979, 98330, 101010, 101604, 106997, 145742, 159055, 170833, 220674, 228769, 235414, 246728, 439570, 440967, 444539, 444683, 444972, 519539, 520191, 520296, 521238, 
526618, 556940, 565690, 595928, 637563, 637566, 637776, 637796, 641256, 641423, 778574, 1549026, 1550470, 3578033, 5273467, 5280450, 5315892, 5352539, 5355850, 5362588, 5362798, 5362814, 5363233, 5363388, 5364729, 5365027, 5365049, 5366244, 5367698, 5368076, 6114390, 6429333, 6999977, 16220109]
   
Should use: n=337 [14286, 62332, 15510, 101604, 31265, 36822, 1001, 31244, 439570, 61918, 7937, 11428, 61527, 16324, 13561, 14104, 6920, 5315892, 12506, 228769, 12741, 15037, 8635, 5280450, 61331, 8078, 16741, 21057, 3776, 1110, 7593, 61337, 5367698, 8797, 1183, 159055, 17617, 875, 7654, 241, 8159, 61177, 8042, 460, 1550470, 6505, 595928, 444972, 2969, 8163, 8077, 61293, 12097, 5950, 10364, 101010, 239, 8118, 637563, 7341, 31404, 61209, 11583, 12580, 7165, 4133, 31246, 679, 11902, 556940, 26331, 6140, 1030, 637776, 10722, 8082, 8294, 778574, 7635, 61229, 5960, 641256, 12587, 8180, 62374, 6584, 3578033, 7601, 263, 8063, 170833, 62572, 8712, 5366244, 1068, 6448, 246728, 8193, 14257, 8137, 8205, 7119, 10748, 6658, 92979, 8723, 8908, 1136, 12180, 176, 8091, 88454, 8363, 641423, 11617, 11980, 60999, 61809, 6943, 62375, 27458, 12348, 6184, 27457, 7632, 7983, 444539, 15380, 31268, 637566, 14228, 13187, 8038, 6753, 7127, 10448, 1032, 9024, 8103, 7967, 9862, 10430, 17121, 145742, 6276, 22386, 22310, 11124, 61192, 18554, 69963, 8030, 62725, 196, 61945, 7095, 5365027, 14525, 15717, 98330, 9589, 5362798, 7765, 61670, 565690, 61024, 6106, 21648, 11527, 10797, 8051, 7761, 31225, 5363388, 5273467, 7519, 85522, 5355850, 7795, 7583, 9016, 61027, 6997, 379, 61325, 7820, 7997, 7824, 12327, 454, 5961, 62087, 7150, 23642, 24834, 994, 16220109, 62835, 7361, 31272, 12813, 7731, 10882, 61155, 9256, 15606, 5363233, 6114390, 8697, 807, 7409, 8857, 8174, 7059, 24513, 6989, 62900, 957, 7991, 24197, 11569, 7147, 5368076, 2682, 61185, 8452, 78925, 7792, 444683, 61052, 23235, 24473, 520296, 177, 62144, 7360, 9025, 61641, 10925, 6429333, 31266, 7799, 1549026, 5364729, 14514, 7720, 3893, 61151, 5362588, 62580, 8007, 12297, 61048, 7144, 11086, 5362814, 6501, 440967, 8615, 31209, 12206, 8456, 14491, 7335, 12810, 1049, 6213, 61138, 8918, 519539, 10285, 2879, 8658, 7288, 17525, 13216, 61659, 12178, 7826, 6826, 8122, 22873, 660, 19310, 1060, 8129, 5365049, 8184, 18635, 7463, 10890, 11614, 220674, 
24116, 520191, 31234, 7500, 326, 6560, 7749, 61653, 521238, 6590, 8375, 82227, 6057, 61005, 7151, 32594, 62902, 311, 7762, 5779, 8175, 7047, 16255, 8186, 235414, 31249, 243, 526618, 106997, 31252, 6999977, 62378, 13204, 5352539, 11529, 13436, 6998, 10400, 261, 637796, 8093, 5780, 61771, 61408]
Equal? False
{6549}
   
   
   
   
   
   
k=3
Used: n=338 [126, 176, 177, 239, 240, 243, 244, 263, 325, 326, 356, 379, 454, 460, 807, 875, 887, 957, 994, 1001, 1030, 1032, 1049, 1060, 1068, 1110, 2214, 2345, 2346, 2682, 2758, 2879, 3776, 3893, 5541, 5610, 5779, 5780, 5950, 5961, 6050, 6106, 6137, 6184, 6213, 6276, 6287, 6374, 6448, 6501, 6505, 6560, 6569, 6584, 6590, 6658, 6753, 6920, 6943, 6989, 6997, 7059, 7095, 7119, 7122, 7127, 7136, 7144, 7147, 7150, 7165, 7288, 7335, 7341, 7360, 7361, 7409, 7410, 7463, 7500, 7519, 7593, 7632, 7654, 7695, 7720, 7731, 7749, 7761, 7762, 7765, 7768, 7792, 7799, 7803, 7820, 7824, 7826, 7915, 7921, 7937, 7967, 7969, 7983, 7991, 7997, 8007, 8030, 8042, 8048, 8051, 8063, 8077, 8078, 8082, 8091, 8093, 8103, 8118, 8122, 8125, 8129, 8137, 8159, 8163, 8174, 8180, 8184, 8186, 8193, 8205, 8363, 8456, 8615, 8635, 8658, 8697, 8712, 8723, 8797, 8892, 8908, 8914, 8918, 9024, 9025, 9256, 9261, 9589, 9609, 9862, 10285, 10364, 10400, 10448, 10722, 10748, 10797, 10882, 10890, 10895, 10925, 11086, 11124, 11419, 11428, 11525, 11527, 11529, 11569, 11583, 11614, 11617, 11902, 12097, 12178, 12180, 12206, 12297, 12327, 12348, 12506, 12580, 12741, 12748, 12810, 12813, 13187, 13204, 13216, 13436, 13561, 14104, 14228, 14257, 14286, 14296, 14328, 14491, 14514, 14525, 15037, 15380, 15510, 15606, 16255, 16324, 16741, 17121, 17525, 17617, 18554, 19310, 21057, 21648, 22310, 22386, 23235, 23642, 24020, 24116, 24473, 24513, 26331, 27457, 27458, 31209, 31210, 31225, 31234, 31244, 31246, 31249, 31252, 31260, 31265, 31266, 31268, 31272, 31404, 32594, 33032, 36822, 60999, 61005, 61024, 61027, 61048, 61052, 61130, 61151, 61185, 61192, 61199, 61204, 61229, 61293, 61325, 61331, 61337, 61386, 61408, 61527, 61641, 61653, 61670, 61771, 61809, 61918, 61945, 62087, 62144, 62332, 62375, 62378, 62444, 62580, 62725, 62835, 62900, 62902, 82227, 85522, 88454, 92979, 93375, 98330, 101010, 101604, 104721, 106997, 145742, 159055, 165675, 220674, 246728, 439570, 440967, 444683, 444972, 519539, 520108, 520191, 520296, 521238, 
556940, 595928, 637563, 637566, 637796, 638014, 638024, 641256, 641423, 643820, 778574, 1549026, 1550470, 1551246, 3578033, 5273467, 5280450, 5315892, 5352539, 5355850, 5362588, 5362798, 5362814, 5363233, 5363491, 5365049, 5366244, 5367698, 5367706, 6114390, 6429333, 6999977, 16220109]
   
Should use: n=337 [6505, 61945, 7720, 637563, 16741, 62725, 8007, 101010, 1551246, 12178, 8635, 12297, 7765, 11614, 8125, 7792, 7937, 8051, 12580, 14286, 8077, 32594, 93375, 8048, 165675, 82227, 8908, 8797, 5352539, 31210, 7095, 6584, 520191, 61293, 126, 5363491, 8658, 8918, 13436, 61918, 641423, 8122, 7341, 6920, 994, 98330, 61052, 11525, 444972, 13187, 62378, 5362798, 8137, 8159, 24513, 21057, 7761, 9025, 92979, 11124, 8093, 61204, 62332, 10797, 444683, 22386, 12741, 61151, 7409, 14296, 325, 1068, 5950, 106997, 104721, 7921, 19310, 2345, 7165, 61771, 7820, 8103, 6997, 61653, 27458, 10285, 7150, 643820, 556940, 1001, 61527, 243, 8129, 23642, 7500, 7288, 7127, 31265, 145742, 31249, 62144, 85522, 10400, 9589, 5610, 7361, 8712, 8180, 61386, 13216, 11569, 14525, 356, 11086, 12348, 6114390, 10364, 61809, 8193, 6753, 6658, 14491, 62444, 12206, 16255, 62902, 7824, 14228, 8174, 5362814, 10448, 239, 12813, 5961, 14328, 8363, 11529, 7144, 1110, 6050, 61641, 61670, 27457, 8030, 62375, 10722, 6429333, 7915, 7803, 8892, 15380, 17121, 3578033, 9862, 33032, 263, 62900, 7136, 60999, 9256, 7122, 641256, 6999977, 7997, 11419, 1032, 638024, 5366244, 101604, 440967, 5355850, 7695, 8078, 61048, 5367698, 10882, 460, 6213, 8091, 16324, 8697, 23235, 6943, 240, 519539, 7519, 24116, 875, 6374, 5541, 26331, 6287, 5273467, 159055, 61325, 62835, 14104, 1030, 246728, 7632, 10890, 7731, 807, 11617, 61229, 15510, 11428, 61005, 177, 16220109, 6448, 11902, 244, 5367706, 7749, 61027, 220674, 3893, 7360, 8914, 7654, 61337, 7463, 8163, 520296, 7967, 176, 6590, 61185, 6569, 7410, 5780, 18554, 12097, 6560, 595928, 15606, 637796, 12810, 13561, 1550470, 12180, 61192, 6106, 7799, 7119, 8723, 8082, 31252, 7335, 12748, 61199, 11583, 12327, 454, 7768, 7593, 61130, 8205, 957, 6276, 61408, 5280450, 14514, 7147, 24020, 1060, 61024, 8456, 7991, 31268, 2879, 2346, 5363233, 2682, 521238, 7983, 31244, 1549026, 9261, 778574, 887, 31234, 2214, 31404, 7969, 3776, 9024, 8184, 31225, 379, 8042, 10895, 61331, 10925, 
7059, 5779, 88454, 11527, 6501, 12506, 10748, 9609, 31272, 36822, 520108, 24473, 5362588, 5365049, 8063, 637566, 31266, 6989, 2758, 8186, 17525, 8118, 62087, 22310, 17617, 21648, 326, 7826, 439570, 31209, 7762, 31260, 6184, 15037, 1049, 62580, 14257, 8615, 31246, 6137, 13204, 638014]
Equal? False
{5315892}
   
   
   
   
   
   
k=4
Used: n=338 [126, 176, 177, 196, 239, 240, 241, 243, 261, 263, 311, 325, 356, 379, 454, 460, 679, 807, 875, 887, 957, 994, 1001, 1030, 1032, 1049, 1068, 1110, 1136, 1183, 2214, 2345, 2346, 2758, 2969, 3776, 3893, 5541, 5610, 5779, 5950, 5960, 5961, 6050, 6054, 6057, 6106, 6140, 6184, 6213, 6276, 6287, 6374, 6386, 6448, 6501, 6505, 6549, 6560, 6590, 6658, 6753, 6826, 6920, 6989, 7047, 7059, 7095, 7119, 7122, 7127, 7136, 7144, 7147, 7150, 7165, 7194, 7288, 7335, 7341, 7360, 7361, 7409, 7410, 7500, 7519, 7583, 7593, 7632, 7635, 7654, 7695, 7720, 7731, 7749, 7761, 7762, 7765, 7768, 7792, 7795, 7799, 7803, 7820, 7824, 7826, 7894, 7915, 7916, 7921, 7937, 7967, 7969, 7983, 8007, 8030, 8038, 8042, 8048, 8051, 8063, 8077, 8078, 8082, 8091, 8093, 8103, 8118, 8122, 8125, 8129, 8137, 8159, 8174, 8175, 8184, 8205, 8452, 8456, 8467, 8615, 8635, 8658, 8697, 8712, 8723, 8797, 8857, 8892, 8908, 8914, 9016, 9024, 9025, 9261, 9589, 9609, 9862, 10364, 10722, 10748, 10797, 10890, 10895, 10925, 11086, 11419, 11428, 11529, 11569, 11583, 11614, 11617, 11902, 12097, 12178, 12180, 12206, 12297, 12327, 12348, 12506, 12587, 12741, 12748, 12810, 12813, 13187, 13204, 13216, 13436, 13561, 14228, 14257, 14328, 14491, 14514, 14525, 15037, 15510, 15717, 16741, 17121, 17525, 17617, 18554, 18635, 18827, 19310, 21057, 21648, 22310, 22873, 23235, 23642, 24020, 24116, 24513, 26331, 27457, 31209, 31210, 31225, 31234, 31244, 31246, 31252, 31260, 31265, 31266, 31268, 32594, 33032, 36822, 60999, 61005, 61048, 61052, 61130, 61138, 61151, 61155, 61177, 61185, 61192, 61199, 61209, 61229, 61252, 61293, 61325, 61331, 61337, 61386, 61408, 61641, 61659, 61670, 61771, 61809, 61918, 61945, 62087, 62144, 62332, 62374, 62375, 62378, 62444, 62572, 62580, 62835, 62900, 69963, 78925, 82227, 85522, 88454, 89440, 92979, 93375, 98330, 101604, 104721, 106997, 145742, 159055, 165675, 170833, 220674, 235414, 246728, 439570, 440967, 444539, 444683, 444972, 520108, 520296, 521238, 526618, 556940, 565690, 595928, 637563, 637566, 
637776, 637796, 638014, 638024, 641256, 643820, 778574, 1549026, 1549778, 1550470, 1551246, 3578033, 5273467, 5315892, 5352539, 5355850, 5362588, 5362798, 5362814, 5363233, 5363388, 5363491, 5364729, 5365027, 5365049, 5366244, 5367706, 5368076, 6114390, 6429333, 6999977, 16220109]
   
Should use: n=337 [31225, 8063, 61155, 85522, 1550470, 61005, 31246, 165675, 12327, 7341, 14491, 8697, 1030, 62087, 61771, 12180, 88454, 8048, 7826, 7695, 11902, 8030, 11529, 6213, 10748, 31266, 7749, 13436, 62332, 643820, 13561, 6753, 8125, 7361, 8184, 2758, 7593, 18554, 13216, 7409, 14228, 5368076, 9589, 93375, 69963, 62444, 9025, 1136, 2346, 7937, 12206, 7122, 7768, 13204, 24116, 31234, 8093, 444972, 11583, 27457, 6057, 8857, 62375, 6184, 6590, 23642, 11569, 61659, 12748, 9016, 5355850, 8038, 460, 7136, 957, 5541, 6560, 5367706, 875, 1549026, 1110, 21648, 61293, 8122, 7519, 8175, 18827, 31244, 638014, 8205, 220674, 7799, 679, 5365049, 5362814, 7915, 5362588, 23235, 62572, 17617, 61138, 454, 638024, 11086, 6549, 8007, 126, 12178, 526618, 637776, 8082, 5779, 61185, 521238, 61151, 7824, 887, 61670, 106997, 8658, 7410, 7635, 7765, 2969, 104721, 778574, 6140, 565690, 7916, 33032, 1068, 8467, 31210, 61641, 15717, 14328, 62144, 6374, 31265, 61331, 7921, 12506, 5363388, 5363233, 5352539, 637563, 7761, 1001, 6054, 159055, 14514, 6999977, 61337, 3578033, 31268, 7047, 12813, 18635, 61252, 6114390, 12587, 7144, 10925, 60999, 177, 5362798, 32594, 61199, 6448, 16741, 8452, 92979, 261, 6505, 12097, 17121, 62900, 6989, 1032, 8042, 8892, 241, 10895, 7731, 14525, 145742, 8723, 61918, 61177, 98330, 9609, 61229, 21057, 7335, 7632, 89440, 7127, 61809, 379, 8712, 7795, 176, 10364, 7360, 556940, 520296, 641256, 7803, 8174, 7147, 10890, 7119, 31252, 78925, 62374, 9862, 7894, 61192, 24020, 8456, 11428, 15510, 239, 12810, 6050, 61325, 12297, 62835, 7969, 12348, 6276, 8129, 595928, 1549778, 8091, 7059, 196, 22873, 61052, 26331, 31209, 7983, 7165, 5961, 1551246, 22310, 5364729, 61130, 5366244, 5363491, 62580, 7288, 62378, 6826, 61209, 637566, 8908, 8051, 7654, 311, 1183, 8914, 444683, 9261, 7500, 7150, 7095, 6501, 14257, 439570, 8615, 7720, 235414, 5273467, 9024, 7792, 6287, 8137, 11614, 2214, 13187, 5610, 356, 7194, 243, 10797, 8118, 325, 5960, 6429333, 994, 17525, 2345, 61945, 7820, 
10722, 36822, 8078, 61408, 24513, 5315892, 12741, 520108, 8635, 61048, 1049, 101604, 6386, 8103, 807, 82227, 8159, 637796, 6920, 444539, 440967, 5950, 7583, 3776, 240, 6106, 8077, 3893, 170833, 31260, 11419, 263, 61386, 8797, 15037, 7762, 246728, 7967, 16220109, 6658, 11617, 19310]
Equal? False
{5365027}
   
   
   
   
   

In [66]:
# Sanity check: show the length of element 0 of the k-th entry in `splits`.
# NOTE(review): presumably element 0 is the training part of split k - confirm
# against the cell that builds `splits`.
# `k` is not assigned in this cell; it relies on whatever value an earlier cell
# left in the kernel, so the result depends on execution history.
# `list(splits)` materializes every split just to index one of them; if `splits`
# is a one-shot iterator this call would also exhaust it - TODO confirm.
len(list(splits)[k][0])


Out[66]:
337

In [67]:
# Number of leaderboard CIDs; `lb_CIDs` is the list of ints read from
# CID_leaderboard.txt in the loading cell near the top of the notebook.
len(lb_CIDs)


Out[67]:
69

In [60]:
# Show how many columns `X` has, as a 1-tuple shape.
# NOTE(review): `X` is defined outside this cell. This cell's execution count (60)
# is lower than the cells just above it (66, 67), so it was run out of order;
# re-check the value after Restart Kernel -> Run All.
# Presumably `X` is the feature matrix without the CID column - TODO confirm.
X.columns.shape


Out[60]:
(14612,)

In [61]:
# Display the `train_features` frame (pandas elides the middle rows/columns when rendering).
# NOTE(review): `train_features` is built in a cell not shown here, and this cell's
# execution count (61) is out of sequence with the cells above, so the displayed frame
# may reflect stale kernel state. For a quick look, `.head()` plus `.shape` would keep
# the saved output small - left unchanged so the recorded output still matches.
train_features


Out[61]:
CID complexity from pubmed MW AMW Sv Se Sp Si Mv Me ... 91305518_2 91411526_2 91541756_2 91552833_2 91563027_2 91595028_2 91614181_2 91617014_2 91617930_2 91618238_2
0 126 0.181128 0.270753 0.030587 0.262264 0.219126 0.253846 0.214989 0.216981 0.425532 ... 0.000013 0.000331 0.014024 0.000296 0.021098 0.000186 0.003159 0.002299 0.000138 0.011080
2 177 0.020039 0.067721 0.015501 0.075556 0.083688 0.078074 0.089782 0.106346 0.382979 ... 0.000014 0.000092 0.000961 0.000339 0.000657 0.000008 0.000098 0.000221 0.000037 0.001932
4 196 0.221790 0.333247 0.023779 0.306622 0.308572 0.294339 0.305729 0.138079 0.539007 ... 0.001029 0.000737 0.013662 0.009383 0.001954 0.000820 0.003130 0.005600 0.002189 0.010702
6 240 0.141051 0.229143 0.026007 0.240877 0.197235 0.241008 0.197436 0.214408 0.269504 ... 0.000000 0.000084 0.021801 0.000134 0.008264 0.000479 0.005696 0.002347 0.000035 0.005194
8 243 0.202335 0.270753 0.030587 0.262264 0.219126 0.253846 0.214989 0.216981 0.425532 ... 0.000051 0.000187 0.044910 0.000528 0.010226 0.000747 0.007957 0.003826 0.000216 0.007831
9 244 0.107782 0.234396 0.019297 0.256640 0.228315 0.262527 0.232498 0.171527 0.219858 ... 0.000116 0.000189 0.025945 0.000134 0.012913 0.000906 0.009812 0.003065 0.000078 0.008116
10 261 0.048249 0.140695 0.009412 0.166906 0.178825 0.177695 0.188943 0.095197 0.205674 ... 0.000217 0.000556 0.003460 0.000902 0.001355 0.000048 0.000379 0.001313 0.000325 0.006664
14 311 0.441634 0.452824 0.038815 0.355019 0.343198 0.311362 0.323340 0.180103 0.900709 ... 0.000614 0.001629 0.010255 0.005364 0.007480 0.000739 0.003606 0.004504 0.001656 0.006944
17 326 0.235409 0.338604 0.016726 0.377901 0.339949 0.390425 0.346192 0.162093 0.170213 ... 0.000313 0.000503 0.012979 0.001143 0.016830 0.000362 0.004182 0.003648 0.000541 0.013611
18 356 0.048638 0.250286 0.000000 0.343952 0.378270 0.385618 0.404788 0.065180 0.000000 ... 0.002901 0.000751 0.002844 0.005614 0.000511 0.000420 0.001417 0.002331 0.005848 0.005102
20 454 0.115953 0.286643 0.006000 0.349575 0.369098 0.376937 0.387279 0.088336 0.113475 ... 0.002473 0.000507 0.004109 0.006292 0.001075 0.000417 0.001378 0.002237 0.003400 0.007182
21 460 0.161479 0.276006 0.023738 0.278027 0.250206 0.275393 0.250065 0.176672 0.361702 ... 0.000115 0.000744 0.033934 0.000296 0.021098 0.000419 0.004936 0.003800 0.000215 0.011080
22 650 0.139105 0.177052 0.022693 0.172529 0.169636 0.169014 0.171434 0.145798 0.460993 ... 0.000122 0.000356 0.005407 0.002309 0.000602 0.000191 0.001516 0.004253 0.000577 0.001666
23 660 0.321012 0.338474 0.027778 0.337850 0.283200 0.331948 0.279102 0.210978 0.354610 ... 0.000000 0.001248 0.024830 0.000031 0.015625 0.000469 0.004649 0.003483 0.000671 0.012152
24 679 0.056420 0.156377 0.027917 0.126825 0.132518 0.146194 0.138201 0.114923 0.319149 ... 0.000055 0.000205 0.000932 0.000335 0.000641 0.000017 0.000218 0.000490 0.000082 0.001850
28 807 0.000000 0.613180 1.000000 0.057370 0.000000 0.137541 0.000000 1.000000 0.361702 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
29 875 0.260700 0.343493 0.040700 0.258046 0.257234 0.220422 0.241674 0.174099 1.000000 ... 0.000831 0.000758 0.006574 0.002722 0.003265 0.000480 0.003219 0.001736 0.001053 0.016198
30 887 0.003891 0.036487 0.007730 0.045645 0.067191 0.049796 0.075264 0.040309 0.397163 ... 0.000056 0.000093 0.001008 0.000345 0.000683 0.000008 0.000100 0.000057 0.000037 0.002066
31 957 0.085214 0.291896 0.003502 0.365339 0.400178 0.398456 0.422341 0.072041 0.092199 ... 0.005048 0.000507 0.004109 0.007223 0.001911 0.000601 0.001744 0.001644 0.005312 0.007182
33 994 0.297665 0.382789 0.022758 0.384153 0.348939 0.381914 0.350431 0.169811 0.340426 ... 0.000197 0.000315 0.043509 0.001512 0.011080 0.001652 0.015200 0.005272 0.000408 0.012544
35 1030 0.040661 0.151071 0.011918 0.158381 0.184219 0.162284 0.191992 0.076329 0.390071 ... 0.001107 0.000359 0.002005 0.001314 0.003858 0.000155 0.000861 0.000213 0.000327 0.015625
37 1032 0.078210 0.145818 0.019101 0.142618 0.153139 0.140736 0.156915 0.113208 0.482270 ... 0.000342 0.000562 0.010916 0.001314 0.002469 0.000155 0.000861 0.001918 0.000445 0.006944
38 1049 0.060117 0.158873 0.022832 0.174474 0.145947 0.178515 0.152051 0.196398 0.198582 ... 0.000000 0.000000 0.019837 0.000000 0.002241 0.000189 0.001863 0.001257 0.000000 0.003328
39 1060 0.163424 0.182175 0.036015 0.148241 0.143967 0.132055 0.139420 0.176672 0.815603 ... 0.000122 0.000356 0.010597 0.002309 0.001355 0.000191 0.001516 0.003361 0.000577 0.003748
40 1068 0.005447 0.114766 0.020497 0.105468 0.110611 0.133356 0.120648 0.099485 0.070922 ... 0.000125 0.000092 0.000240 0.000151 0.000164 0.000008 0.000098 0.000221 0.000037 0.004347
41 1110 0.180156 0.260273 0.032986 0.215303 0.213435 0.194718 0.206554 0.162093 0.773050 ... 0.000329 0.000772 0.012346 0.004954 0.002189 0.000545 0.002278 0.004017 0.001268 0.008900
42 1136 0.173346 0.325627 0.029076 0.292803 0.264245 0.309269 0.267618 0.170669 0.297872 ... 0.000320 0.000744 0.014024 0.000525 0.006117 0.000186 0.001404 0.003002 0.000421 0.024931
43 1183 0.262646 0.348851 0.029493 0.329325 0.288594 0.316508 0.282137 0.198113 0.475177 ... 0.000112 0.001276 0.022500 0.000785 0.022420 0.000223 0.003044 0.005378 0.000412 0.017200
44 2214 0.324903 0.385338 0.025787 0.374970 0.336171 0.366305 0.331717 0.181818 0.411348 ... 0.000309 0.001600 0.029873 0.001521 0.021948 0.000414 0.004713 0.011228 0.000835 0.016649
45 2345 0.414397 0.505149 0.026007 0.518904 0.427827 0.516373 0.421818 0.214408 0.269504 ... 0.000101 0.000440 0.032362 0.000671 0.014489 0.001293 0.037623 0.007243 0.000379 0.006316
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
434 638024 0.801556 0.695282 0.022358 0.707855 0.627107 0.707103 0.624394 0.179245 0.290780 ... 0.000010 0.002223 0.008190 0.000366 0.007869 0.000979 0.002748 0.003714 0.001390 0.004300
435 641256 0.140467 0.208416 0.011257 0.242492 0.242882 0.255797 0.253042 0.116638 0.177305 ... 0.000210 0.000192 0.001712 0.000547 0.002163 0.000030 0.000204 0.000197 0.000079 0.003144
436 641423 0.412451 0.442682 0.023224 0.459081 0.394833 0.459818 0.392781 0.192110 0.269504 ... 0.000104 0.000292 0.027778 0.001013 0.009335 0.000718 0.008521 0.004727 0.000286 0.004959
437 643820 0.291829 0.354364 0.007551 0.425162 0.433172 0.455011 0.451378 0.102058 0.106383 ... 0.001008 0.000717 0.005625 0.003139 0.001830 0.000361 0.001712 0.004444 0.000840 0.006719
438 778574 0.414397 0.484422 0.017918 0.520519 0.473473 0.531162 0.477424 0.160377 0.219858 ... 0.000934 0.000652 0.030914 0.002785 0.011080 0.001119 0.011032 0.006520 0.001136 0.009452
441 1549026 0.453307 0.463695 0.011257 0.522134 0.519136 0.545951 0.533044 0.116638 0.177305 ... 0.000756 0.001194 0.008359 0.004885 0.001568 0.000585 0.003872 0.007901 0.001577 0.005328
442 1549778 0.447471 0.458572 0.008196 0.546423 0.544805 0.582909 0.565072 0.109777 0.099291 ... 0.000756 0.000914 0.006400 0.005665 0.000882 0.000652 0.003338 0.007901 0.001811 0.005328
443 1550470 0.282101 0.317747 0.016146 0.339435 0.328846 0.346709 0.334708 0.135506 0.269504 ... 0.000451 0.000724 0.005768 0.002031 0.001870 0.000118 0.001365 0.002883 0.000541 0.002500
444 1551246 0.404669 0.341309 0.010987 0.402818 0.383797 0.427525 0.399968 0.130360 0.092199 ... 0.000448 0.000498 0.002500 0.002009 0.000458 0.000223 0.001036 0.003600 0.000538 0.002419
446 3578033 0.260700 0.348981 0.018146 0.369347 0.345343 0.374986 0.349226 0.151801 0.269504 ... 0.000312 0.000981 0.010126 0.001547 0.011562 0.000312 0.003589 0.003624 0.000843 0.017485
447 5273467 0.437743 0.520909 0.016538 0.566194 0.521050 0.580986 0.527019 0.153516 0.198582 ... 0.001367 0.000865 0.032707 0.004561 0.010471 0.001392 0.011662 0.007287 0.001746 0.011378
448 5280450 0.519455 0.682617 0.008155 0.796123 0.804547 0.844786 0.830541 0.104631 0.120567 ... 0.007441 0.000560 0.006755 0.015625 0.001058 0.001534 0.002770 0.003104 0.007765 0.004395
451 5315892 0.196498 0.302117 0.018897 0.332227 0.292372 0.340601 0.296597 0.174099 0.191489 ... 0.000112 0.000180 0.022784 0.000284 0.011562 0.000598 0.006137 0.002192 0.000076 0.006830
453 5352539 0.287938 0.395974 0.010457 0.446548 0.455063 0.467877 0.468945 0.107204 0.191489 ... 0.002059 0.001242 0.009290 0.005130 0.001718 0.000411 0.002962 0.005169 0.001618 0.008789
455 5355850 0.371595 0.447935 0.019591 0.474844 0.425913 0.481365 0.427843 0.168954 0.241135 ... 0.000418 0.000895 0.035679 0.001801 0.011295 0.000949 0.010221 0.008789 0.000643 0.009720
456 5362588 0.221790 0.281390 0.008922 0.333812 0.338035 0.355390 0.352217 0.107204 0.134752 ... 0.001048 0.000754 0.004504 0.003341 0.001162 0.000269 0.001424 0.003065 0.000868 0.005194
457 5362798 0.169066 0.359617 0.005281 0.440925 0.464235 0.476558 0.486440 0.087479 0.092199 ... 0.002363 0.000479 0.003501 0.005046 0.001666 0.000357 0.001644 0.001508 0.002096 0.005827
458 5362814 0.144942 0.244903 0.009918 0.288167 0.290458 0.305593 0.302637 0.111492 0.148936 ... 0.000322 0.000521 0.004444 0.001196 0.001149 0.000067 0.000554 0.001712 0.000311 0.007972
459 5363233 0.243191 0.354364 0.007551 0.425162 0.433172 0.455011 0.451378 0.102058 0.106383 ... 0.002073 0.000705 0.007287 0.006906 0.000987 0.000469 0.002076 0.005237 0.002998 0.006299
460 5363388 0.225681 0.323000 0.012497 0.355199 0.359926 0.368256 0.369770 0.114065 0.234043 ... 0.001015 0.000985 0.010255 0.003841 0.001870 0.000266 0.002133 0.005450 0.000845 0.006944
463 5364729 0.305447 0.317747 0.016146 0.339435 0.328846 0.346709 0.334708 0.135506 0.269504 ... 0.000318 0.000512 0.006072 0.002630 0.001954 0.000151 0.001760 0.003748 0.000547 0.002675
464 5365027 0.149416 0.323130 0.005600 0.395250 0.416675 0.426733 0.436859 0.087479 0.099291 ... 0.002422 0.000494 0.003810 0.005245 0.001792 0.000360 0.001341 0.001579 0.002414 0.006504
465 5365049 0.249027 0.359487 0.011363 0.400873 0.407486 0.418052 0.419350 0.109777 0.212766 ... 0.001227 0.001253 0.009518 0.004420 0.001754 0.000264 0.002511 0.006233 0.001196 0.006299
466 5366244 0.496109 0.724488 0.003812 0.897613 0.939935 0.974635 0.982273 0.084048 0.049645 ... 0.009365 0.001020 0.003174 0.011517 0.001111 0.001759 0.005442 0.004622 0.029679 0.006400
467 5367698 0.418288 0.520909 0.016538 0.566194 0.521050 0.580986 0.527019 0.153516 0.198582 ... 0.000903 0.000851 0.035156 0.003198 0.010092 0.001589 0.015425 0.011758 0.000928 0.010794
468 5367706 0.394942 0.484422 0.017918 0.520519 0.473473 0.531162 0.477424 0.160377 0.219858 ... 0.000728 0.000872 0.037461 0.002727 0.010668 0.001113 0.010841 0.008469 0.000780 0.011687
469 5368076 0.270428 0.395974 0.010457 0.446548 0.455063 0.467877 0.468945 0.107204 0.191489 ... 0.002347 0.001215 0.011080 0.007561 0.002551 0.000465 0.002433 0.006989 0.003256 0.005653
472 6429333 0.295720 0.333351 0.020954 0.362138 0.308869 0.368878 0.311115 0.190395 0.198582 ... 0.000050 0.000180 0.026739 0.000505 0.009365 0.000598 0.006881 0.003624 0.000135 0.006830
473 6999977 0.196498 0.333377 0.013852 0.346674 0.365321 0.352816 0.372819 0.103774 0.333333 ... 0.002491 0.001310 0.013662 0.006364 0.003052 0.000476 0.003673 0.004628 0.002189 0.019025
475 16220109 0.392996 0.427208 0.012122 0.476460 0.471560 0.496154 0.483464 0.120069 0.191489 ... 0.001189 0.001205 0.008554 0.004207 0.001600 0.000355 0.002870 0.006900 0.001165 0.005487

338 rows × 14613 columns


In [ ]: