In [1]:
import numpy as np
import pandas as pd
import os, sys
from sklearn import linear_model
from scipy import stats

In [2]:
# load the leaderboard (LB) and test-set CIDs

# load LB CIDs
with open('/media/gabor/H/python_from_C/final_ofaction_for_paper_2/data/CID_leaderboard.txt') as f:
    content = f.readlines()
lb_CIDs = [int(x) for x in content]

# load test CIDs
with open("/media/gabor/H/python_from_C/final_ofaction_for_paper_2/data/CID_testset.txt") as f: 
    content = f.readlines()
test_CIDs = [int(x) for x in content]

In [4]:
# load the Morgan similarity matrix to use as sample weights in training
morgan = pd.read_csv('/media/gabor/H/python_from_C/final_ofaction_for_paper_2/data/morgan_sim.csv', index_col=0)
weights = morgan[morgan.index.astype(str)]  # keep the columns that correspond to the row CIDs
# duplicate each row so the weight matrix lines up with the two rows (two dilutions) per CID in the feature matrix
weights = pd.concat((weights, weights)).sort_index()
print weights.shape
weights.head()


(952, 476)
Out[4]:
126 176 177 180 196 239 240 241 243 244 ... 5366244 5367698 5367706 5368076 5371102 6114390 6429333 6999977 10857465 16220109
0
126 1.000000 0.108108 0.171429 0.054054 0.066667 0.090909 0.509091 0.166667 0.315789 0.290909 ... 0.033613 0.183673 0.195652 0.051948 0.252632 0.237288 0.307692 0.066667 0.000000 0.050633
126 1.000000 0.108108 0.171429 0.054054 0.066667 0.090909 0.509091 0.166667 0.315789 0.290909 ... 0.033613 0.183673 0.195652 0.051948 0.252632 0.237288 0.307692 0.066667 0.000000 0.050633
176 0.108108 1.000000 0.285714 0.625000 0.256410 0.434783 0.058824 0.000000 0.277778 0.058824 ... 0.081633 0.103896 0.112676 0.142857 0.135135 0.082474 0.136364 0.256410 0.027397 0.172414
176 0.108108 1.000000 0.285714 0.625000 0.256410 0.434783 0.058824 0.000000 0.277778 0.058824 ... 0.081633 0.103896 0.112676 0.142857 0.135135 0.082474 0.136364 0.256410 0.027397 0.172414
177 0.171429 0.285714 1.000000 0.285714 0.054054 0.095238 0.187500 0.000000 0.058824 0.000000 ... 0.041667 0.080000 0.086957 0.111111 0.083333 0.084211 0.238095 0.108108 0.028169 0.142857

5 rows × 476 columns
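
The doubled similarity matrix serves as per-sample weights during training: molecules structurally similar to the CID being predicted get more influence on the fit. A minimal sketch of the idea with made-up numbers (X_toy, y_toy and sim_to_query are illustrative, not part of the pipeline):

In [ ]:
# Hypothetical illustration of similarity-weighted ridge regression,
# the scheme used in the main prediction loop below.
X_toy = np.array([[0.1], [0.2], [0.8], [0.9]])  # one feature, four training molecules
y_toy = np.array([10.0, 12.0, 80.0, 85.0])      # perceptual ratings
sim_to_query = np.array([0.9, 0.8, 0.1, 0.05])  # Morgan similarity of each row to the query CID

regr_toy = linear_model.Ridge(alpha=1)
regr_toy.fit(X_toy, y_toy, sample_weight=sim_to_query)  # similar molecules dominate the fit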


In [3]:
# load the feature matrix (Dragon descriptors + Morgan fingerprints)
features = pd.read_csv('features_dragon_morgan.csv', index_col=0)
features.head()


Out[3]:
CID complexity from pubmed MW AMW Sv Se Sp Si Mv Me ... 91541756_2 91552833_2 91563027_2 91595028_2 91614181_2 91617014_2 91617930_2 91618238_2 neglog10d Intensity
0 126 0.181128 0.270753 0.030587 0.262264 0.219126 0.253846 0.214989 0.216981 0.425532 ... 0.014024 0.000296 0.021098 0.000186 0.003159 0.002299 0.000138 0.011080 1 1
1 126 0.181128 0.270753 0.030587 0.262264 0.219126 0.253846 0.214989 0.216981 0.425532 ... 0.014024 0.000296 0.021098 0.000186 0.003159 0.002299 0.000138 0.011080 3 0
2 176 0.060311 0.109331 0.025411 0.096943 0.105579 0.090940 0.107335 0.125214 0.659574 ... 0.008391 0.000930 0.001442 0.000094 0.000607 0.001362 0.000229 0.004162 5 1
3 176 0.060311 0.109331 0.025411 0.096943 0.105579 0.090940 0.107335 0.125214 0.659574 ... 0.008391 0.000930 0.001442 0.000094 0.000607 0.001362 0.000229 0.004162 7 0
4 177 0.020039 0.067721 0.015501 0.075556 0.083688 0.078074 0.089782 0.106346 0.382979 ... 0.000961 0.000339 0.000657 0.000008 0.000098 0.000221 0.000037 0.001932 3 1

5 rows × 14616 columns


In [7]:
# map an index (0-20) to each of the 21 perceptual descriptors
descriptor = dict(enumerate([u'INTENSITY/STRENGTH', u'VALENCE/PLEASANTNESS', u'BAKERY',
                             u'SWEET', u'FRUIT', u'FISH', u'GARLIC', u'SPICES', u'COLD', u'SOUR', u'BURNT',
                             u'ACID', u'WARM', u'MUSKY', u'SWEATY', u'AMMONIA/URINOUS', u'DECAYED', u'WOOD',
                             u'GRASS', u'FLOWER', u'CHEMICAL']))
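
A quick check of the mapping (index 0 is the intensity target, which the loop below handles specially):

In [ ]:
descriptor[0], descriptor[20]  # (u'INTENSITY/STRENGTH', u'CHEMICAL')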

In [8]:
# load the targets
all_targets = pd.read_csv('target.csv', index_col=0)
all_targets.head()


Out[8]:
#oID individual INTENSITY/STRENGTH VALENCE/PLEASANTNESS BAKERY SWEET FRUIT FISH GARLIC SPICES ... ACID WARM MUSKY SWEATY AMMONIA/URINOUS DECAYED WOOD GRASS FLOWER CHEMICAL
0 126 25 49.551020 49.465116 0.674419 25.953488 6.581395 0.302326 1.720930 3.906977 ... 3.046512 0.790698 8.023256 1.604651 1.209302 5.069767 1.348837 1.441860 9.906977 14.813953
1 126 25 24.653061 49.465116 0.674419 25.953488 6.581395 0.302326 1.720930 3.906977 ... 3.046512 0.790698 8.023256 1.604651 1.209302 5.069767 1.348837 1.441860 9.906977 14.813953
2 176 25 11.551020 45.944444 3.666667 8.166667 1.777778 0.000000 10.388889 6.055556 ... 4.166667 6.111111 8.666667 2.166667 5.222222 4.388889 2.611111 2.166667 5.944444 4.222222
3 176 25 4.551020 45.944444 3.666667 8.166667 1.777778 0.000000 10.388889 6.055556 ... 4.166667 6.111111 8.666667 2.166667 5.222222 4.388889 2.611111 2.166667 5.944444 4.222222
4 177 25 33.265306 45.147059 9.411765 22.441176 1.676471 0.000000 0.705882 2.735294 ... 4.970588 4.470588 3.823529 2.176471 4.235294 3.558824 1.147059 4.470588 2.441176 18.794118

5 rows × 23 columns


In [9]:
# load an example score file (feature ranking for descriptor 0)
scores = pd.read_csv('LB_scores/scores_' + str(0) + '.csv', index_col=0)
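
The score files rank every feature by a relevance score stored in column '0'; the main loop keeps only the top feature_number rows of that ranking. A sketch of the selection step, with an arbitrary feature_number:

In [ ]:
# Illustration of the ranking-based feature selection used below:
# drop the leak columns, sort by score, keep the best few.
feature_number = 10  # illustrative value
ranked = scores.loc[[x for x in scores.index if x not in ['Intensity', 'neglog10d']]]
print list(ranked.sort_values(by='0', ascending=False)[:feature_number].index)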

In [9]:
# load the 10-fold CV train/test splits
trainsplits = pd.read_csv('/media/gabor/H/python_from_C/final_ofaction_for_paper_2/data/cv_splits_train_big.csv',header=None)
testsplits = pd.read_csv('/media/gabor/H/python_from_C/final_ofaction_for_paper_2/data/cv_splits_test_big.csv',header=None)
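
Each row of these files holds the CIDs of one cross-validation fold; the loop below uses row k of testsplits as the holdout set. A quick look at the fold structure:

In [ ]:
# rows 0-9 of testsplits are used as holdout folds in the loop below
print trainsplits.shape, testsplits.shape
print testsplits.ix[0, :].values[:5]  # first few holdout CIDs of fold 0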

In [20]:
# predict the holdout sets with different numbers of features
for k in range(10):
    print k
    # set CV split k as the holdout data
    lb_CIDs = testsplits.ix[k,:].values
    #for feature_number in [5,10,20,50,100,200,300,500,1000,1500,2000,3000,4000]:
    #for feature_number in [1,2,3,5,10,33,100,333,1000,3333,10000]:
    for feature_number in [1,2,3,4,5,10,33,100,333,1000,3333,10000]: #new run, with 1,2,3 features
        print(feature_number)
        sys.stdout.flush()

        targets = all_targets[~all_targets['#oID'].isin(test_CIDs)]  # remove test data
        features = features[~features.CID.isin(test_CIDs)] # remove test data 

        train_targets = targets[~targets['#oID'].isin(lb_CIDs)]  # exclude lb targets from training
        train_features = features[~features.CID.isin(lb_CIDs)] # exclude lb features from training
        test_features = features[features.CID.isin(lb_CIDs)] 

        # set the regressor
        regr = linear_model.Ridge(alpha=1, fit_intercept=True, normalize=False, copy_X=True, max_iter=None, tol=0.001, solver='auto')


        result = []
        for idx in range(21):

            #print(descriptor[idx])

            # load the scores for the descriptor
            scores = pd.read_csv('LB_scores_morgan' + str(k) + '/scores_' + str(idx) + '.csv',index_col=0)

            # exclude the data-leak columns (Intensity and dilution), then rank features by score
            scores = scores.loc[[x for x in scores.index if x not in ['Intensity', 'neglog10d']]].sort_values(by='0', ascending=False)

            X_all = train_features[scores[:feature_number].index]  # select the top-scoring features (already sorted above)
            X_all['CID'] = train_features.CID  # add the CIDs as a column
            

            for CID in lb_CIDs:

                Y_train = train_targets[['#oID',descriptor[idx]]]

                Y_train = Y_train[~Y_train[descriptor[idx]].isnull()]
                X = X_all[X_all.CID.isin(Y_train['#oID'])]
                weight = weights[weights.index.isin(Y_train['#oID'])][str(CID)]

                if idx == 0: # if predicting intensity, use the 1/1000 dilutions (neglog10 of 1/1000 is 3)
                    test_data = test_features[test_features.neglog10d == 3]
                    test_data = test_data[test_data.CID == CID]
                    test_data = test_data[scores[:feature_number].index]

                else: # otherwise use the high-dilution rows (note: in this target matrix they do not differ from the low-dilution ones)
                    test_data = test_features[test_features.Intensity == 1]
                    test_data = test_data[test_data.CID == CID]
                    test_data = test_data[scores[:feature_number].index]
                    
                    
                # if there are no test rows for this CID at the required dilution, skip the prediction

                if len(test_data) == 0:
                    print 'zero data', CID
                else:
                    regr.fit(X.drop('CID', 1), Y_train[descriptor[idx]], sample_weight=weight.values)

                    Y_test = regr.predict(test_data)
                    # heuristic sigma = Y*(100-Y)/2500: zero at the scale ends, maximal at Y = 50
                    std = -(Y_test**2)/2500.0 + Y_test/25.0
                    result.append([CID, descriptor[idx], Y_test, std])

        result = pd.DataFrame(result)
        result.columns = ['#oID', 'descriptor', 'value', 'sigma']

        result.value = result.value.astype(float)
        result.sigma = result.sigma.astype(float)

        # clip predictions to the valid [0, 100] rating range

        result.value[result.value < 0] = 0 
        result.value[result.value > 100] = 100

        result.sigma[result.sigma < 0] = 0


        #result_mean['sigma'] = -(result_mean.value**2)/2500.0+result_mean.value/25.0
        result.to_csv('results_morgan_noleak/' + str(k) + '/subchallenge2_' + str(feature_number) + '.txt', sep='\t', index=0)


0
4
/home/gabor/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:35: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
zero data 6501
zero data 8063
zero data 7991
zero data 556940
zero data 61199
zero data 176
zero data 263
zero data 61130
1
4
/home/gabor/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:74: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/gabor/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:75: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/gabor/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:77: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
zero data 18635
zero data 1549026
zero data 61945
zero data 23642
zero data 31252
zero data 7341
zero data 6920
zero data 7969
2
4
zero data 9609
zero data 7969
zero data 7894
zero data 61199
zero data 61130
zero data 7916
3
4
zero data 7916
zero data 10430
zero data 9016
zero data 12587
zero data 15717
zero data 18635
zero data 526618
zero data 7894
4
4
zero data 10430
zero data 10448
zero data 7991
zero data 61653
zero data 15380
zero data 101010
zero data 27458
zero data 10285
zero data 62725
zero data 61527
5
4
zero data 7150
zero data 556940
zero data 8118
zero data 6590
zero data 6429333
zero data 61641
zero data 7969
zero data 5362588
zero data 31252
zero data 61130
zero data 18635
zero data 61199
6
4
zero data 10797
zero data 10430
zero data 526618
zero data 7991
zero data 7894
zero data 263
zero data 5362588
zero data 18635
zero data 61130
zero data 9016
zero data 1032
7
4
zero data 61527
zero data 1549026
zero data 7144
zero data 263
zero data 10430
zero data 6501
zero data 526618
8
4
zero data 61199
zero data 8063
zero data 62725
zero data 1032
zero data 12587
zero data 61670
zero data 61130
9
4
zero data 7144
zero data 10448
zero data 1049
zero data 36822
zero data 23642
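
The heuristic sigma used above, -(y**2)/2500 + y/25, simplifies to y*(100-y)/2500: a parabola that vanishes at both ends of the 0-100 rating scale and peaks at mid-scale. A quick sanity check:

In [ ]:
# sigma = y*(100-y)/2500 is 0 at y = 0 and y = 100 and maximal (1.0) at y = 50
for y in [0.0, 25.0, 50.0, 75.0, 100.0]:
    print y, -(y**2)/2500.0 + y/25.0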

In [29]:
# inspect the rows available for a CID that produced a 'zero data' message
test_features[test_features.CID == CID]


Out[29]:
CID complexity from pubmed MW AMW Sv Se Sp Si Mv Me ... Hypertens-80_2 Hypertens-50_2 Hypnotic-80_2 Hypnotic-50_2 Neoplastic-80_2 Neoplastic-50_2 Infective-80_2 Infective-50_2 neglog10d Intensity
308 7916 0.183463 0.213539 0.018611 0.218204 0.217213 0.218838 0.221028 0.132933 0.375887 ... 0 0 0 0 0 0 0 0 5 1
309 7916 0.183463 0.213539 0.018611 0.218204 0.217213 0.218838 0.221028 0.132933 0.375887 ... 0 0 0 0 0 0 0 0 7 0

2 rows × 9741 columns
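
This inspection explains the 'zero data' messages: CID 7916 was only measured at neglog10d of 5 and 7, so the intensity branch's filter neglog10d == 3 returns an empty frame and the CID is skipped. The affected CIDs can be listed directly (reusing test_features from the last fold):

In [ ]:
# holdout CIDs with no 1/1000-dilution (neglog10d == 3) row;
# these are skipped when predicting intensity
has_dilution = test_features[test_features.neglog10d == 3].CID
print test_features[~test_features.CID.isin(has_dilution)].CID.unique()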


In [22]:
# all test rows at the 1/1000 dilution (the intensity branch's filter)
test_data = test_features[test_features.neglog10d == 3]
#test_data = test_data[test_data.CID == CID]
#test_data = test_data[scores.sort_values(by='0',ascending=0)[:feature_number].index]
test_data


Out[22]:
CID complexity from pubmed MW AMW Sv Se Sp Si Mv Me ... Hypertens-80_2 Hypertens-50_2 Hypnotic-80_2 Hypnotic-50_2 Neoplastic-80_2 Neoplastic-50_2 Infective-80_2 Infective-50_2 neglog10d Intensity
11 239 0.102724 0.184880 0.020081 0.173157 0.187816 0.169183 0.193197 0.108919 0.503546 ... 0 0 0 0 0 0 0 0 3 0
14 241 0.030156 0.156299 0.017265 0.189579 0.158847 0.199864 0.165365 0.186964 0.078014 ... 0 0 0 0 0 0 0 0 3 1
33 325 0.196498 0.343857 0.013183 0.393635 0.371028 0.411944 0.381254 0.138937 0.148936 ... 0 0 0 0 0 0 1 0 3 0
38 379 0.173735 0.328253 0.009412 0.370962 0.390989 0.389775 0.404832 0.095197 0.205674 ... 0 0 0 0 0 0 0 0 3 1
68 1001 0.126459 0.268335 0.013591 0.311468 0.288660 0.327932 0.300793 0.144940 0.113475 ... 0 0 0 0 0 0 0 0 3 1
92 2346 0.256809 0.341231 0.035778 0.315476 0.244267 0.331467 0.244026 0.239280 0.212766 ... 0 0 0 0 0 0 1 0 3 1
113 5541 0.445525 0.520675 0.025550 0.470627 0.464004 0.447941 0.454528 0.146655 0.567376 ... 0 0 0 0 1 0 1 0 3 0
130 6050 0.591440 0.739597 0.016644 0.744646 0.749431 0.746776 0.752025 0.121784 0.354610 ... 1 1 0 0 1 1 1 0 3 1
134 6057 0.342412 0.424399 0.025754 0.405540 0.370847 0.394752 0.367998 0.174099 0.432624 ... 0 0 1 0 1 0 1 0 3 1
145 6213 0.165953 0.197987 0.033990 0.148211 0.154410 0.159060 0.155754 0.127787 0.531915 ... 0 0 0 0 0 0 0 0 3 0
148 6276 0.038716 0.182435 0.004106 0.228344 0.257465 0.249039 0.273600 0.067753 0.134752 ... 0 0 0 0 0 0 0 0 3 1
186 6826 0.309339 0.382789 0.022758 0.384153 0.348939 0.381914 0.350431 0.169811 0.340426 ... 0 0 1 0 0 0 1 0 3 1
190 6943 0.192412 0.307370 0.014661 0.347990 0.323452 0.362148 0.331673 0.146655 0.163121 ... 0 0 0 0 0 0 0 0 3 1
194 6997 0.156809 0.270883 0.016620 0.302315 0.275875 0.312323 0.282078 0.156947 0.191489 ... 0 0 0 0 0 0 0 0 3 1
223 7151 0.243191 0.309815 0.029982 0.292803 0.253803 0.282293 0.251270 0.201544 0.453901 ... 0 0 0 0 0 0 0 0 3 0
245 7463 0.167704 0.302247 0.009779 0.372278 0.349121 0.399106 0.363687 0.133791 0.042553 ... 0 0 0 0 0 0 0 0 3 0
250 7519 0.107782 0.234396 0.019297 0.256640 0.228315 0.262527 0.232498 0.171527 0.219858 ... 0 0 0 0 0 0 0 0 3 1
300 7824 0.153696 0.291766 0.010334 0.325287 0.343429 0.339979 0.355251 0.096913 0.234043 ... 0 0 0 0 0 0 0 0 3 1
328 8030 0.044358 0.171981 0.040447 0.149527 0.112541 0.168392 0.114608 0.239280 0.163121 ... 0 0 0 0 0 0 0 0 3 1
342 8077 0.034047 0.271143 0.026513 0.232292 0.223514 0.279550 0.233166 0.127787 0.092199 ... 0 0 0 0 0 0 0 0 3 1
345 8078 0.030156 0.172059 0.002302 0.236869 0.252070 0.264450 0.270551 0.081475 0.007092 ... 0 0 0 0 0 0 0 0 3 0
348 8091 0.193969 0.364740 0.008677 0.416637 0.438566 0.439600 0.454427 0.093482 0.184397 ... 0 0 0 0 0 0 1 0 3 1
350 8093 0.149027 0.286643 0.006000 0.349575 0.369098 0.376937 0.387279 0.088336 0.113475 ... 0 0 0 0 0 0 0 0 3 1
381 8193 0.157977 0.437845 0.003135 0.548008 0.590452 0.597698 0.620677 0.075472 0.070922 ... 0 0 0 0 1 0 1 0 3 0
409 8697 0.193385 0.328253 0.009412 0.370962 0.390989 0.389775 0.404832 0.095197 0.205674 ... 0 0 0 0 0 0 0 0 3 0
420 8857 0.096304 0.182305 0.015501 0.188293 0.200716 0.190561 0.206510 0.106346 0.382979 ... 0 0 0 0 0 0 0 0 3 1
424 8892 0.134047 0.255279 0.011542 0.279612 0.295853 0.290154 0.305671 0.099485 0.269504 ... 0 0 0 0 0 0 0 0 3 1
431 8918 0.235409 0.437715 0.007583 0.507956 0.533703 0.539192 0.553588 0.091767 0.156028 ... 0 0 1 0 1 0 1 0 3 0
466 10882 0.153696 0.291766 0.010334 0.325287 0.343429 0.339979 0.355251 0.096913 0.234043 ... 0 0 0 0 0 0 0 0 3 1
526 12580 0.160895 0.307370 0.014661 0.347990 0.323452 0.362148 0.331673 0.146655 0.163121 ... 0 0 0 0 0 0 0 0 3 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
542 13216 0.258755 0.354364 0.007551 0.425162 0.433172 0.455011 0.451378 0.102058 0.106383 ... 0 0 0 0 0 0 1 0 3 1
548 14104 0.254864 0.380344 0.012016 0.439310 0.418589 0.461769 0.430834 0.132933 0.127660 ... 0 0 1 0 0 0 1 0 3 1
550 14228 0.301556 0.547176 0.006498 0.644981 0.676416 0.688638 0.702343 0.089194 0.127660 ... 0 0 0 0 0 0 0 0 3 1
562 14514 0.233463 0.276006 0.023738 0.278027 0.250206 0.275393 0.250065 0.176672 0.361702 ... 0 0 0 0 0 0 0 0 3 1
612 22386 0.052724 0.182435 0.004106 0.228344 0.257465 0.249039 0.273600 0.067753 0.134752 ... 0 0 0 0 0 0 0 0 3 1
617 23235 0.332685 0.489675 0.015159 0.536283 0.504553 0.552709 0.512500 0.143225 0.198582 ... 1 0 0 0 1 0 1 0 3 0
655 31249 0.262646 0.406221 0.018824 0.397972 0.403708 0.393960 0.404890 0.124357 0.418440 ... 0 0 0 0 0 0 0 0 3 0
696 61138 0.148054 0.213539 0.018611 0.218204 0.217213 0.218838 0.221028 0.132933 0.375887 ... 0 0 0 0 0 0 0 0 3 1
698 61151 0.303502 0.385338 0.025787 0.374970 0.336171 0.366305 0.331717 0.181818 0.411348 ... 0 0 1 0 0 0 1 0 3 1
702 61177 0.120428 0.291766 0.010334 0.325287 0.343429 0.339979 0.355251 0.096913 0.234043 ... 0 0 0 0 0 0 0 0 3 1
704 61185 0.280156 0.510689 0.006808 0.599306 0.628840 0.638813 0.652748 0.090051 0.134752 ... 0 0 0 0 0 0 1 0 3 1
706 61192 0.317121 0.434958 0.029876 0.389746 0.350210 0.400209 0.349270 0.177530 0.361702 ... 0 0 1 0 1 0 1 0 3 1
710 61204 0.299611 0.432461 0.009714 0.492223 0.502639 0.517673 0.518526 0.104631 0.177305 ... 0 0 1 0 1 0 1 0 3 1
724 61337 0.297665 0.348851 0.029493 0.329325 0.288594 0.316508 0.282137 0.198113 0.475177 ... 0 0 0 0 0 0 0 0 3 1
746 61918 0.285992 0.343753 0.022505 0.347661 0.314148 0.347670 0.319565 0.170669 0.304965 ... 0 0 0 0 0 0 0 0 3 1
760 62374 0.478599 0.348981 0.018146 0.369347 0.345343 0.374986 0.349226 0.151801 0.269504 ... 0 0 0 0 0 0 0 0 3 1
766 62444 0.118872 0.260585 0.020889 0.248086 0.244152 0.274092 0.251880 0.126072 0.212766 ... 0 0 0 0 0 0 0 0 3 1
770 62572 0.175681 0.296890 0.015501 0.300999 0.317760 0.303020 0.323224 0.106346 0.382979 ... 0 0 0 0 0 0 0 0 3 1
784 78925 0.031907 0.192864 0.022007 0.172499 0.180079 0.196019 0.187781 0.107204 0.248227 ... 0 0 0 0 0 0 0 0 3 1
793 89440 0.712062 0.625247 0.011004 0.727477 0.689432 0.767362 0.707788 0.132075 0.099291 ... 0 0 0 0 0 0 1 0 3 0
796 93375 0.307393 0.312624 0.011142 0.363723 0.354532 0.383667 0.366735 0.123499 0.141844 ... 0 0 0 0 0 0 0 0 3 1
811 159055 0.422179 0.349111 0.010155 0.409398 0.402092 0.433492 0.416316 0.119211 0.127660 ... 0 0 0 0 0 0 1 0 3 0
813 165675 0.233463 0.359617 0.005281 0.440925 0.464235 0.476558 0.486440 0.087479 0.092199 ... 0 0 0 0 0 0 1 0 3 0
815 170833 0.293774 0.354364 0.007551 0.425162 0.433172 0.455011 0.451378 0.102058 0.106383 ... 0 0 0 0 0 0 1 0 3 0
831 440967 0.344358 0.307500 0.006906 0.388011 0.380201 0.420625 0.398763 0.114065 0.028369 ... 0 0 0 0 0 0 0 0 3 0
838 519539 0.165370 0.320503 0.021779 0.317091 0.289914 0.346228 0.299631 0.152659 0.141844 ... 0 0 0 0 0 0 0 0 3 1
852 565690 0.476654 0.578409 0.007738 0.674892 0.692913 0.716916 0.716862 0.098628 0.127660 ... 0 0 0 0 1 0 1 0 3 1
862 637776 0.322957 0.416701 0.018016 0.444933 0.409416 0.453088 0.413325 0.156947 0.241135 ... 0 0 1 0 0 0 1 0 3 1
923 5363491 0.282101 0.317747 0.016146 0.339435 0.328846 0.346709 0.334708 0.135506 0.269504 ... 0 0 0 0 0 0 0 0 3 0
942 6114390 0.669261 0.687454 0.023224 0.707227 0.608928 0.706934 0.602645 0.192110 0.269504 ... 1 0 0 0 1 0 1 0 3 1

61 rows × 9741 columns


In [16]:
CID


Out[16]:
6501

In [ ]: