In [2]:
# Preliminaries to work with the data.   
%matplotlib inline
%run __init__.py
from utils import loading, scoring
from gerkin import dream,params
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Root of the data directory, relative to the notebook's working directory.
# It keeps a trailing slash, so paths built later as '%s/...' % DATA contain a doubled slash.
DATA = '../../data/'

In [3]:
# Load the perceptual descriptor names, collect the CIDs of every molecule in the
# training and leaderboard sets, and fetch the molecular feature tables
# (once with all feature families, once with Dragon features only).
descriptors = loading.get_descriptors(format='True')
sets = ['training','leaderboard']
all_CIDs = sorted(cid for set_ in sets for cid in loading.get_CIDs(set_))
mdx = dream.get_molecular_data(
    ['dragon','episuite','morgan','nspdk','gramian'],
    all_CIDs,
)
mdx_onlydragon = dream.get_molecular_data(['dragon'],all_CIDs)


Dragon has 4869 features for 407 molecules.
Episuite has 62 features for 407 molecules.
Morgan has 2437 features for 407 molecules.
NSPDK has 5392 features for 407 molecules.
NSPDK Gramian has 2437 features for 407 molecules.
There are now 15197 total features.
Dragon has 4869 features for 407 molecules.
There are now 4869 total features.

In [17]:
# Create the feature (X) and perceptual-descriptor (Y) arrays: one X built from all
# feature families and one built from Dragon features only.
# NOTE(review): the meaning of make_X's return values is not visible here; good2 is used
# later to index the list of feature names, so it presumably holds the indices of the
# features that were kept — confirm against dream.make_X.
X_forest,good1,good2,means,stds,imputer = dream.make_X(mdx,sets)
X_forest_onlydragon,good1_onlydragon,good2_onlydragon,means_onlydragon,stds_onlydragon,imputer_onlydragon = dream.make_X(mdx_onlydragon,sets)
# -1 removes the CID; -6 removes six NaN-heavy episuite features; +2 adds the dilution information
assert len(good1) == len(mdx[0]) -1 +2 -6
# NOTE(review): this rebinds `imputer`, discarding the one returned by make_X(mdx,sets) above.
Y_all,imputer = dream.make_Y_obs(sets,target_dilution=None,imputer='mask')


The X matrix now has shape (814x13380) molecules by non-NaN good molecular descriptors
The X matrix now has shape (814x3063) molecules by non-NaN good molecular descriptors
The Y['mean_std'] matrix now has shape (814x42) molecules by 2 x perceptual descriptors
The Y['subject'] dict now has 49 matrices of shape (814x21) molecules by perceptual descriptors, one for each subject

Load or compute the random forest model


In [18]:
# Load or compute the random forest model correlations (obtained from cross-validation)
from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor
#trans_params = params.get_trans_params(Y_all, descriptors, plot=False)
# NOTE(review): these values are unpacked here but are not passed to compute_importance_ranks
# in this cell, which is called with its own defaults (only n_estimators is given) — confirm
# that this is intended.
use_et, max_features, max_depth, min_samples_leaf, trans_weight, regularize, use_mask = params.get_other_params()

def compute_importance_ranks(X,Y,n_estimators=50,
                  max_features='auto',
                  max_depth=None,min_samples_leaf=1,
                  random_state=0,n_descriptors=21,n_jobs=8):
    """Fit one tree ensemble per perceptual descriptor and collect feature importances.

    For each of the first `n_descriptors` columns of `Y`, a regressor is fit on `X`
    and its `feature_importances_` are recorded.  Column 0 is fit with an
    ExtraTreesRegressor; every other column with a RandomForestRegressor.

    Parameters
    ----------
    X : 2-D array, one row per sample and one column per feature.
    Y : 2-D array with at least `n_descriptors` columns; column `col` is the
        regression target for descriptor `col`.
    n_estimators, max_depth, min_samples_leaf, random_state :
        Passed unchanged to both estimator classes.
    max_features :
        Passed to both estimator classes.  The legacy value 'auto' is mapped to
        1.0 (see the comment in the body).
    n_descriptors : number of leading columns of `Y` to model (default 21).
    n_jobs : number of parallel jobs given to each estimator (default 8).

    Returns
    -------
    importances : float array of shape (n_descriptors, n_features)
        Row `col` is `feature_importances_` of the model fit to `Y[:, col]`.
    importance_ranks : float array of shape (n_descriptors, n_features)
        Row `col` lists feature *indices* ordered from most to least important
        (a descending argsort of the importances).  Despite the name it is an
        ordering of features, not the rank of each feature, and the indices are
        stored as floats because the array is created with np.zeros.
    """
    # For regressors 'auto' meant "consider every feature at each split".  That
    # spelling was removed in scikit-learn 1.3; the fraction 1.0 requests the same
    # thing and is accepted by both old and new releases.
    if isinstance(max_features, str) and max_features == 'auto':
        max_features = 1.0

    n_features = X.shape[1]
    importances = np.zeros((n_descriptors,n_features)) # Feature importances, one row per descriptor.
    importance_ranks = np.zeros((n_descriptors,n_features)) # Feature orderings, one row per descriptor.
    for col in range(n_descriptors): # For each descriptor.
        print(col) # Progress indicator.
        observed = Y[:,col] # Perceptual data for this descriptor.
        if col==0:
            # The first descriptor gets extremely randomized trees instead of a random forest.
            est = ExtraTreesRegressor(n_estimators=n_estimators,max_features=max_features,max_depth=max_depth,
                                      min_samples_leaf=min_samples_leaf,n_jobs=n_jobs,random_state=random_state)
        else:
            est = RandomForestRegressor(n_estimators=n_estimators,max_features=max_features,max_depth=max_depth,
                                      min_samples_leaf=min_samples_leaf,oob_score=False,n_jobs=n_jobs,random_state=random_state)
        est.fit(X,observed) # Fit the model on all rows of X.
        importances[col,:] = est.feature_importances_
        # Feature indices sorted by decreasing importance.
        importance_ranks[col,:] = np.argsort(est.feature_importances_)[::-1]

    return importances,importance_ranks

import os  # only needed for the cache-existence check below

def _load_or_compute_importances(suffix, X, Y, recompute=False, n_estimators=50):
    """Return (importances, importance_ranks), using the .npy cache under DATA when possible.

    The arrays are stored as 'importances_forest<suffix>.npy' and
    'importance_ranks_forest<suffix>.npy'.  They are recomputed with
    compute_importance_ranks (and the cache rewritten) when `recompute` is true or
    when either cache file is missing; otherwise they are loaded from disk.
    """
    importances_path = os.path.join(DATA, 'importances_forest%s.npy' % suffix)
    ranks_path = os.path.join(DATA, 'importance_ranks_forest%s.npy' % suffix)
    cache_present = os.path.exists(importances_path) and os.path.exists(ranks_path)
    if recompute or not cache_present:
        computed_importances, computed_ranks = compute_importance_ranks(X, Y, n_estimators=n_estimators)
        np.save(importances_path, computed_importances)
        np.save(ranks_path, computed_ranks)
        return computed_importances, computed_ranks
    return np.load(importances_path), np.load(ranks_path)

# The last column of each X matrix is left out of the fit (the feature-name cells
# below drop the matching 'conc_relative' name).
# All feature families: use the cached result when it exists.
importances,importance_ranks = _load_or_compute_importances(
    '', X_forest[:,:-1], Y_all['mean_std'], recompute=False)

# Dragon features only: always refit, as this cell did before.
importances_onlydragon,importance_ranks_onlydragon = _load_or_compute_importances(
    '_onlydragon', X_forest_onlydragon[:,:-1], Y_all['mean_std'], recompute=True)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

In [5]:
# Load the metadata needed to give every feature column a readable name.
# `.values` is used instead of `.as_matrix()` (removed in pandas 1.0), and `axis=1` is
# passed by keyword because the positional form of `drop` was removed in pandas 2.0.

# CID list for the NSPDK Gramian features; later cells look up nspdk_CIDs[k] for 'nspdkgramian_k'.
nspdk_CIDs = pd.read_csv('%s/derived/nspdk_cid.csv' % DATA, header=None, dtype='int').values.squeeze()
nspdk_dict = dream.make_nspdk_dict(all_CIDs)
nspdk_feature_numbers = list(nspdk_dict.keys())

# Episuite descriptors, restricted to the molecules used here.
episuite_df = pd.read_table('%s/DREAM_episuite_descriptors.txt' % DATA,index_col=0).drop('SMILES',axis=1)
episuite_df = episuite_df.loc[all_CIDs]
# Column 47 holds 'YES ' (with a trailing space) or something else; recode it as 1/0.
episuite_df.iloc[:,47] = 1*(episuite_df.iloc[:,47]=='YES ')
episuite_names = list(episuite_df)
episuite = episuite_df.values
# Keep only the names of the columns that purge1_X reports as good.
_,good = dream.purge1_X(episuite)
episuite_names = [e for i,e in enumerate(episuite_names) if i in good]

# The header row of the Morgan similarity table lists the template CIDs after the first field.
with open('%s/morgan_sim.csv' % DATA) as f:
    header_line = f.readline()
    morgan_template_CIDs = [int(field) for field in header_line.split(',')[1:]]

assert len(morgan_template_CIDs) == len(nspdk_CIDs) == 2437

In [6]:
from utils import loading
# Build one readable name per feature column, in the order:
# dragon, episuite, morgan, nspdk, nspdkgramian, then the two concentration columns.
headers,_ = loading.load_molecular_data()
dragon_feature_names = headers[1:]

all_feature_names = []
all_feature_names += ['dragon_%s' % name for name in dragon_feature_names]
all_feature_names += ['episuite_%s' % name for name in episuite_names]
all_feature_names += ['morgan_%d' % cid for cid in morgan_template_CIDs]
all_feature_names += ['nspdk_%s' % number for number in nspdk_feature_numbers]
# TODO (presumably the intent of the original note): name the nspdkgramian features
# after nspdk_CIDs instead of a bare 0..2436 range.
all_feature_names += ['nspdkgramian_%d' % k for k in range(2437)]
all_feature_names += ['conc_absolute','conc_relative']

# Keep the names selected by good2, then drop the last one:
# relative dilution was not used for the fit.
good_feature_names = [all_feature_names[i] for i in good2][:-1]
assert len(good_feature_names) == importance_ranks.shape[1]

In [7]:
# Tables of feature importances (rows: features, columns: perceptual descriptors).
# `new_ranks` comes from this notebook's arrays; `old_ranks` from a previously saved CSV.
# The concentration rows are removed from both so that only molecular features remain.
new_ranks = pd.DataFrame(
    importances.T,
    index=good_feature_names,
    columns=descriptors,
).drop('conc_absolute')

# NOTE(review): absolute path on the original author's machine; this cell fails elsewhere.
old_ranks = pd.read_csv('/Users/rgerkin/Desktop/feature_importances.csv',index_col=0)
old_ranks.columns = descriptors
#old_ranks.drop([s for s in list(old_ranks.index) if 'episuite_' in s],inplace=True)
old_ranks = old_ranks.drop(['conc_relative','conc_absolute'])

In [8]:
from IPython.display import display

# Show the ten most important features for one descriptor, new table first, then old.
desc = 'Pleasantness'
#pd.set_option('display.precision',2)
pd.set_option('display.float_format', lambda x: '%.4f' % x)
for ranks_table in (new_ranks, old_ranks):
    top_ten = ranks_table.sort_values(desc,ascending=False)[[desc]].head(10)
    display(top_ten)


Pleasantness
nspdkgramian_1249 0.0557
nspdkgramian_764 0.0369
morgan_90978 0.0206
dragon_SssO 0.0184
dragon_P_VSA_MR_8 0.0161
dragon_nHM 0.0116
dragon_RDF015s 0.0103
nspdkgramian_199 0.0101
dragon_P_VSA_i_1 0.0089
dragon_MAXDP 0.0074
Pleasantness
nspdkgramian_1249 0.0466
dragon_RDF015s 0.0263
dragon_SssO 0.0192
nspdkgramian_764 0.0157
dragon_MAXDP 0.0147
dragon_P_VSA_i_1 0.0130
morgan_90978 0.0101
dragon_P_VSA_MR_8 0.0094
nspdkgramian_390 0.0080
dragon_RDF025s 0.0075

In [9]:
# CIDs behind the nspdkgramian_1249 / _764 / _390 features listed above.
tuple(nspdk_CIDs[k] for k in (1249, 764, 390))


Out[9]:
(36314, 8767, 62902)

In [10]:
# Compare the previously saved importances with the ones from this notebook.
# Restrict the old table to the features that also exist in the new one, in the same row order.
# `axis=0` is passed by keyword: the positional form of `drop` was removed in pandas 2.0.
diff = list(set(list(old_ranks.index)).difference(new_ranks.index))
old_ranks_2 = old_ranks.drop(diff,axis=0).loc[new_ranks.index]
fig,axes = plt.subplots(7,3,figsize=(7,16))
for i,desc in enumerate(descriptors):
    # Feature names ordered from most to least important in each table.
    old_sorted = list(old_ranks_2.sort_values(desc,ascending=False).index)
    new_sorted = list(new_ranks.sort_values(desc,ascending=False).index)
    print(desc)
    # For the ten best features of the old table, print: old position -> new position (name).
    for j in range(10):
        old_name = old_sorted[j]
        print("\t%d -> %d (%s)" % (j,new_sorted.index(old_name),old_name))
    # One panel per descriptor: old importance (x) against new importance (y), log-log.
    ax = axes.flat[i]
    old = old_ranks_2[desc]
    new = new_ranks[desc]
    ax.scatter(old,new)
    ax.set_xlim(0.0001,1)
    ax.set_ylim(0.0001,1)
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_title(desc)
plt.tight_layout()


Intensity
	0 -> 0 (dragon_B03[C-S])
	1 -> 2 (dragon_F03[C-S])
	2 -> 3 (dragon_Eig07_AEA(dm))
	3 -> 39 (dragon_Eig08_AEA(dm))
	4 -> 6 (dragon_O-057)
	5 -> 4 (dragon_C-028)
	6 -> 4710 (dragon_H-049)
	7 -> 9 (dragon_Mor30p)
	8 -> 360 (dragon_Eig10_AEA(dm))
	9 -> 41 (dragon_SpAbs_B(s))
Pleasantness
	0 -> 0 (nspdkgramian_1249)
	1 -> 6 (dragon_RDF015s)
	2 -> 3 (dragon_SssO)
	3 -> 1 (nspdkgramian_764)
	4 -> 9 (dragon_MAXDP)
	5 -> 8 (dragon_P_VSA_i_1)
	6 -> 2 (morgan_90978)
	7 -> 4 (dragon_P_VSA_MR_8)
	8 -> 13 (nspdkgramian_390)
	9 -> 55 (dragon_RDF025s)
Bakery
	0 -> 14 (nspdk_253902)
	1 -> 17 (nspdk_1022278)
	2 -> 24 (nspdk_722140)
	3 -> 16 (nspdk_250366)
	4 -> 19 (nspdk_555472)
	5 -> 4 (nspdk_544811)
	6 -> 5 (nspdk_214067)
	7 -> 11756 (nspdk_430273)
	8 -> 9 (nspdk_199698)
	9 -> 23 (nspdk_410940)
Sweet
	0 -> 0 (nspdkgramian_390)
	1 -> 8 (nspdkgramian_199)
	2 -> 10 (morgan_102861)
	3 -> 4 (morgan_8467)
	4 -> 17 (nspdk_939920)
	5 -> 16 (nspdk_338410)
	6 -> 23 (morgan_158410)
	7 -> 77 (nspdk_248294)
	8 -> 9 (nspdkgramian_2183)
	9 -> 7563 (nspdk_387269)
Fruit
	0 -> 0 (nspdkgramian_390)
	1 -> 1 (morgan_102861)
	2 -> 2 (nspdk_338410)
	3 -> 5327 (morgan_5362622)
	4 -> 7 (morgan_5352162)
	5 -> 5 (morgan_38415)
	6 -> 9 (morgan_31265)
	7 -> 3 (nspdkgramian_2166)
	8 -> 20 (nspdkgramian_1342)
	9 -> 674 (morgan_31249)
Fish
	0 -> 11429 (nspdk_390378)
	1 -> 0 (dragon_P_VSA_m_4)
	2 -> 32 (nspdk_517164)
	3 -> 5619 (dragon_nS)
	4 -> 1 (nspdkgramian_1876)
	5 -> 26 (nspdkgramian_787)
	6 -> 222 (dragon_R4p+)
	7 -> 4 (dragon_NssS)
	8 -> 5 (nspdkgramian_2226)
	9 -> 1408 (dragon_R1p+)
Garlic
	0 -> 6 (nspdk_390378)
	1 -> 10828 (nspdk_517164)
	2 -> 0 (dragon_HATS3p)
	3 -> 4 (nspdk_264584)
	4 -> 8534 (dragon_nS)
	5 -> 3 (nspdkgramian_284)
	6 -> 3789 (nspdkgramian_839)
	7 -> 1770 (dragon_R1p+)
	8 -> 1 (nspdkgramian_787)
	9 -> 13 (nspdkgramian_786)
Spices
	0 -> 0 (nspdkgramian_1422)
	1 -> 2843 (nspdkgramian_1571)
	2 -> 4811 (nspdkgramian_830)
	3 -> 6096 (nspdk_692636)
	4 -> 8161 (nspdk_465757)
	5 -> 34 (morgan_10364)
	6 -> 1139 (dragon_HATSp)
	7 -> 1 (nspdkgramian_787)
	8 -> 2 (morgan_7461)
	9 -> 3169 (nspdkgramian_1162)
Cold
	0 -> 3 (morgan_8748)
	1 -> 4473 (morgan_442347)
	2 -> 0 (morgan_94174)
	3 -> 98 (morgan_519743)
	4 -> 51 (dragon_Mor14s)
	5 -> 14 (morgan_530420)
	6 -> 66 (morgan_289151)
	7 -> 20 (dragon_R5u)
	8 -> 12 (morgan_69091)
	9 -> 4687 (dragon_Eig07_EA)
Sour
	0 -> 0 (morgan_264)
	1 -> 770 (dragon_SM06_EA(dm))
	2 -> 3 (morgan_15395)
	3 -> 211 (dragon_R4p+)
	4 -> 36 (nspdkgramian_970)
	5 -> 1 (dragon_Mor13m)
	6 -> 205 (dragon_X5A)
	7 -> 157 (dragon_SM02_EA(dm))
	8 -> 413 (nspdkgramian_582)
	9 -> 12 (nspdkgramian_1249)
Burnt
	0 -> 0 (dragon_F04[C-S])
	1 -> 9310 (nspdk_1036848)
	2 -> 5 (dragon_SM04_EA(dm))
	3 -> 4 (dragon_R3p+)
	4 -> 26 (morgan_14072)
	5 -> 56 (morgan_11420)
	6 -> 48 (nspdkgramian_1601)
	7 -> 3 (nspdk_229520)
	8 -> 24 (dragon_HATS3v)
	9 -> 10 (morgan_7936)
Acid
	0 -> 14 (dragon_ATSC2s)
	1 -> 2 (dragon_SpMax4_Bh(s))
	2 -> 1 (dragon_ATSC1s)
	3 -> 8 (morgan_6429278)
	4 -> 4 (dragon_HTs)
	5 -> 0 (dragon_P1p)
	6 -> 3 (morgan_66335)
	7 -> 10 (episuite_Estimated BP (oC))
	8 -> 1080 (dragon_RDF020i)
	9 -> 32 (nspdkgramian_376)
Warm
	0 -> 0 (dragon_Mor17s)
	1 -> 6 (morgan_8467)
	2 -> 1 (nspdkgramian_199)
	3 -> 1832 (dragon_TDB04p)
	4 -> 2340 (morgan_6429126)
	5 -> 143 (dragon_Mor15p)
	6 -> 3 (dragon_R6e+)
	7 -> 4627 (nspdkgramian_1938)
	8 -> 111 (morgan_9551283)
	9 -> 125 (dragon_Eig05_EA(dm))
Musky
	0 -> 0 (nspdkgramian_1249)
	1 -> 1 (dragon_GATS2e)
	2 -> 25 (dragon_R4p+)
	3 -> 256 (dragon_GATS2s)
	4 -> 10826 (morgan_15395)
	5 -> 143 (dragon_GATS5s)
	6 -> 4184 (dragon_SssO)
	7 -> 8 (dragon_GATS5e)
	8 -> 18 (morgan_40923)
	9 -> 29 (nspdkgramian_290)
Sweaty
	0 -> 8181 (nspdk_394468)
	1 -> 23 (dragon_GATS2s)
	2 -> 439 (nspdkgramian_559)
	3 -> 217 (nspdkgramian_497)
	4 -> 2 (nspdkgramian_1249)
	5 -> 7 (dragon_Mor13m)
	6 -> 5 (morgan_264)
	7 -> 11 (morgan_15395)
	8 -> 13032 (nspdk_679352)
	9 -> 2207 (nspdkgramian_1380)
Ammonia
	0 -> 10 (dragon_ATSC2s)
	1 -> 9 (dragon_F01[C-O])
	2 -> 0 (dragon_SssO)
	3 -> 40 (dragon_Mor21m)
	4 -> 1675 (dragon_ATSC2e)
	5 -> 69 (dragon_F02[C-O])
	6 -> 675 (dragon_R4p+)
	7 -> 36 (nspdk_827740)
	8 -> 1522 (dragon_RTp+)
	9 -> 7873 (nspdk_186640)
Decayed
	0 -> 1 (nspdk_390378)
	1 -> 12 (nspdkgramian_1313)
	2 -> 4761 (dragon_SM09_EA(dm))
	3 -> 9322 (nspdkgramian_700)
	4 -> 9205 (dragon_SM15_EA(dm))
	5 -> 22 (dragon_Mor10m)
	6 -> 5218 (nspdkgramian_301)
	7 -> 4592 (nspdkgramian_786)
	8 -> 19 (nspdkgramian_1876)
	9 -> 6354 (dragon_SM06_EA(dm))
Wood
	0 -> 2 (morgan_37580)
	1 -> 3352 (morgan_28905)
	2 -> 0 (morgan_70752)
	3 -> 1714 (morgan_61918)
	4 -> 381 (morgan_6429280)
	5 -> 1297 (morgan_519203)
	6 -> 383 (morgan_2519)
	7 -> 10918 (nspdk_952466)
	8 -> 7676 (nspdkgramian_530)
	9 -> 9859 (morgan_28906)
Grass
	0 -> 98 (morgan_5352539)
	1 -> 7904 (nspdk_426606)
	2 -> 9278 (nspdkgramian_2332)
	3 -> 8 (morgan_16220109)
	4 -> 12681 (nspdk_526261)
	5 -> 900 (nspdk_868779)
	6 -> 2311 (nspdkgramian_465)
	7 -> 2 (nspdkgramian_1432)
	8 -> 13355 (nspdk_731085)
	9 -> 6660 (nspdkgramian_2065)
Flower
	0 -> 4905 (dragon_SpMax4_Bh(m))
	1 -> 0 (nspdkgramian_764)
	2 -> 11 (dragon_piPC06)
	3 -> 4357 (nspdk_698)
	4 -> 86 (nspdkgramian_1421)
	5 -> 5867 (dragon_BLTF96)
	6 -> 703 (nspdkgramian_1249)
	7 -> 12891 (nspdk_203080)
	8 -> 667 (dragon_RDF050u)
	9 -> 18 (dragon_WiA_Dt)
Chemical
	0 -> 1 (dragon_ATSC2s)
	1 -> 0 (dragon_TPSA(Tot))
	2 -> 4 (morgan_5054)
	3 -> 5 (dragon_RDF020e)
	4 -> 2 (dragon_SM1_Dz(m))
	5 -> 6 (dragon_SpMax4_Bh(s))
	6 -> 404 (episuite_Estimated VP (Pa))
	7 -> 3 (dragon_SM1_Dz(Z))
	8 -> 54 (nspdkgramian_1349)
	9 -> 7093 (dragon_CATS2D_03_LL)

In [11]:
def _gramian_index_to_cid(feature_name):
    """Rewrite 'nspdkgramian_<k>' as 'nspdkgramian_<nspdk_CIDs[k]>'; return other names unchanged."""
    if 'nspdkgramian_' not in feature_name:
        return feature_name
    _,index = feature_name.split('_')
    return 'nspdkgramian_%d' % nspdk_CIDs[int(index)]

# Five most important features per descriptor in the new table.
for desc in descriptors:
    new_sorted = list(new_ranks.sort_values(desc,ascending=False).index)
    print(desc)
    for j in range(5):
        print("\t%d) %s" % (j+1,_gramian_index_to_cid(new_sorted[j])))


Intensity
	1) dragon_B03[C-S]
	2) dragon_LLS_01
	3) dragon_F03[C-S]
	4) dragon_Eig07_AEA(dm)
	5) dragon_C-028
Pleasantness
	1) nspdkgramian_36314
	2) nspdkgramian_8767
	3) morgan_90978
	4) dragon_SssO
	5) dragon_P_VSA_MR_8
Bakery
	1) nspdkgramian_539829
	2) morgan_12127
	3) nspdkgramian_8655
	4) nspdkgramian_1183
	5) nspdk_544811
Sweet
	1) nspdkgramian_62902
	2) dragon_SssO
	3) nspdkgramian_8655
	4) morgan_12127
	5) morgan_8467
Fruit
	1) nspdkgramian_62902
	2) morgan_102861
	3) nspdk_338410
	4) nspdkgramian_5367682
	5) nspdk_993867
Fish
	1) dragon_P_VSA_m_4
	2) nspdkgramian_522181
	3) dragon_X4Av
	4) dragon_P_VSA_i_1
	5) dragon_NssS
Garlic
	1) dragon_HATS3p
	2) nspdkgramian_9264
	3) dragon_P_VSA_m_4
	4) nspdkgramian_15380
	5) nspdk_264584
Spices
	1) nspdkgramian_68252
	2) nspdkgramian_9264
	3) morgan_7461
	4) morgan_80790
	5) morgan_84398
Cold
	1) morgan_94174
	2) dragon_R5i
	3) dragon_TPSA(Tot)
	4) morgan_8748
	5) dragon_H0m
Sour
	1) morgan_264
	2) dragon_Mor13m
	3) nspdkgramian_360253
	4) morgan_15395
	5) morgan_61138
Burnt
	1) dragon_F04[C-S]
	2) nspdkgramian_61282
	3) nspdk_775138
	4) nspdk_229520
	5) dragon_R3p+
Acid
	1) dragon_P1p
	2) dragon_ATSC1s
	3) dragon_SpMax4_Bh(s)
	4) morgan_66335
	5) dragon_HTs
Warm
	1) dragon_Mor17s
	2) nspdkgramian_8467
	3) morgan_969516
	4) dragon_R6e+
	5) nspdk_295118
Musky
	1) nspdkgramian_36314
	2) dragon_GATS2e
	3) dragon_RDF025s
	4) morgan_239
	5) morgan_71299776
Sweaty
	1) dragon_GATS2e
	2) nspdkgramian_12587
	3) nspdkgramian_36314
	4) morgan_10430
	5) nspdkgramian_145742
Ammonia
	1) dragon_SssO
	2) dragon_R3u
	3) nspdkgramian_60997
	4) dragon_CATS2D_04_AL
	5) nspdkgramian_8892
Decayed
	1) nspdkgramian_878
	2) nspdk_390378
	3) nspdkgramian_13852
	4) morgan_91618238
	5) morgan_15395
Wood
	1) morgan_70752
	2) dragon_Mor32e
	3) morgan_37580
	4) morgan_90190
	5) nspdkgramian_5460221
Grass
	1) morgan_5367681
	2) nspdkgramian_44246385
	3) nspdkgramian_69389
	4) morgan_25812
	5) nspdkgramian_5367681
Flower
	1) nspdkgramian_8767
	2) nspdk_360603
	3) morgan_21070732
	4) nspdkgramian_8615
	5) dragon_piPC05
Chemical
	1) dragon_TPSA(Tot)
	2) dragon_ATSC2s
	3) dragon_SM1_Dz(m)
	4) dragon_SM1_Dz(Z)
	5) morgan_5054

In [15]:
# Twenty most important features per descriptor in the old table.
# Names of the form 'nspdkgramian_<k>' are printed with the CID nspdk_CIDs[k] in place of k.
for desc in descriptors:
    old_sorted = list(old_ranks.sort_values(desc,ascending=False).index)
    print(desc)
    for position in range(20):
        old_name = old_sorted[position]
        new_name = old_name
        if 'nspdkgramian_' in old_name:
            _,index = old_name.split('_')
            new_name = 'nspdkgramian_%d' % nspdk_CIDs[int(index)]
        print("\t%d) %s" % (position+1,new_name))


Intensity
	1) dragon_B03[C-S]
	2) dragon_F03[C-S]
	3) dragon_Eig07_AEA(dm)
	4) dragon_Eig08_AEA(dm)
	5) dragon_O-057
	6) dragon_C-028
	7) dragon_H-049
	8) dragon_Mor30p
	9) dragon_Eig10_AEA(dm)
	10) dragon_SpAbs_B(s)
	11) dragon_DLS_01
	12) dragon_C-008
	13) dragon_Psi_i_s
	14) dragon_MATS7e
	15) morgan_16997
	16) nspdk_102816
	17) dragon_ATS2s
	18) dragon_Eig04_AEA(dm)
	19) dragon_ATS1s
	20) dragon_Ho_B(s)
Pleasantness
	1) nspdkgramian_36314
	2) dragon_RDF015s
	3) dragon_SssO
	4) nspdkgramian_8767
	5) dragon_MAXDP
	6) dragon_P_VSA_i_1
	7) morgan_90978
	8) dragon_P_VSA_MR_8
	9) nspdkgramian_62902
	10) dragon_RDF025s
	11) dragon_CATS2D_04_AL
	12) dragon_SpPosA_B(m)
	13) nspdkgramian_8467
	14) dragon_nHM
	15) dragon_Mor05s
	16) dragon_H0p
	17) dragon_R2s+
	18) morgan_6988
	19) nspdkgramian_1118
	20) morgan_8467
Bakery
	1) nspdk_253902
	2) nspdk_1022278
	3) nspdk_722140
	4) nspdk_250366
	5) nspdk_555472
	6) nspdk_544811
	7) nspdk_214067
	8) nspdk_430273
	9) nspdk_199698
	10) nspdk_410940
	11) morgan_12127
	12) morgan_24955
	13) nspdk_160914
	14) nspdk_519431
	15) nspdk_187641
	16) nspdk_303242
	17) nspdk_867849
	18) morgan_30231
	19) nspdk_296463
	20) nspdk_467866
Sweet
	1) nspdkgramian_62902
	2) nspdkgramian_8467
	3) morgan_102861
	4) morgan_8467
	5) nspdk_939920
	6) nspdk_338410
	7) morgan_158410
	8) nspdk_248294
	9) nspdkgramian_5462814
	10) nspdk_387269
	11) nspdkgramian_155708
	12) nspdkgramian_24020
	13) dragon_Mor26v
	14) nspdk_754869
	15) dragon_CATS2D_04_AL
	16) nspdk_544811
	17) nspdk_842150
	18) nspdk_214067
	19) nspdk_199698
	20) morgan_24197
Fruit
	1) nspdkgramian_62902
	2) morgan_102861
	3) nspdk_338410
	4) morgan_5362622
	5) morgan_5352162
	6) morgan_38415
	7) morgan_31265
	8) nspdkgramian_5367682
	9) nspdkgramian_61467
	10) morgan_31249
	11) nspdk_33461
	12) morgan_61451
	13) morgan_31214
	14) nspdkgramian_61408
	15) nspdkgramian_24020
	16) nspdkgramian_5362622
	17) nspdkgramian_10882
	18) nspdk_230039
	19) nspdk_1018866
	20) nspdkgramian_11086
Fish
	1) nspdk_390378
	2) dragon_P_VSA_m_4
	3) nspdk_517164
	4) dragon_nS
	5) nspdkgramian_522181
	6) nspdkgramian_9264
	7) dragon_R4p+
	8) dragon_NssS
	9) nspdkgramian_6428817
	10) dragon_R1p+
	11) nspdkgramian_8013
	12) nspdkgramian_10902
	13) dragon_P_VSA_MR_8
	14) nspdkgramian_9865
	15) dragon_SpPosA_B(m)
	16) dragon_B01[C-S]
	17) dragon_SssS
	18) dragon_F01[C-S]
	19) nspdkgramian_10451
	20) nspdkgramian_9258
Garlic
	1) nspdk_390378
	2) nspdk_517164
	3) dragon_HATS3p
	4) nspdk_264584
	5) dragon_nS
	6) nspdkgramian_15380
	7) nspdkgramian_10902
	8) dragon_R1p+
	9) nspdkgramian_9264
	10) nspdkgramian_9258
	11) dragon_P_VSA_m_4
	12) nspdkgramian_19310
	13) dragon_F01[C-S]
	14) dragon_S-107
	15) nspdkgramian_16592
	16) dragon_HATS2p
	17) dragon_R3p+
	18) dragon_R4p+
	19) dragon_B02[C-S]
	20) nspdkgramian_8013
Spices
	1) nspdkgramian_68252
	2) nspdkgramian_93135
	3) nspdkgramian_10812
	4) nspdk_692636
	5) nspdk_465757
	6) morgan_10364
	7) dragon_HATSp
	8) nspdkgramian_9264
	9) morgan_7461
	10) nspdkgramian_26289
	11) nspdkgramian_529345
	12) nspdkgramian_13403
	13) nspdkgramian_6989
	14) nspdk_863755
	15) morgan_7462
	16) nspdkgramian_79045
	17) nspdk_168888
	18) nspdkgramian_9258
	19) morgan_76152
	20) nspdkgramian_14529
Cold
	1) morgan_8748
	2) morgan_442347
	3) morgan_94174
	4) morgan_519743
	5) dragon_Mor14s
	6) morgan_530420
	7) morgan_289151
	8) dragon_R5u
	9) morgan_69091
	10) dragon_Eig07_EA
	11) morgan_9855795
	12) morgan_520687
	13) dragon_H0m
	14) nspdk_620684
	15) morgan_94334
	16) morgan_6428436
	17) morgan_119831
	18) morgan_521185
	19) dragon_Eig07_EA(bo)
	20) morgan_90978
Sour
	1) morgan_264
	2) dragon_SM06_EA(dm)
	3) morgan_15395
	4) dragon_R4p+
	5) nspdkgramian_13403
	6) dragon_Mor13m
	7) dragon_X5A
	8) dragon_SM02_EA(dm)
	9) nspdkgramian_7193
	10) nspdkgramian_36314
	11) morgan_61138
	12) nspdk_390378
	13) dragon_SM04_EA(dm)
	14) nspdk_835071
	15) dragon_SpPosA_B(m)
	16) nspdkgramian_24181353
	17) dragon_R1p+
	18) morgan_19707
	19) nspdkgramian_12180
	20) dragon_SM07_EA(dm)
Burnt
	1) dragon_F04[C-S]
	2) nspdk_1036848
	3) dragon_SM04_EA(dm)
	4) dragon_R3p+
	5) morgan_14072
	6) morgan_11420
	7) nspdkgramian_101003
	8) nspdk_229520
	9) dragon_HATS3v
	10) morgan_7936
	11) nspdk_458029
	12) morgan_21690291
	13) nspdk_195630
	14) nspdk_206799
	15) dragon_R7m+
	16) nspdk_169238
	17) nspdk_268054
	18) nspdk_794324
	19) nspdk_699131
	20) dragon_R4p+
Acid
	1) dragon_ATSC2s
	2) dragon_SpMax4_Bh(s)
	3) dragon_ATSC1s
	4) morgan_6429278
	5) dragon_HTs
	6) dragon_P1p
	7) morgan_66335
	8) episuite_Estimated BP (oC)
	9) dragon_RDF020i
	10) nspdkgramian_62089
	11) dragon_E1m
	12) dragon_ATS2s
	13) dragon_ATSC2e
	14) dragon_MATS7e
	15) dragon_MATS1i
	16) morgan_526185
	17) dragon_F04[C-S]
	18) morgan_526176
	19) dragon_RDF020u
	20) episuite_Estimated VP (mm Hg)
Warm
	1) dragon_Mor17s
	2) morgan_8467
	3) nspdkgramian_8467
	4) dragon_TDB04p
	5) morgan_6429126
	6) dragon_Mor15p
	7) dragon_R6e+
	8) nspdkgramian_539829
	9) morgan_9551283
	10) dragon_Eig05_EA(dm)
	11) morgan_969516
	12) nspdkgramian_101003
	13) morgan_1183
	14) dragon_SpMax1_Bh(m)
	15) nspdkgramian_8655
	16) nspdkgramian_526182
	17) morgan_12127
	18) nspdk_250366
	19) dragon_HATS0s
	20) dragon_TDB03p
Musky
	1) nspdkgramian_36314
	2) dragon_GATS2e
	3) dragon_R4p+
	4) dragon_GATS2s
	5) morgan_15395
	6) dragon_GATS5s
	7) dragon_SssO
	8) dragon_GATS5e
	9) morgan_40923
	10) nspdkgramian_16324
	11) morgan_239
	12) morgan_7742
	13) nspdk_530412
	14) dragon_R3p+
	15) dragon_Mor08s
	16) morgan_264
	17) morgan_549519
	18) morgan_21264
	19) nspdkgramian_7341
	20) nspdkgramian_7745
Sweaty
	1) nspdk_394468
	2) dragon_GATS2s
	3) nspdkgramian_6915
	4) nspdkgramian_876
	5) nspdkgramian_36314
	6) dragon_Mor13m
	7) morgan_264
	8) morgan_15395
	9) nspdk_679352
	10) nspdkgramian_62341
	11) nspdk_102616
	12) nspdkgramian_12587
	13) nspdk_984466
	14) dragon_GATS2e
	15) morgan_5362661
	16) nspdk_291615
	17) nspdkgramian_8314
	18) morgan_7909
	19) nspdkgramian_7991
	20) nspdkgramian_10439470
Ammonia
	1) dragon_ATSC2s
	2) dragon_F01[C-O]
	3) dragon_SssO
	4) dragon_Mor21m
	5) dragon_ATSC2e
	6) dragon_F02[C-O]
	7) dragon_R4p+
	8) nspdk_827740
	9) dragon_RTp+
	10) nspdk_186640
	11) nspdkgramian_1118
	12) dragon_MATS7e
	13) dragon_R1p+
	14) nspdk_584501
	15) nspdk_39035
	16) nspdk_952466
	17) nspdk_61048
	18) morgan_8892
	19) nspdkgramian_8892
	20) nspdk_567430
Decayed
	1) nspdk_390378
	2) nspdkgramian_61171
	3) dragon_SM09_EA(dm)
	4) nspdkgramian_8013
	5) dragon_SM15_EA(dm)
	6) dragon_Mor10m
	7) nspdkgramian_19310
	8) nspdkgramian_9258
	9) nspdkgramian_522181
	10) dragon_SM06_EA(dm)
	11) dragon_Mor07m
	12) nspdkgramian_10902
	13) dragon_SM13_EA(dm)
	14) morgan_62444
	15) nspdk_517164
	16) nspdk_684650
	17) dragon_SM04_EA(dm)
	18) morgan_19310
	19) nspdk_921479
	20) dragon_SM05_EA(dm)
Wood
	1) morgan_37580
	2) morgan_28905
	3) morgan_70752
	4) morgan_61918
	5) morgan_6429280
	6) morgan_519203
	7) morgan_2519
	8) nspdk_952466
	9) nspdkgramian_5634
	10) morgan_28906
	11) dragon_CATS2D_09_NL
	12) nspdk_186503
	13) nspdk_24237
	14) morgan_535856
	15) nspdk_301888
	16) morgan_520585
	17) nspdk_699886
	18) nspdkgramian_6428802
	19) nspdk_551886
	20) morgan_58757170
Grass
	1) morgan_5352539
	2) nspdk_426606
	3) nspdkgramian_15461361
	4) morgan_16220109
	5) nspdk_526261
	6) nspdk_868779
	7) nspdkgramian_5365049
	8) nspdkgramian_69389
	9) nspdk_731085
	10) nspdkgramian_5284503
	11) nspdkgramian_16220109
	12) nspdkgramian_5281167
	13) morgan_15461361
	14) nspdkgramian_5362833
	15) nspdk_877148
	16) nspdkgramian_5363388
	17) nspdkgramian_5283318
	18) nspdk_954437
	19) morgan_520098
	20) nspdk_897934
Flower
	1) dragon_SpMax4_Bh(m)
	2) nspdkgramian_8767
	3) dragon_piPC06
	4) nspdk_698
	5) nspdkgramian_68229
	6) dragon_BLTF96
	7) nspdkgramian_36314
	8) nspdk_203080
	9) dragon_RDF050u
	10) dragon_WiA_Dt
	11) dragon_SpMaxA_Dt
	12) nspdk_981878
	13) dragon_SpMax4_Bh(e)
	14) dragon_SpMin4_Bh(e)
	15) nspdkgramian_61384
	16) dragon_RDF050i
	17) dragon_piPC05
	18) dragon_SpMAD_Dt
	19) nspdk_33888
	20) dragon_SpMax4_Bh(p)
Chemical
	1) dragon_ATSC2s
	2) dragon_TPSA(Tot)
	3) morgan_5054
	4) dragon_RDF020e
	5) dragon_SM1_Dz(m)
	6) dragon_SpMax4_Bh(s)
	7) episuite_Estimated VP (Pa)
	8) dragon_SM1_Dz(Z)
	9) nspdkgramian_61658
	10) dragon_CATS2D_03_LL
	11) morgan_12170
	12) dragon_E1s
	13) morgan_20534
	14) episuite_KOAWIN Log Kaw
	15) episuite_Estimated VP (mm Hg)
	16) dragon_SpMaxA_EA(ed)
	17) morgan_7060
	18) nspdkgramian_8073
	19) dragon_E1m
	20) morgan_74279

In [22]:
from utils import loading
# Name the columns of the Dragon-only model and list its top features per descriptor.
headers,_ = loading.load_molecular_data()
dragon_feature_names = headers[1:]
all_feature_names_onlydragon = ['dragon_%s' % s for s in dragon_feature_names]
all_feature_names_onlydragon += ['conc_absolute','conc_relative']
# Keep the names selected by good2_onlydragon, then drop the last one:
# relative dilution was not used for the fit.
good_feature_names_onlydragon = [all_feature_names_onlydragon[i] for i in good2_onlydragon][:-1]
assert len(good_feature_names_onlydragon) == importance_ranks_onlydragon.shape[1]
new_ranks_onlydragon = pd.DataFrame(
    importances_onlydragon.T,
    index=good_feature_names_onlydragon,
    columns=descriptors,
).drop('conc_absolute')

# Twenty most important Dragon features for each descriptor.
for desc in descriptors:
    new_sorted_onlydragon = list(new_ranks_onlydragon.sort_values(desc,ascending=False).index)
    print(desc)
    for j in range(20):
        print("\t%d) %s" % (j+1,new_sorted_onlydragon[j]))


Intensity
	1) dragon_B03[C-S]
	2) dragon_F03[C-S]
	3) dragon_Eig07_AEA(dm)
	4) dragon_C-028
	5) dragon_O-057
	6) dragon_Mor30p
	7) dragon_MATS7e
	8) dragon_ATSC2p
	9) dragon_NssS
	10) dragon_Wi_B(s)
	11) dragon_Eig08_AEA(dm)
	12) dragon_Psi_i_s
	13) dragon_DLS_01
	14) dragon_LLS_01
	15) dragon_MATS7m
	16) dragon_DISPe
	17) dragon_SpMax3_Bh(s)
	18) dragon_Eig06_EA
	19) dragon_WiA_B(p)
	20) dragon_MATS7s
Pleasantness
	1) dragon_SssO
	2) dragon_RDF015s
	3) dragon_P_VSA_MR_8
	4) dragon_MAXDP
	5) dragon_nHM
	6) dragon_P_VSA_i_1
	7) dragon_Mor17s
	8) dragon_Mor05s
	9) dragon_R2s+
	10) dragon_GATS2e
	11) dragon_P_VSA_m_4
	12) dragon_Mor30s
	13) dragon_CATS2D_04_AL
	14) dragon_RDF025s
	15) dragon_P_VSA_LogP_5
	16) dragon_GATS2s
	17) dragon_R6m+
	18) dragon_Mor02p
	19) dragon_R7s+
	20) dragon_RDF025p
Bakery
	1) dragon_Mor17s
	2) dragon_R7e+
	3) dragon_Mor12s
	4) dragon_SHP2
	5) dragon_Eig12_AEA(ri)
	6) dragon_CIC2
	7) dragon_C-037
	8) dragon_MATS6i
	9) dragon_MATS3s
	10) dragon_R6i+
	11) dragon_R4s+
	12) dragon_HATS7u
	13) dragon_ChiA_B(e)
	14) dragon_SP08
	15) dragon_GATS6v
	16) dragon_G2p
	17) dragon_HATS5i
	18) dragon_DISPe
	19) dragon_SpMax1_Bh(p)
	20) dragon_R6e+
Sweet
	1) dragon_SssO
	2) dragon_Mor17s
	3) dragon_CATS2D_04_AL
	4) dragon_R6s+
	5) dragon_SdO
	6) dragon_E2s
	7) dragon_R1s+
	8) dragon_SpDiam_B(s)
	9) dragon_RDF035e
	10) dragon_Mor30v
	11) dragon_Mor26v
	12) dragon_Mor30s
	13) dragon_E3s
	14) dragon_SpMin1_Bh(m)
	15) dragon_BIC3
	16) dragon_Ds
	17) dragon_RDF035i
	18) dragon_R2s+
	19) dragon_G3m
	20) dragon_R7e+
Fruit
	1) dragon_Eig09_EA(dm)
	2) dragon_Mor10i
	3) dragon_MATS3v
	4) dragon_Mor08u
	5) dragon_Eig08_EA(dm)
	6) dragon_nRCOOR
	7) dragon_X5A
	8) dragon_H8i
	9) dragon_MATS1v
	10) dragon_H8p
	11) dragon_Mor18s
	12) dragon_SpMaxA_EA(dm)
	13) dragon_TDB03r
	14) dragon_J_G/D
	15) dragon_E1s
	16) dragon_GATS1i
	17) dragon_HATS2v
	18) dragon_Mor13s
	19) dragon_Mor26v
	20) dragon_Mor32m
Fish
	1) dragon_X4Av
	2) dragon_P_VSA_m_4
	3) dragon_NssS
	4) dragon_HATS3p
	5) dragon_SssNH
	6) dragon_P_VSA_MR_8
	7) dragon_P_VSA_i_1
	8) dragon_SssS
	9) dragon_HATS3e
	10) dragon_TDB03i
	11) dragon_Mor30e
	12) dragon_Mor32m
	13) dragon_DISPi
	14) dragon_RDF025s
	15) dragon_Mor10m
	16) dragon_Mor30i
	17) dragon_Eig07_EA(ri)
	18) dragon_R2m+
	19) dragon_Mor30u
	20) dragon_SpPosA_B(m)
Garlic
	1) dragon_HATS3p
	2) dragon_P_VSA_m_4
	3) dragon_R3p+
	4) dragon_SssS
	5) dragon_X3Av
	6) dragon_S-107
	7) dragon_Mor05m
	8) dragon_Mor08v
	9) dragon_SpDiam_B(m)
	10) dragon_VE2_RG
	11) dragon_GATS2m
	12) dragon_P_VSA_i_1
	13) dragon_NssS
	14) dragon_VE1_Dz(v)
	15) dragon_Mor25s
	16) dragon_Mor23e
	17) dragon_GATS2v
	18) dragon_SM13_AEA(dm)
	19) dragon_Eta_F
	20) dragon_ChiA_B(s)
Spices
	1) dragon_SaasC
	2) dragon_R2p+
	3) dragon_Mor03s
	4) dragon_MATS8i
	5) dragon_Eig11_EA(ed)
	6) dragon_SM06_AEA(ri)
	7) dragon_GATS8e
	8) dragon_RDF060s
	9) dragon_HATS2p
	10) dragon_TDB04p
	11) dragon_Mor02i
	12) dragon_Mor27u
	13) dragon_JGI5
	14) dragon_SHP2
	15) dragon_R3m+
	16) dragon_IC2
	17) dragon_E2u
	18) dragon_RTe+
	19) dragon_HATS3p
	20) dragon_HATSi
Cold
	1) dragon_R5i
	2) dragon_R4e
	3) dragon_TPSA(Tot)
	4) dragon_R2m+
	5) dragon_H0m
	6) dragon_R4u
	7) dragon_Mor14m
	8) dragon_VR2_G
	9) dragon_R4i
	10) dragon_HATS4v
	11) dragon_HATS0m
	12) dragon_MATS4i
	13) dragon_Eig07_EA(ri)
	14) dragon_Eig11_AEA(bo)
	15) dragon_Mor24s
	16) dragon_E1s
	17) dragon_E1m
	18) dragon_Eig07_EA(bo)
	19) dragon_P_VSA_MR_2
	20) dragon_MATS8s
Sour
	1) dragon_Mor13m
	2) dragon_SpMAD_EA(dm)
	3) dragon_SpMaxA_EA(dm)
	4) dragon_SM05_EA(dm)
	5) dragon_Eig06_EA(ri)
	6) dragon_Mor08m
	7) dragon_CATS2D_01_NL
	8) dragon_E1p
	9) dragon_GATS2e
	10) dragon_P_VSA_MR_5
	11) dragon_HATS4e
	12) dragon_CATS2D_04_NL
	13) dragon_Vindex
	14) dragon_Mor28m
	15) dragon_SM07_EA(dm)
	16) dragon_MATS7s
	17) dragon_Eig08_EA(dm)
	18) dragon_MATS7i
	19) dragon_Eig06_AEA(dm)
	20) dragon_SM04_EA(dm)
Burnt
	1) dragon_F04[C-S]
	2) dragon_SM04_EA(dm)
	3) dragon_Eig14_AEA(ri)
	4) dragon_Mor13s
	5) dragon_R3p+
	6) dragon_R3m+
	7) dragon_SM03_AEA(ri)
	8) dragon_TDB03m
	9) dragon_Eig08_EA(ed)
	10) dragon_Eig14_EA(dm)
	11) dragon_SaaN
	12) dragon_R7m+
	13) dragon_P_VSA_m_4
	14) dragon_Mor08s
	15) dragon_Eig09_EA(dm)
	16) dragon_R3m
	17) dragon_HATS3v
	18) dragon_MLOGP2
	19) dragon_DISPe
	20) dragon_R4p+
Acid
	1) dragon_ATSC1s
	2) dragon_SpMax4_Bh(s)
	3) dragon_P1p
	4) dragon_HTs
	5) dragon_ATSC2s
	6) dragon_F05[C-O]
	7) dragon_SM2_B(s)
	8) dragon_P1e
	9) dragon_RDF020s
	10) dragon_MATS1i
	11) dragon_E1m
	12) dragon_E1e
	13) dragon_Mor23m
	14) dragon_ATSC3e
	15) dragon_GATS3m
	16) dragon_Gm
	17) dragon_P2p
	18) dragon_HATS6p
	19) dragon_HyWi_B(s)
	20) dragon_E1i
Warm
	1) dragon_Mor17s
	2) dragon_R6e+
	3) dragon_Eig05_EA(dm)
	4) dragon_SM6_B(p)
	5) dragon_Mor30s
	6) dragon_VE1_RG
	7) dragon_R3m
	8) dragon_Mor30e
	9) dragon_R4s+
	10) dragon_Mor15p
	11) dragon_TDB05s
	12) dragon_Ds
	13) dragon_TDB06s
	14) dragon_GATS1p
	15) dragon_GATS5e
	16) dragon_SpMax1_Bh(m)
	17) dragon_Dv
	18) dragon_Dm
	19) dragon_HATSm
	20) dragon_GATS4e
Musky
	1) dragon_GATS2e
	2) dragon_RDF025s
	3) dragon_Mor08m
	4) dragon_Mor05s
	5) dragon_GATS5e
	6) dragon_R4p+
	7) dragon_RDF045s
	8) dragon_X4A
	9) dragon_GATS5s
	10) dragon_HATS2p
	11) dragon_Eig06_AEA(dm)
	12) dragon_Mor10i
	13) dragon_Mor08s
	14) dragon_CATS2D_02_NL
	15) dragon_JGI5
	16) dragon_GATS2s
	17) dragon_RDF015s
	18) dragon_P_VSA_MR_6
	19) dragon_Mor21m
	20) dragon_Mor21p
Sweaty
	1) dragon_GATS2e
	2) dragon_Mor13m
	3) dragon_JGI5
	4) dragon_Mor08s
	5) dragon_X4A
	6) dragon_Eig06_AEA(dm)
	7) dragon_MATS4v
	8) dragon_Eig08_EA(dm)
	9) dragon_Mor08m
	10) dragon_GATS5s
	11) dragon_GATS2s
	12) dragon_GATS5e
	13) dragon_Eig05_AEA(dm)
	14) dragon_Mor04m
	15) dragon_Eig06_AEA(ri)
	16) dragon_nRCOOH
	17) dragon_HATS5s
	18) dragon_GATS5m
	19) dragon_SsssCH
	20) dragon_Mor21m
Ammonia
	1) dragon_SssO
	2) dragon_MATS7e
	3) dragon_SpMax2_Bh(m)
	4) dragon_R3u
	5) dragon_ATSC2s
	6) dragon_Mor13s
	7) dragon_Mor30e
	8) dragon_GATS6s
	9) dragon_Mor16m
	10) dragon_CATS2D_04_AL
	11) dragon_E2p
	12) dragon_Mor08s
	13) dragon_F01[C-O]
	14) dragon_GATS2s
	15) dragon_MATS6m
	16) dragon_Mor11p
	17) dragon_Mor26s
	18) dragon_F02[C-O]
	19) dragon_GATS7m
	20) dragon_ATSC8m
Decayed
	1) dragon_P_VSA_m_4
	2) dragon_Mor07p
	3) dragon_NssS
	4) dragon_Mor13m
	5) dragon_SM11_EA(dm)
	6) dragon_SM09_EA(dm)
	7) dragon_GATS2e
	8) dragon_Mor05m
	9) dragon_SM15_EA(dm)
	10) dragon_Mor07m
	11) dragon_Mor08s
	12) dragon_SM13_EA(dm)
	13) dragon_Mor08e
	14) dragon_Eig08_EA(dm)
	15) dragon_Eta_alpha_A
	16) dragon_Mor13v
	17) dragon_R3p+
	18) dragon_Mor10m
	19) dragon_SM14_EA(dm)
	20) dragon_Mor32m
Wood
	1) dragon_CATS2D_08_NL
	2) dragon_MLOGP2
	3) dragon_SaaN
	4) dragon_Mor32e
	5) dragon_N%
	6) dragon_MATS2m
	7) dragon_Mor32s
	8) dragon_MATS2v
	9) dragon_Mor30s
	10) dragon_Mor21v
	11) dragon_F02[C-N]
	12) dragon_HATS5i
	13) dragon_SpMax1_Bh(v)
	14) dragon_Mor08p
	15) dragon_P_VSA_MR_5
	16) dragon_CATS2D_09_NL
	17) dragon_R6i
	18) dragon_Dm
	19) dragon_GATS2s
	20) dragon_MATS1s
Grass
	1) dragon_SpMaxA_G/D
	2) dragon_HATS5p
	3) dragon_GATS4m
	4) dragon_WiA_G/D
	5) dragon_Chi_G/D
	6) dragon_GATS4e
	7) dragon_Mor14e
	8) dragon_MATS1v
	9) dragon_P_VSA_MR_7
	10) dragon_C-029
	11) dragon_F04[C-N]
	12) dragon_Mor32e
	13) dragon_G3s
	14) dragon_Mor09s
	15) dragon_Mor12i
	16) dragon_Mor12e
	17) dragon_Mor09p
	18) dragon_ATSC3i
	19) dragon_G2u
	20) dragon_Mor14s
Flower
	1) dragon_SpMax4_Bh(i)
	2) dragon_SpMax4_Bh(m)
	3) dragon_WiA_Dt
	4) dragon_SpMAD_Dt
	5) dragon_H_D/Dt
	6) dragon_piPC05
	7) dragon_Mor09s
	8) dragon_VE2_RG
	9) dragon_Mor10u
	10) dragon_RDF025v
	11) dragon_Mor10i
	12) dragon_Mor20e
	13) dragon_MATS4v
	14) dragon_MATS4p
	15) dragon_Mor21v
	16) dragon_RDF050u
	17) dragon_REIG
	18) dragon_R8m+
	19) dragon_Mor21s
	20) dragon_D/Dtr10
Chemical
	1) dragon_TPSA(Tot)
	2) dragon_ATSC2s
	3) dragon_SM1_Dz(m)
	4) dragon_RDF020e
	5) dragon_SM1_Dz(Z)
	6) dragon_GATS4s
	7) dragon_nArOH
	8) dragon_HyWi_B(s)
	9) dragon_E1s
	10) dragon_GATS5i
	11) dragon_P1p
	12) dragon_Dm
	13) dragon_SpMax4_Bh(s)
	14) dragon_SpMaxA_EA(ed)
	15) dragon_Eig07_AEA(ri)
	16) dragon_Mor13s
	17) dragon_R2s
	18) dragon_Mor22s
	19) dragon_RDF020u
	20) dragon_SpMax_B(m)

In [4]:
# Same call as in the feature-metadata cell earlier in this notebook.
# NOTE(review): this cell's execution count (In [4]) is out of order with the cells around it;
# on a top-to-bottom run nspdk_dict is already defined at this point.
nspdk_dict = dream.make_nspdk_dict(all_CIDs)

In [9]:
# Mean length of the values stored in nspdk_dict.
x = sum(len(value) for value in nspdk_dict.values()) / len(nspdk_dict)
x


Out[9]:
7.506491097922849

In [8]:
# Number of keys in nspdk_dict; the keys are used earlier as the NSPDK feature numbers.
len(nspdk_dict)


Out[8]:
5392

In [ ]: