In [1]:
import pickle

def _load_pickle(path):
    """Return the object unpickled from ``path``.

    The file is opened in binary mode inside a ``with`` block so the handle
    is closed as soon as loading finishes (or fails), instead of being left
    open until garbage collection.
    """
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only point this at files produced by trusted code.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Inputs saved by earlier steps; the names below are used by later cells.
final_desc_set_svr = _load_pickle('final_desc_set_svr.pickle')
correlated_descs_svr = _load_pickle('correlated_descs_svr.pickle')
importances_rf = _load_pickle('importances_rf.pickle')

In [2]:
# Keep the rows of importances_rf whose 'scaled_importance' exceeds 0.1,
# then collect their 'variable' values into a plain list.
top_rows = importances_rf[importances_rf['scaled_importance'] > 0.1]
tf_top = top_rows['variable'].tolist()
tf_top


Out[2]:
['MinPartialCharge',
 'MaxAbsPartialCharge',
 'MolMR',
 'fr_benzene',
 'NumAromaticRings',
 'NumAromaticCarbocycles',
 'MaxPartialCharge',
 'Chi4v',
 'BalabanJ',
 'MolLogP',
 'MinAbsPartialCharge',
 'Chi4n',
 'Chi1v',
 'TPSA',
 'SlogP_VSA1',
 'Chi0n',
 'Chi1n']

In [3]:
# Start from every entry of final_desc_set_svr and union in the entries that
# correlated_descs_svr maps each of them to.
final_desc_set_with_correlated = set(final_desc_set_svr).union(
    *(correlated_descs_svr[selected] for selected in final_desc_set_svr)
)

# Entries of tf_top (original order kept) that are in the expanded set.
hits = [
    name
    for name in tf_top
    if name in final_desc_set_with_correlated
]

In [4]:
# Share of tf_top entries that were found in the expanded descriptor set.
n_hits, n_top = len(hits), len(tf_top)
n_hits / n_top


Out[4]:
1.0

In [ ]: