In [1]:
import pickle
# Load the three previously pickled inputs used by the cells below.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only run this cell on files that come from a trusted source.
# Each file is opened in a `with` block so its handle is closed as soon as
# the object has been loaded, instead of being left open.
with open('final_desc_set_svr.pickle', 'rb') as fh:
    final_desc_set_svr = pickle.load(fh)
with open('correlated_descs_svr.pickle', 'rb') as fh:
    correlated_descs_svr = pickle.load(fh)
with open('importances_rf.pickle', 'rb') as fh:
    importances_rf = pickle.load(fh)
In [2]:
# Keep the rows whose 'scaled_importance' exceeds 0.1 and collect their
# 'variable' values into a plain list; the bare name below displays it.
tf_top = importances_rf.loc[
    importances_rf['scaled_importance'] > 0.1,
    'variable',
].tolist()
tf_top
Out[2]:
In [3]:
# Build the set containing every descriptor in final_desc_set_svr together
# with everything correlated_descs_svr lists under that descriptor.
final_desc_set_with_correlated = set()
for desc in final_desc_set_svr:
    final_desc_set_with_correlated.add(desc)
    final_desc_set_with_correlated.update(correlated_descs_svr[desc])

# Entries of tf_top that appear in the expanded set, kept in tf_top order.
# Computed once, after the loop has finished filling the set.
hits = [x for x in tf_top if x in final_desc_set_with_correlated]
In [4]:
# Fraction of tf_top entries that were also found in hits.
# An empty tf_top would raise ZeroDivisionError, so display NaN in that case.
len(hits) / len(tf_top) if tf_top else float('nan')
Out[4]:
In [ ]: