In [1]:
import py_irf_benchmarks_utils
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.insert(0, '../jupyter/utils')
from irf_jupyter_utils import _get_histogram
# Paths of the two YAML files this notebook reads.
# `file_in` lives under specs/ and `file_out` under output/ — presumably the
# benchmark specification and the results of that benchmark run (TODO confirm).
file_in = 'specs/iRF_mod01.yaml'
file_out = 'output/iRF_mod01_out.yaml'

# Parse both files with the project's YAML helper; `specs` and `bm` are the
# objects every later cell reads from.
specs = py_irf_benchmarks_utils.yaml_to_dict(inp_yaml=file_in)
bm = py_irf_benchmarks_utils.yaml_to_dict(inp_yaml=file_out)
In [2]:
# Draw one benchmark plot per metric using the project's plotting helper.
# plot_bm's third argument is the x-axis quantity, the fourth the y-axis one.
x_axis = 'n_estimators'
for y_axis in ('time', 'accuracy_score', 'log_loss'):
    py_irf_benchmarks_utils.plot_bm(bm, specs, x_axis, y_axis)
In [3]:
# Print the feature importance ranking for a single trial (index 0) of bm[2].
# NOTE(review): the meaning of entry 2 of `bm` is not visible here — confirm.
print("Feature ranking:")
feature_importances = bm[2]['feature_importances'][0]
# argsort is ascending, so reverse it to list the most important feature first.
feature_importances_rank_idx = np.argsort(feature_importances)[::-1]
for rank, feature_idx in enumerate(feature_importances_rank_idx, start=1):
    importance = feature_importances[feature_idx]
    print("%d. feature %d (%f)" % (rank, feature_idx, importance))
In [4]:
# Plot the feature importance ranking for a single trial (index 0) of bm[2]
# as a bar chart, most important feature first.
#
# The trial-0 data is re-derived here instead of being read from variables
# left behind by other cells: the per-trial loop further down rebinds
# `feature_importances` / `feature_importances_rank_idx`, so relying on them
# would silently plot a different trial when cells are re-run out of order.
# np.asarray guarantees the fancy indexing below works even if the YAML loader
# returned a plain Python list (TODO confirm the loaded type).
feature_importances = np.asarray(bm[2]['feature_importances'][0])
# argsort is ascending, so reverse it to get a descending-importance order.
feature_importances_rank_idx = np.argsort(feature_importances)[::-1]
n_features = len(feature_importances)

width = 12
height = 8
fig, ax = plt.subplots(figsize=(width, height))
ax.set_title("Feature importances")
ax.bar(range(n_features),
       feature_importances[feature_importances_rank_idx],
       color="r",
       align="center")
# Bars are placed at positions 0..n-1 but labelled with the original feature
# index, so the x axis reads as "which feature is at this rank".
ax.set_xticks(range(n_features))
ax.set_xticklabels(feature_importances_rank_idx)
ax.set_xlim([-1, n_features])
ax.set_xlabel("Feature index (sorted by importance)")
ax.set_ylabel("Importance")
plt.show()
In [5]:
# Show the indices of the five highest-importance features for every trial.
#
# Loop-local names are used deliberately: the original loop rebound the
# module-level `feature_importances` / `feature_importances_rank_idx`, which
# the single-trial plotting cell reads, so re-running that cell afterwards
# plotted the last trial instead of trial 0.
n_trials = specs['n_trials'][0]
for i in range(n_trials):
    trial_importances = bm[2]['feature_importances'][i]
    # argsort is ascending; reverse it so the most important feature is first.
    trial_rank_idx = np.argsort(trial_importances)[::-1]
    print('trial'+str(i), trial_rank_idx[0:5])
In [6]:
# Histogram of the stability scores for a single trial (index 0) of bm[2],
# drawn with the project's `_get_histogram` helper with sort=True.
stability_scores = bm[2]['stability_all'][0]
_get_histogram(stability_scores, sort=True)
In [7]:
# Show, for every trial, the five keys with the highest stability score.
#
# Each trial's scores are a mapping (key -> score); keys are ordered by their
# score, highest first. The original also built a sorted list of the score
# values on every iteration that was never used — that dead work is removed.
# Loop-local names keep the module-level `stability_scores` (trial 0) intact.
n_trials = specs['n_trials'][0]
for i in range(n_trials):
    trial_stability = bm[2]['stability_all'][i]
    top_keys = sorted(trial_stability, key=trial_stability.get, reverse=True)
    print('trial'+str(i), top_keys[0:5])
In [ ]: