In [1]:
%matplotlib inline
import pandas as pd
from matplotlib import pyplot as plt
from atntools.trees import *
from atntools.tree_ranges import *
# Experiment set to analyze; both input paths below are derived from it.
set_number = 47
tree_file = '../../data/set{}/weka-output-j48.txt'.format(set_number)
feature_file = '../../data/set{0}/features.set{0}.labelled.csv'.format(set_number)

# Decision tree read from the Weka J48 output file of this set.
tree = parse_weka_j48_output_file(tree_file)

# Labelled feature table, ordered so that the rows with the highest
# 'environmentScoreSlope200' come first. The ordering is only used later to
# plot the parameter values of the best instances; it does not affect the
# other computations.
instances = pd.read_csv(feature_file).sort_values(
    'environmentScoreSlope200', ascending=False)

# Per-parameter outcome distributions derived from the tree and the
# instances, and the per-parameter range weights derived from those.
distributions = get_distributions(tree, instances)
range_weights = get_range_weights(distributions)
# Collect the weight of every (low, high, weight) segment across all
# parameters so that every score chart can share one y-axis range.
all_weights = [
    weight
    for segments in range_weights.values()
    for _low, _high, weight in segments
]

# The seeds cap the result: min_weight never exceeds 1.0 and max_weight never
# drops below -1.0, even when there are no segments at all.
min_weight = min([1.0] + all_weights)
max_weight = max([-1.0] + all_weights)
# Draw one row of two charts per parameter:
#   left  - stacked histogram of simulation outcomes (bad / unlabeled / good)
#           over the parameter's value ranges;
#   right - the score (weight) of each value range, with the parameter values
#           of the best instances marked on the x-axis.
#
# Bar positions are passed positionally together with align='edge':
#   * the `left=` keyword was renamed to `x` in Matplotlib 2.0 and later
#     removed, so `plt.bar(left=...)` raises TypeError on current versions;
#   * the default alignment changed from 'edge' to 'center' in Matplotlib 2.0,
#     and each bar here must span exactly [low, high].
num_params = len(distributions)
plt.figure(figsize=(9, 3 * num_params))

for i, (param, distribution) in enumerate(sorted(distributions.items())):
    # ---- Left chart: simulation outcomes per value range -----------------
    plt.subplot(num_params, 2, i * 2 + 1)
    plt.title(param + " - simulation outcomes")
    plt.xlabel(param + " parameter value")
    plt.ylabel("number of simulations")

    # Each entry is (low, high, good, bad, unlabeled).
    bins = list(distribution)
    left = [low for low, _, _, _, _ in bins]
    width = [high - low for low, high, _, _, _ in bins]
    good = [g for _, _, g, _, _ in bins]
    bad = [b for _, _, _, b, _ in bins]
    unlabeled = [u for _, _, _, _, u in bins]

    # Stacking order from the bottom up: bad, unlabeled, good.
    good_base = [b + u for b, u in zip(bad, unlabeled)]
    plt.bar(left, good, width, bottom=good_base, align='edge',
            color='green', label='good')
    plt.bar(left, bad, width, align='edge',
            color='orange', label='bad')
    plt.bar(left, unlabeled, width, bottom=bad, align='edge',
            color='lightgray', label='unlabeled')

    # One legend is enough for the whole figure; show it on the first chart.
    if i == 0:
        plt.legend()

    # ---- Right chart: score of each value range --------------------------
    ax = plt.subplot(num_params, 2, i * 2 + 2)
    plt.title(param + " - parameter range scores")
    plt.xlabel(param + " parameter value")
    plt.ylabel("P(good) - P(bad)")
    # Same y-range on every score chart so parameters are comparable.
    ax.set_ylim(min_weight - 0.1, max_weight + 0.1)

    # Each entry is (low, high, weight).
    ranges = list(range_weights[param])
    plt.bar(
        [low for low, _, _ in ranges],
        [weight for _, _, weight in ranges],
        [high - low for low, high, _ in ranges],
        align='edge',
        color=['green' if weight > 0 else 'orange' for _, _, weight in ranges],
    )

    # Mark the parameter values of the (up to) five best instances; a larger
    # marker means a better rank. `instances` is already sorted best-first.
    # Slicing avoids an IndexError when fewer than five rows exist, and the
    # separate loop variable no longer shadows the subplot index `i`.
    for rank, value in enumerate(instances[param].iloc[:5]):
        plt.plot([value], [0], 'co', markersize=16 - 3 * rank)

    #plt.savefig('tree-range_set-{}_{}'.format(set_number, param), dpi=600)

plt.tight_layout()
In [ ]: