Licensed under the Apache License, Version 2.0.
In [0]:
# Plotting, serialization and numerics used by the analysis cells below.
import matplotlib.pyplot as plt
import pickle
import numpy as np
import os
import itertools
from scipy import stats
from collections import defaultdict
import seaborn as sns
# Apply seaborn's 'white' style to the figures.
sns.set_style('white')
import tensorflow as tf
# TF1-compatible file API; it is used to open the result pickles.
gfile = tf.compat.v1.gfile
In [0]:
# Root directory of the experiment outputs; the per-delta result pickles are
# read from its 'results' subdirectory.
rootdir = '/tmp/wheel_bandit'
algos = []  # not referenced by the cells shown in this notebook

# Delta values for which results are loaded and plotted.
deltas = [0.5, 0.7, 0.9, 0.95, 0.99]

# Line color used for each algorithm key found in the result files.
colors = {
    'neurolinear': 'green',
    'gnp_cnp_offline': 'red',
    'gnp_np_offline': 'brown',
    'gnp_acnp_offline': 'magenta',
    'gnp_acns_offline': 'black',
    'gnp_anp_offline': 'blue',
}

# Legend text used for each algorithm key.
labels = {
    'neurolinear': 'neurolinear',
    'gnp_cnp_offline': 'no latents',
    'gnp_np_offline': 'global',
    'gnp_acnp_offline': 'no latents + attention',
    'gnp_acns_offline': 'global + local + attention',
    'gnp_anp_offline': 'global + attention',
}
In [0]:
# Load the aggregated results for every delta, keyed by str(delta).
all_results_dict = {}
for delta in deltas:
    print('delta', delta)
    results_path = os.path.join(
        rootdir, 'results', '%s_all_results.pkl' % delta)
    with gfile.Open(results_path, 'rb') as results_file:
        all_results_dict[str(delta)] = pickle.load(results_file)
In [0]:
# Per-algorithm summary statistics. Each list receives one entry per delta, in
# the iteration order of all_results_dict. The *_std containers hold standard
# errors of the mean (scipy.stats.sem), not standard deviations.
all_cum_regrets_mean, all_cum_regrets_std = defaultdict(list), defaultdict(list)
all_simple_regrets_mean, all_simple_regrets_std = defaultdict(list), defaultdict(list)

# One figure per delta: mean cumulative regret over time for every algorithm.
for delta, results in all_results_dict.items():
    # NOTE(review): the uniform baseline is computed here but never plotted or
    # reported in this cell; it is kept so the cell still requires the
    # 'uniform' entry exactly as before.
    uniform_results = np.cumsum(np.array(results['uniform']), axis=-1)
    uniform_cum_regret = np.mean(uniform_results, axis=0)

    for algo, algo_results in results.items():
        if algo == 'uniform':
            continue

        # Presumably shaped (num_trials, num_timesteps) -- confirm against the
        # code that writes the result pickles.
        regrets = np.array(algo_results)

        # Running total along the last axis, then mean / standard error
        # across axis 0.
        cum_regrets = np.cumsum(regrets, axis=-1)
        mean_cum_regret = np.mean(cum_regrets, axis=0)
        std_cum_regret = stats.sem(cum_regrets, axis=0)

        # "Simple regret" here is the sum over the last 500 entries of the
        # last axis, computed per row.
        simple_regrets = np.sum(regrets[:, -500:], axis=-1)
        mean_simple_regret = np.mean(simple_regrets)
        std_simple_regret = stats.sem(simple_regrets)

        # Fix: the simple-regret stderr previously printed std_cum_regret[-1].
        print('algo', algo, 'num trials', len(cum_regrets),
              'cum regret (mean/stderr)', mean_cum_regret[-1], std_cum_regret[-1],
              'simple regret (mean/stderr)', mean_simple_regret, std_simple_regret)

        # The x axis starts at 2 (2 + arange), as in the original cell.
        plt.plot(2 + np.arange(len(mean_cum_regret)), mean_cum_regret,
                 label=labels[algo], color=colors[algo])

        # Keep only the final cumulative value and the scalar simple-regret
        # statistics for the cross-delta summary plots.
        all_cum_regrets_mean[algo].append(mean_cum_regret[-1])
        all_cum_regrets_std[algo].append(std_cum_regret[-1])
        all_simple_regrets_mean[algo].append(mean_simple_regret)
        all_simple_regrets_std[algo].append(std_simple_regret)

    plt.legend()
    plt.title('delta=' + str(delta))
    plt.xlabel('number of timesteps')
    plt.ylabel('cummulative regret')
    plt.show()
In [0]:
# Final cumulative regret versus delta, one line per algorithm; the error bars
# come from all_cum_regrets_std.
for algo, final_means in all_cum_regrets_mean.items():
    plt.errorbar(
        deltas,
        final_means,
        yerr=all_cum_regrets_std[algo],
        label=labels[algo],
        color=colors[algo],
        linestyle='-',
        marker='.',
    )
plt.legend()
plt.title('Cummulative Regret')
plt.xlabel('delta')
plt.ylabel('final cummulative regret')
Out[0]:
In [0]:
# Simple regret versus delta, one line per algorithm; the error bars come from
# all_simple_regrets_std.
for algo, simple_means in all_simple_regrets_mean.items():
    plt.errorbar(
        deltas,
        simple_means,
        yerr=all_simple_regrets_std[algo],
        label=labels[algo],
        color=colors[algo],
        linestyle='-',
        marker='.',
    )
plt.legend()
plt.title('Simple Regret')
plt.xlabel('delta')
plt.ylabel('final simple regret')
Out[0]:
In [0]: