Licensed under the Apache License, Version 2.0.


In [0]:
import matplotlib.pyplot as plt
import pickle
import numpy as np
import os
import itertools
from scipy import stats
from collections import defaultdict
import seaborn as sns
sns.set_style('white')
import tensorflow as tf
gfile = tf.compat.v1.gfile

In [0]:
rootdir = '/tmp/wheel_bandit'
algos = []
deltas = [0.5, 0.7, 0.9, 0.95, 0.99]
colors ={'neurolinear':'green', 'gnp_cnp_offline':'red', 'gnp_np_offline':'brown', 'gnp_acnp_offline':'magenta', 'gnp_acns_offline':'black', 'gnp_anp_offline': 'blue'}
labels = {'neurolinear':'neurolinear', 'gnp_cnp_offline':'no latents', 'gnp_np_offline':'global', 'gnp_acnp_offline':'no latents + attention', 'gnp_acns_offline':'global + local + attention', 'gnp_anp_offline': 'global + attention'}

In [0]:
all_results_dict = {}
for delta in deltas:
  print('delta', delta)
  aggfile = os.path.join(rootdir, 'results', str(delta) + '_all_results.pkl')
  with gfile.Open(aggfile, 'rb') as infile:
    results = pickle.load(infile) 
    all_results_dict[str(delta)] = results


delta 0.5
delta 0.7
delta 0.9
delta 0.95
delta 0.99

In [0]:
all_cum_regrets_mean, all_cum_regrets_std = defaultdict(list), defaultdict(list)
all_simple_regrets_mean, all_simple_regrets_std = defaultdict(list), defaultdict(list)

for delta in all_results_dict.keys():

  results = all_results_dict[str(delta)]
  uniform_results = np.cumsum(np.array(results['uniform']), axis=-1)
  uniform_cum_regret = np.mean(uniform_results, axis=0)

  for algo in results.keys():
    if algo == 'uniform':
      continue
    cum_regrets = np.cumsum(np.array(results[algo]), axis=-1)
    mean_cum_regret = np.mean(cum_regrets, axis=0)
    std_cum_regret = stats.sem(cum_regrets, axis=0)
    
    simple_regrets = np.sum(np.array(results[algo])[:, -500:], axis=-1)
    mean_simple_regret = np.mean(simple_regrets)
    std_simple_regret = stats.sem(simple_regrets)
    
    print('algo', algo, 'num trials', len(cum_regrets),
          'cum regret (mean/stderr)', mean_cum_regret[-1], std_cum_regret[-1],
          'simple regret (mean/stderr)', mean_simple_regret, std_cum_regret[-1])
    plt.plot(2+np.arange(len(mean_cum_regret)), mean_cum_regret, label=labels[algo], color=colors[algo])

    all_cum_regrets_mean[algo].append(mean_cum_regret[-1])
    all_cum_regrets_std[algo].append(std_cum_regret[-1])

    all_simple_regrets_mean[algo].append(mean_simple_regret)
    all_simple_regrets_std[algo].append(std_simple_regret)

  plt.legend()
  plt.title('delta='+str(delta))
  plt.xlabel('number of timesteps')
  plt.ylabel('cummulative regret')
  plt.show()


algo neurolinear num trials 47 cum regret (mean/stderr) 23720.839856363367 392.92462343425694 simple regret (mean/stderr) 115.1031946503248 392.92462343425694
algo gnp_cnp_offline num trials 48 cum regret (mean/stderr) 302979.96456880274 604.4571293894155 simple regret (mean/stderr) 1965.9925126999544 604.4571293894155
algo gnp_np_offline num trials 50 cum regret (mean/stderr) 125767.3098538105 375.79172763258373 simple regret (mean/stderr) 783.982853506345 375.79172763258373
algo gnp_anp_offline num trials 50 cum regret (mean/stderr) 805156.0953747405 683.4758522003488 simple regret (mean/stderr) 4994.0094313489735 683.4758522003488
algo gnp_acnp_offline num trials 50 cum regret (mean/stderr) 267399.94721675577 609.3630592901035 simple regret (mean/stderr) 1681.6943742670567 609.3630592901035
algo gnp_acns_offline num trials 48 cum regret (mean/stderr) 55613.97749697292 394.23633884166753 simple regret (mean/stderr) 352.07294707327657 394.23633884166753
algo neurolinear num trials 48 cum regret (mean/stderr) 21956.416814281485 509.32352673575895 simple regret (mean/stderr) 105.60827359302728 509.32352673575895
algo gnp_cnp_offline num trials 50 cum regret (mean/stderr) 56030.19044015195 252.00419737996344 simple regret (mean/stderr) 347.4956192390537 252.00419737996344
algo gnp_np_offline num trials 49 cum regret (mean/stderr) 23606.948392790913 135.10698615488104 simple regret (mean/stderr) 155.52171041221868 135.10698615488104
algo gnp_anp_offline num trials 50 cum regret (mean/stderr) 269691.7438699906 558.4866845369053 simple regret (mean/stderr) 1663.2520360576593 558.4866845369053
algo gnp_acnp_offline num trials 50 cum regret (mean/stderr) 64873.86296134676 254.47481840123098 simple regret (mean/stderr) 387.6518340090461 254.47481840123098
algo gnp_acns_offline num trials 49 cum regret (mean/stderr) 27836.774220723793 175.51684366048195 simple regret (mean/stderr) 166.56187554293822 175.51684366048195
algo neurolinear num trials 46 cum regret (mean/stderr) 21941.029605376898 1175.7481370276716 simple regret (mean/stderr) 78.38445823547264 1175.7481370276716
algo gnp_cnp_offline num trials 50 cum regret (mean/stderr) 14676.582017053512 49.70680980613376 simple regret (mean/stderr) 90.18163812795058 49.70680980613376
algo gnp_np_offline num trials 47 cum regret (mean/stderr) 14572.805633999762 38.008412334177 simple regret (mean/stderr) 92.62166893961255 38.008412334177
algo gnp_anp_offline num trials 50 cum regret (mean/stderr) 16835.359516760138 70.42783303627137 simple regret (mean/stderr) 101.82164183506316 70.42783303627137
algo gnp_acnp_offline num trials 50 cum regret (mean/stderr) 17933.431890531156 71.8921062175761 simple regret (mean/stderr) 108.55410195503144 71.8921062175761
algo gnp_acns_offline num trials 49 cum regret (mean/stderr) 17597.90836834028 135.68952362335546 simple regret (mean/stderr) 101.47937218538296 135.68952362335546
algo neurolinear num trials 50 cum regret (mean/stderr) 19376.950556342133 465.1031838067209 simple regret (mean/stderr) 70.95502060721464 465.1031838067209
algo gnp_cnp_offline num trials 50 cum regret (mean/stderr) 12374.692178738573 36.75473249334765 simple regret (mean/stderr) 76.7444275612786 36.75473249334765
algo gnp_np_offline num trials 47 cum regret (mean/stderr) 15459.361302588222 34.17333577056086 simple regret (mean/stderr) 97.01743539864745 34.17333577056086
algo gnp_anp_offline num trials 49 cum regret (mean/stderr) 16554.415130809855 46.10471549896332 simple regret (mean/stderr) 102.43978736538484 46.10471549896332
algo gnp_acnp_offline num trials 50 cum regret (mean/stderr) 15884.275148458679 45.590427618934065 simple regret (mean/stderr) 99.1423149864133 45.590427618934065
algo gnp_acns_offline num trials 49 cum regret (mean/stderr) 28719.743167751254 213.82979107187094 simple regret (mean/stderr) 182.56883284869292 213.82979107187094
algo neurolinear num trials 50 cum regret (mean/stderr) 20704.505960470477 916.5116971558841 simple regret (mean/stderr) 87.74391625856205 916.5116971558841
algo gnp_cnp_offline num trials 49 cum regret (mean/stderr) 6763.490684440271 23.31066558106428 simple regret (mean/stderr) 40.73463088750351 23.31066558106428
algo gnp_np_offline num trials 48 cum regret (mean/stderr) 15895.561421356708 23.012369649182194 simple regret (mean/stderr) 99.88115291773984 23.012369649182194
algo gnp_anp_offline num trials 49 cum regret (mean/stderr) 15803.209578479371 31.901132273581293 simple regret (mean/stderr) 98.64776086788491 31.901132273581293
algo gnp_acnp_offline num trials 50 cum regret (mean/stderr) 11596.090049579263 19.768083931777543 simple regret (mean/stderr) 73.47574117526644 19.768083931777543
algo gnp_acns_offline num trials 49 cum regret (mean/stderr) 23170.026944600693 110.62175658034802 simple regret (mean/stderr) 140.83859304263555 110.62175658034802

In [0]:
for algo in all_cum_regrets_mean.keys():
    plt.errorbar(deltas, all_cum_regrets_mean[algo], yerr=all_cum_regrets_std[algo], linestyle='-', label=labels[algo], color=colors[algo], marker='.')

plt.legend()
plt.title('Cummulative Regret')
plt.xlabel('delta')
plt.ylabel('final cummulative regret')


Out[0]:
Text(0, 0.5, 'final cummulative regret')

In [0]:
for algo in all_simple_regrets_mean.keys():
    plt.errorbar(deltas, all_simple_regrets_mean[algo], yerr=all_simple_regrets_std[algo], linestyle='-', label=labels[algo], color=colors[algo], marker='.')

plt.legend()
plt.title('Simple Regret')
plt.xlabel('delta')
plt.ylabel('final simple regret')


Out[0]:
Text(0, 0.5, 'final simple regret')

In [0]: