In [6]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import json
import matplotlib as mpl
label_size = 20
mpl.rcParams['xtick.labelsize'] = label_size
mpl.rcParams['ytick.labelsize'] = label_size
In [ ]:
agent_labels = {'BayesAgent':              (r'AI$\xi$', 'red'),
                'MC-AIXI':                 ('MC-AIXI', 'red'),
                'MC-AIMU':                 ('MC-AIMU', 'blue'),
                'MDL Agent':               ('MDL', 'blue'),
                'MC-AIXI-Dirichlet':       ('MC-AIXI-Dirichlet', 'blue'),
                'Knowledge-seeking agent': ('Kullback-Leibler', 'blue'),
                'KullbackLeiblerKSA':      ('Kullback-Leibler', 'blue'),
                'ShannonKSA':              ('Shannon', 'green'),
                'SquareKSA':               ('Square', 'red'),
                'Shannon KSA':             ('Shannon', 'orange'),
                'Square KSA':              ('Square', 'red'),
                'ThompsonAgent':           ('Thompson Sampling', 'blue'),
                'Thompson Sampling':       ('Thompson Sampling', 'blue'),
                'QLearn':                  ('Q-Learning', 'black'),
                'Q-Learning':              ('Q-Learning', 'black'),
                'KSA-Dirichlet':           ('Kullback-Leibler', 'blue'),
                'Entropy-seeking agent':   ('Shannon', 'orange'),
                'Square KSA-Dirichlet':    ('Square', 'red')}
def plot_results(directory,
                 filename='results-1',
                 objective=None,
                 outfile=None,
                 show_optimal=False,
                 show_variance=True,
                 show_maxmin=False):
    # Default labels: knowledge-seeking agents report exploration,
    # everything else reports reward.
    if not objective:
        objective = 'explored' if 'ksa' in directory else 'rewards'
    if objective == 'rewards':
        y_axis = 'Average Reward'
    elif objective == 'explored':
        y_axis = 'Exploration (%)'
    else:
        y_axis = objective
    with open(directory + '/' + filename + '.json') as f:
        data = json.load(f)
    plt.figure(figsize=(12, 8), dpi=200)
    # iterate over agent configurations; each maps to a list of runs
    for k in data:
        d = data[k]
        cycles = d[0]['cycles']
        runs = len(d)
        A = np.zeros((cycles, runs))
        for j in range(runs):
            A[:, j] = np.array(d[j][objective][:cycles])
        mu = np.mean(A, 1)
        sigma = np.std(A, 1)
        # one-sigma band, clipped below by the empirical minimum and
        # above by 100 (the maximum exploration percentage)
        a = np.max(np.vstack((mu - sigma, np.min(A, 1))), 0)
        b = np.min(np.vstack((mu + sigma, np.full(cycles, 100.))), 0)
        # fall back to the raw key and the default color cycle for
        # configurations missing from agent_labels
        lab, color = agent_labels.get(k, (k, None))
        alpha = 0.1
        if show_variance:
            plt.plot(a, color=color, alpha=alpha)
            plt.plot(b, color=color, alpha=alpha)
            plt.fill_between(np.arange(cycles), a, b, alpha=alpha, color=color)
        if show_maxmin:
            plt.plot(np.max(A, axis=1), color=color, linestyle='-.')
            plt.plot(np.min(A, axis=1), color=color, linestyle='-.')
        plt.plot(mu, label=lab, color=color, lw=3)
    if objective == 'rewards' and show_optimal:
        # NOTE: hardcoded for the optimal policy in one gridworld:
        # -1 reward per cycle for the 11 cycles it takes to reach the
        # dispenser, then 75 per cycle; plot its running average.
        xs = np.arange(cycles)
        ys = np.zeros(cycles)
        ys[:11] = -1.
        ys[11:] = 75.
        ys = np.cumsum(ys)
        ys[1:] /= xs[1:]
        plt.plot(xs, ys, 'k--', lw=3, label='Optimal')
    plt.xlabel('Cycles', fontsize=30)
    plt.ylabel(y_axis, fontsize=30)
    plt.legend(fontsize=25, loc='lower right')
    plt.margins(0.01, 0)
    #plt.ylim([-1,100])
    if outfile:
        plt.savefig(directory + '/' + outfile + '.png', bbox_inches='tight')
        plt.close()
plot_results('aixi-models','results-3',show_optimal=True)
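A minimal sketch of the results layout that plot_results expects, inferred from the indexing above: the JSON maps each agent configuration to a list of runs, and each run carries 'cycles' plus a per-cycle series under the objective key. The 'demo' directory and the synthetic numbers below are hypothetical, for illustration only.
In [ ]:
import os
# hypothetical synthetic data in the inferred schema: ten noisy runs
# of a reward series that climbs from -1 towards 50
demo = {'QLearn': [{'cycles': 100,
                    'rewards': (np.linspace(-1., 50., 100)
                                + np.random.randn(100)).tolist()}
                   for _ in range(10)]}
os.makedirs('demo', exist_ok=True)
with open('demo/results-1.json', 'w') as f:
    json.dump(demo, f)
plot_results('demo', 'results-1', objective='rewards')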
In [13]:
def plot_rc_results(directory,
                    filename='results',
                    objective='rewards',
                    outfile=None,
                    show_variance=True,
                    runs=5,
                    cycles=1000000,
                    color='red',
                    ls='solid',
                    label='Q-learning'):
    if objective == 'rewards':
        y_axis = 'Average Observed Reward'
    elif objective == 'corrupt_rewards':
        y_axis = 'Average Corrupt Reward'
    elif objective == 'true_rewards':
        y_axis = 'Average True Reward'
    else:
        y_axis = objective
    # one file per run: <directory>/<filename>-<j>.json
    A = np.zeros((cycles, runs))
    for j in range(runs):
        with open(directory + '/' + filename + '-' + str(j + 1) + '.json') as f:
            data = json.load(f)
        A[:, j] = np.array(data['Reward Corruption'][0][objective])
    mu = np.mean(A, 1)
    sigma = np.std(A, 1)
    # one-sigma band, clipped to the empirical minimum and to 100
    a = np.max(np.vstack((mu - sigma, np.min(A, 1))), 0)
    b = np.min(np.vstack((mu + sigma, np.full(cycles, 100.))), 0)
    alpha = 0.1
    if show_variance:
        plt.plot(a, color=color, alpha=alpha, ls=ls)
        plt.plot(b, color=color, alpha=alpha, ls=ls)
        plt.fill_between(np.arange(cycles), a, b, alpha=alpha, color=color)
    plt.plot(mu, color=color, label=label, lw=3, ls=ls)
    plt.xscale('log')
    plt.xlabel('Cycles', fontsize=20)
    plt.ylabel(y_axis, fontsize=20)
    if outfile:
        plt.savefig(directory + '/' + outfile + '.png', bbox_inches='tight')
        plt.close()
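# Expected per-file layout, inferred from the indexing above (field names
# as logged by the experiment; values illustrative):
# {'Reward Corruption': [{'rewards': [...],          # running averages,
#                         'corrupt_rewards': [...],  # one entry per cycle
#                         'true_rewards': [...]}]}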
In [14]:
### plot results for different agents on the same plot
runs = 100  # number of runs
goals = '4'  # number of goal tiles
for rew in ('true_', ''):
    # the first call keeps plot_rc_results' default label, 'Q-learning'
    plot_rc_results('reward-corruption/goals' + goals + '_qlearning', 'results', rew + 'rewards', runs=runs, color='red')
    plot_rc_results('reward-corruption/goals' + goals + '_softmax', 'results', rew + 'rewards', runs=runs, color='orange', label='Softmax')
    plot_rc_results('reward-corruption/goals' + goals + '_quantiliser_delta.2', 'results', rew + 'rewards', runs=runs, color='black', label='Quantiliser (.2)')
    plot_rc_results('reward-corruption/goals' + goals + '_quantiliser_delta.5', 'results', rew + 'rewards', runs=runs, color='blue', label='Quantiliser (.5)')
    plot_rc_results('reward-corruption/goals' + goals + '_quantiliser_delta.8', 'results', rew + 'rewards', runs=runs, color='green', label='Quantiliser (.8)')
    plt.ylim([0, 1])
    plt.legend(loc=2)
    plt.savefig('reward-corruption/goals' + goals + '_' + rew + '.png', bbox_inches='tight', format='png')
    plt.close()
In [23]:
# compute average observed and true rewards at the final cycle,
# reported as mean +- standard deviation across runs
def round_to_n(x, n):
    # round x to n significant figures; guard against log10 of zero
    # and of negative values
    if x == 0:
        return 0.0
    return round(x, -int(np.floor(np.log10(abs(x)))) + (n - 1))

def comb(a, b):
    return str(round_to_n(a, 3)) + ' +- ' + str(round_to_n(b, 2))

combine = np.vectorize(comb)
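# Worked example (values hypothetical): round_to_n(0.12345, 3) -> 0.123
# and round_to_n(0.0678, 2) -> 0.068, so comb(0.12345, 0.0678) returns
# '0.123 +- 0.068'.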
def average_results(directory,
                    filename='results',
                    runs=100,
                    cycles=1000000):
    A = np.zeros((2, runs))
    for j in range(runs):
        with open(directory + '/' + filename + '-' + str(j + 1) + '.json') as f:
            data = json.load(f)
        # final entries of the running-average series
        A[0, j] = data['Reward Corruption'][0]['rewards'][cycles - 1]
        A[1, j] = data['Reward Corruption'][0]['true_rewards'][cycles - 1]
    res = combine(np.mean(A, 1), np.std(A, 1))
    print(directory + '\t observed rewards ' + res[0] + '\t true rewards ' + res[1])
average_results('reward-corruption/goals' + goals + '_qlearning')
average_results('reward-corruption/goals' + goals + '_softmax')
average_results('reward-corruption/goals' + goals + '_quantiliser_delta.2')
average_results('reward-corruption/goals' + goals + '_quantiliser_delta.5')
average_results('reward-corruption/goals' + goals + '_quantiliser_delta.8')
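A hypothetical smoke test for average_results: write a single synthetic run in the per-file layout sketched above and summarise it. The 'demo-rc' directory and the constant reward values are made up for illustration.
In [ ]:
import os
# one fabricated run: constant running averages of 0.5 (observed) and 0.4 (true)
demo_rc = {'Reward Corruption': [{'rewards': [0.5] * 1000,
                                  'true_rewards': [0.4] * 1000}]}
os.makedirs('demo-rc', exist_ok=True)
with open('demo-rc/results-1.json', 'w') as f:
    json.dump(demo_rc, f)
average_results('demo-rc', runs=1, cycles=1000)
# prints: demo-rc	 observed rewards 0.5 +- 0.0	 true rewards 0.4 +- 0.0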
In [ ]: