REINFORCE vs AKT statistics for target task

Imports


In [ ]:
# Extend the import path with the DeepRL code directory; the `misc` package
# imported below presumably lives there.
# NOTE(review): machine-specific absolute path — make configurable before sharing.
import sys
sys.path.append("/Users/arnomoonens/Dropbox/MA2-AI/Thesis/code/DeepRL/")
# NOTE(review): `np`, `plt` and `tf_scalar_data` are used by later cells but are
# never imported explicitly in this notebook, so they presumably come from this
# star import — confirm, and prefer explicit imports.
from misc.plot_statistics import *
from sklearn.metrics import auc
# NOTE(review): `GetLogdirSubdirectories` is not referenced in the visible cells.
from tensorflow.python.summary.event_multiplexer import EventMultiplexer, GetLogdirSubdirectories
import logging
# Raise the "tensorflow" logger threshold so only WARNING and above get through.
logging.getLogger("tensorflow").setLevel(logging.WARNING)

%matplotlib inline

Loading data


In [ ]:
# Summary directories of the experiments being compared: two AKT runs
# (suffixes 5 and 10) and one REINFORCE ("RE") run.
# NOTE(review): absolute, machine-specific paths.
sum_dir_akt_5 = "/Users/arnomoonens/Dropbox/MA2-AI/Thesis/experiments/cluster/AKT/exp15/exp62768"
sum_dir_akt_10 = "/Users/arnomoonens/Dropbox/MA2-AI/Thesis/experiments/cluster/AKT/exp13/exp62479"
sum_dir_re = "/Users/arnomoonens/Dropbox/MA2-AI/Thesis/experiments/cluster/RE/exp16/exp62796"

# One multiplexer per directory: register the runs found there, then reload
# them. Directories are processed in the same order as they are listed.
em_akt_5, em_akt_10, em_re = (
    EventMultiplexer().AddRunsFromDirectory(summary_dir).Reload()
    for summary_dir in (sum_dir_akt_5, sum_dir_akt_10, sum_dir_re)
)

Processing data


In [ ]:
# Number of leading epochs kept from every run. It also fixes the length of
# the shared x-axis, so it is defined once here instead of being repeated.
N_EPOCHS = 100

# Scalar summaries extracted from each multiplexer by the project helper.
data_akt_5 = tf_scalar_data(em_akt_5)
data_akt_10 = tf_scalar_data(em_akt_10)
data_re = tf_scalar_data(em_re)

# Each "values" entry is indexed as a 2-D array below and averaged over its
# first axis — presumably (runs, epochs); TODO confirm against tf_scalar_data.
# NOTE(review): the REINFORCE data uses tag "Rewards_1" / entry 0 while the AKT
# data uses tag "Reward" / entry 4; the meaning of those indices is not visible here.
rewards_re = np.array(data_re["Rewards_1"][0]["values"])[:, :N_EPOCHS]
mean_rewards_re = np.mean(rewards_re, axis=0)  # already N_EPOCHS wide, no re-slice needed

rewards_akt_10 = np.array(data_akt_10["Reward"][4]["values"])[:, :N_EPOCHS]
mean_rewards_akt_10 = np.mean(rewards_akt_10, axis=0)

rewards_akt_5 = np.array(data_akt_5["Reward"][4]["values"])[:, :N_EPOCHS]
mean_rewards_akt_5 = np.mean(rewards_akt_5, axis=0)

# Fail early, with a readable message, if any experiment logged fewer than
# N_EPOCHS columns; otherwise the mismatch only surfaces later when the curve
# is plotted or integrated against `x`.
for _label, _curve in (
    ("REINFORCE", mean_rewards_re),
    ("AKT 10", mean_rewards_akt_10),
    ("AKT 5", mean_rewards_akt_5),
):
    assert len(_curve) == N_EPOCHS, (
        "{} has only {} epochs, expected {}".format(_label, len(_curve), N_EPOCHS)
    )

# Shared x-axis (epoch index) for the plots and the AUC computation.
x = range(N_EPOCHS)

Plot


In [ ]:
# Mean reward curves of REINFORCE and the 5-variant AKT run (labelled "TLA").
for curve, curve_label in (
    (mean_rewards_re, "REINFORCE"),
    (mean_rewards_akt_5, "TLA"),
):
    plt.plot(x, curve, label=curve_label)
# Currently disabled curve:
#plt.plot(x, mean_rewards_akt_10, label="AKT 10")
plt.xlabel("Epoch")
plt.ylabel("Reward")
# Only the first 30 epochs are shown.
plt.xlim(0, 30)
plt.legend()
plt.show()

Other statistics


In [ ]:
# Area under each mean-reward curve over the shared epoch axis `x`.
auc_re = auc(x, mean_rewards_re)
auc_akt_5 = auc(x, mean_rewards_akt_5)
auc_akt_10 = auc(x, mean_rewards_akt_10)

# Jumpstart: reward at the first epoch (column 0), reported as the mean curve's
# first value and as the median of the per-row first values.
print("Jumpstart")
print("\tMean\n\t\tREINFORCE: {:.3f}\n\t\tAKT 5: {:.3f}\n\t\tAKT 10: {:.3f}".format(
    mean_rewards_re[0], mean_rewards_akt_5[0], mean_rewards_akt_10[0]))
# Same single-print layout as "Mean", so no stray blank line follows the header.
print("\tMedian\n\t\tREINFORCE: {:.3f}\n\t\tAKT 5: {:.3f}\n\t\tAKT 10: {:.3f}".format(
    np.median(rewards_re[:, 0]), np.median(rewards_akt_5[:, 0]), np.median(rewards_akt_10[:, 0])))

print("Total rewards (AUC)\n\tREINFORCE: {:.3f}\n\tAKT 5: {:.3f}\n\tAKT 10: {:.3f}".format(
    auc_re, auc_akt_5, auc_akt_10))

# Transfer ratio: AUC gain of each AKT variant relative to the REINFORCE AUC.
print("Transfer ratio\n\tFor AKT 5: {:.4f}\n\tFor AKT 10: {:.4f}".format(
    (auc_akt_5 - auc_re) / auc_re, (auc_akt_10 - auc_re) / auc_re))

Jumpstarts boxplot


In [ ]:
# Box plot of the first-epoch (column 0) rewards: REINFORCE vs. the 5-variant
# AKT run (labelled "TLA"). The returned artist dict is kept in `bp` because
# the next cell inspects it.
jumpstart_samples = [rewards_re[:, 0], rewards_akt_5[:, 0]]
bp = plt.boxplot(jumpstart_samples)
plt.xticks([1, 2], ["REINFORCE", "TLA"])
plt.ylabel("Reward")
# Anchor the y-axis at zero; the upper limit is left as it is.
plt.ylim(bottom=0)
plt.show()

In [ ]:
# Inspect the y-data of the first box artist returned by the jumpstart box plot
# (the first sample passed there was the REINFORCE one).
first_box = bp["boxes"][0]
first_box.get_ydata()

Asymptotic performance boxplot


In [ ]:
# Box plot of the rewards in the last retained epoch column (index -1):
# REINFORCE vs. the 5-variant AKT run.
# The original referenced `rewards_akt`, which is never defined in this notebook
# (only `rewards_akt_5` and `rewards_akt_10` exist) and raised NameError on a
# fresh kernel. `rewards_akt_5` is used because it is the variant shown in the
# other figures.
# NOTE(review): the other figures label this series "TLA" rather than "AKT" —
# confirm which label is intended.
plt.boxplot([rewards_re[:, -1], rewards_akt_5[:, -1]])
plt.xticks([1, 2], ["REINFORCE", "AKT"])
plt.ylabel("Reward")  # same y-axis label as the jumpstart box plot
plt.ylim(0, 210)
plt.show()

In [ ]: