In [1]:
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import pyplot as pl
import numpy as np
import pickle
In [2]:
def average_loss(x):
    """Return the arithmetic mean of the loss stored in each record of ``x``.

    The loss of a record is read from ``record['history'][0]``.

    Raises:
        ZeroDivisionError: if ``x`` is empty.
    """
    count = float(len(x))
    # Start the sum at 0.0 so the accumulation is done in floating point.
    total = sum((record['history'][0] for record in x), 0.0)
    return total / count
def average_accuracy(x):
    """Return the arithmetic mean of the accuracy stored in each record of ``x``.

    The accuracy of a record is read from ``record['history'][1]``.

    Raises:
        ZeroDivisionError: if ``x`` is empty.
    """
    count = float(len(x))
    # Start the sum at 0.0 so the accumulation is done in floating point.
    total = sum((record['history'][1] for record in x), 0.0)
    return total / count
def std_accuracy(x):
    """Return the standard deviation (``np.std``) of the accuracies in ``x``.

    The accuracy of a record is read from ``record['history'][1]``.
    """
    accuracies = np.asarray([record['history'][1] for record in x])
    return np.std(accuracies)
def std_loss(x):
    """Return the standard deviation (``np.std``) of the losses in ``x``.

    The loss of a record is read from ``record['history'][0]``.
    """
    losses = np.asarray([record['history'][0] for record in x])
    return np.std(losses)
def compute_plot_metrics(history, bin_delta=5.0):
    """Bin the accuracy of a training history over time for plotting.

    Timestamps are shifted so that the earliest record sits at time 0. A bin
    is then opened at every whole unit of time ``i`` from 0 up to the largest
    shifted timestamp, covering the half-open interval ``[i, i + bin_delta)``.
    Because bins start one unit apart but span ``bin_delta`` units,
    consecutive bins overlap (a sliding window). Bins containing no record
    are skipped.

    Unlike the previous implementation, ``history`` is not modified: the
    timestamps stored in the records are left untouched.

    Args:
        history: sequence of records. Each record is read as
            ``record['timestamp']`` (numeric) and ``record['history'][1]``
            (the accuracy).
        bin_delta: width of every bin, in the same unit as the timestamps.

    Returns:
        A tuple ``(x, y, error)`` of NumPy arrays of equal length: the start
        of every non-empty bin, the mean accuracy within it, and the
        standard deviation (``ndarray.std``) of the accuracy within it.
        Three empty arrays are returned for an empty ``history``.
    """
    if len(history) == 0:
        # Nothing to bin; avoids int(-inf + 1) blowing up below.
        return np.asarray([]), np.asarray([]), np.asarray([])

    # Pull the two columns out once instead of re-scanning dicts per bin.
    timestamps = np.asarray([h['timestamp'] for h in history], dtype=float)
    accuracies = np.asarray([h['history'][1] for h in history], dtype=float)

    # Normalize with respect to the origin time (on a copy, not in place).
    timestamps = timestamps - timestamps.min()
    max_time = timestamps.max()

    # Compute the binned data.
    x = []
    y = []
    error = []
    for i in range(int(max_time + 1)):
        start = float(i)
        in_bin = (timestamps >= start) & (timestamps < start + bin_delta)
        if in_bin.any():
            window = accuracies[in_bin]
            x.append(i)
            y.append(window.mean())
            error.append(window.std())

    return np.asarray(x), np.asarray(y), np.asarray(error)
In [ ]:
# Read the pickled AGN training and validation metrics into `data`.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# result files that come from a trusted source.
with open("agn_results.pickle", mode='rb') as handle:
    data = pickle.load(handle)
In [3]:
# Hyperparameter grid that has been evaluated: both the worker counts and the
# lambda values run from 10 to 40 (inclusive) in steps of 5.
workers = np.arange(10, 41, 5)
# Same values as `workers`, but kept as an independent array.
lambdas = workers.copy()
In [5]:
def load_data(data):
    """Convert raw experiment results into the nested dict used for plotting.

    For every combination of the module-level ``workers`` and ``lambdas``
    values, the first result ``data[w][l][0]`` is read and summarised.

    Args:
        data: mapping indexed as ``data[w][l][0]``, yielding a dict with the
            keys ``'training_accuracy'``, ``'validation_accuracy'``,
            ``'training_time'`` and ``'history'``.

    Returns:
        ``plotting_data[w][l]``, a dict with the keys ``'t_acc'``,
        ``'v_acc'``, ``'training_time'``, ``'x'``, ``'y'`` and ``'error'``;
        the last three come from ``compute_plot_metrics``.
    """
    plotting_data = {}
    for w in workers:
        per_worker = plotting_data[w] = {}
        for l in lambdas:
            entry = per_worker[l] = {}
            result = data[w][l][0]
            # Scalar summaries are copied over under their plotting names.
            entry['t_acc'] = result['training_accuracy']
            entry['v_acc'] = result['validation_accuracy']
            entry['training_time'] = result['training_time']
            # Binned accuracy curve derived from the raw history.
            entry['x'], entry['y'], entry['error'] = compute_plot_metrics(result['history'])
    return plotting_data
In [ ]:
# Build the plotting data for whatever results are currently held in `data`
# (presumably the AGN results -- confirm against the cell that loaded them).
plotting_data = {}
for w in workers:
    plotting_data[w] = {}
    for l in lambdas:
        # Progress output: one line per (workers, lambda) combination.
        print(str(w) + " - " + str(l))
        entry = plotting_data[w][l] = {}
        d = data[w][l][0]
        # Scalar summaries are copied over under their plotting names.
        entry['t_acc'] = d['training_accuracy']
        entry['v_acc'] = d['validation_accuracy']
        entry['training_time'] = d['training_time']
        # Binned accuracy curve derived from the raw history.
        entry['x'], entry['y'], entry['error'] = compute_plot_metrics(d['history'])
In [ ]:
optimizer = "AGN"
# One figure per lambda value; every worker count is drawn as its own curve.
for l in lambdas:
    title = "Optimizer Training Accuracy\n"
    handles = []
    for w in workers:
        run = plotting_data[w][l]
        t = "{0:.2f}s".format(run['training_time'])
        t_acc = "{0:.2f}%".format(run['t_acc'] * 100)
        v_acc = "{0:.2f}%".format(run['v_acc'] * 100)
        # The curve label doubles as the prefix of this run's title line.
        label = optimizer + ' ' + r"$n = $" + str(w) + r" $\lambda = $" + str(l)
        title += label + ":" + r" $t = $" + t + r" $acc = $" + t_acc + r" $val = $" + v_acc + "\n"
        x = run['x']
        y = run['y']
        error = run['error']  # only needed by the optional error band below
        p, = pl.plot(x, y, label=label)
        #pl.fill_between(x, y - error, y + error, alpha=0.5)
        handles.append(p)
    fig = pl.gcf()
    fig.set_dpi(200)
    pl.grid(True)
    pl.xlim([-0.1, 1200])
    pl.ylim([0.9, 1])
    pl.xlabel("Seconds")
    pl.ylabel("Training Accuracy")
    pl.title(title)
    pl.legend(handles=handles)
    pl.show()
In [ ]:
optimizer = "AGN"
# One figure per worker count; every lambda value is drawn as its own curve.
for w in workers:
    title = "Optimizer Training Accuracy\n"
    handles = []
    for l in lambdas:
        run = plotting_data[w][l]
        t = "{0:.2f}s".format(run['training_time'])
        t_acc = "{0:.2f}%".format(run['t_acc'] * 100)
        v_acc = "{0:.2f}%".format(run['v_acc'] * 100)
        # The curve label doubles as the prefix of this run's title line.
        label = optimizer + ' ' + r"$n = $" + str(w) + r" $\lambda = $" + str(l)
        title += label + ":" + r" $t = $" + t + r" $acc = $" + t_acc + r" $val = $" + v_acc + "\n"
        x = run['x']
        y = run['y']
        error = run['error']  # only needed by the optional error band below
        p, = pl.plot(x, y, label=label)
        #pl.fill_between(x, y - error, y + error, alpha=0.5)
        handles.append(p)
    fig = pl.gcf()
    fig.set_dpi(200)
    pl.grid(True)
    pl.xlim([-0.1, 1200])
    pl.ylim([0.9, 1])
    pl.xlabel("Seconds")
    pl.ylabel("Training Accuracy")
    pl.title(title)
    pl.legend(handles=handles)
    pl.show()
In [ ]:
# Read the pickled AEASGD training and validation metrics into `data`.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# result files that come from a trusted source.
with open("aeasgd_results.pickle", mode='rb') as handle:
    data = pickle.load(handle)
In [ ]:
# Build the plotting data for whatever results are currently held in `data`
# (presumably the AEASGD results -- confirm against the cell that loaded them).
plotting_data = {}
for w in workers:
    plotting_data[w] = {}
    for l in lambdas:
        # Progress output: one line per (workers, lambda) combination.
        print(str(w) + " - " + str(l))
        entry = plotting_data[w][l] = {}
        d = data[w][l][0]
        # Scalar summaries are copied over under their plotting names.
        entry['t_acc'] = d['training_accuracy']
        entry['v_acc'] = d['validation_accuracy']
        entry['training_time'] = d['training_time']
        # Binned accuracy curve derived from the raw history.
        entry['x'], entry['y'], entry['error'] = compute_plot_metrics(d['history'])
In [ ]:
optimizer = "AEASGD"
# One figure per lambda value; every worker count is drawn as its own curve.
for l in lambdas:
    title = "Optimizer Training Accuracy\n"
    handles = []
    for w in workers:
        run = plotting_data[w][l]
        t = "{0:.2f}s".format(run['training_time'])
        t_acc = "{0:.2f}%".format(run['t_acc'] * 100)
        v_acc = "{0:.2f}%".format(run['v_acc'] * 100)
        # The curve label doubles as the prefix of this run's title line.
        label = optimizer + ' ' + r"$n = $" + str(w) + r" $\lambda = $" + str(l)
        title += label + ":" + r" $t = $" + t + r" $acc = $" + t_acc + r" $val = $" + v_acc + "\n"
        x = run['x']
        y = run['y']
        error = run['error']  # only needed by the optional error band below
        p, = pl.plot(x, y, label=label)
        #pl.fill_between(x, y - error, y + error, alpha=0.5)
        handles.append(p)
    fig = pl.gcf()
    fig.set_dpi(200)
    pl.grid(True)
    pl.xlim([-0.1, 1200])
    pl.ylim([0.9, 1])
    pl.xlabel("Seconds")
    pl.ylabel("Training Accuracy")
    pl.title(title)
    pl.legend(handles=handles)
    pl.show()
In [ ]:
optimizer = "AEASGD"
# One figure per worker count; every lambda value is drawn as its own curve.
for w in workers:
    title = "Optimizer Training Accuracy\n"
    handles = []
    for l in lambdas:
        run = plotting_data[w][l]
        t = "{0:.2f}s".format(run['training_time'])
        t_acc = "{0:.2f}%".format(run['t_acc'] * 100)
        v_acc = "{0:.2f}%".format(run['v_acc'] * 100)
        # The curve label doubles as the prefix of this run's title line.
        label = optimizer + ' ' + r"$n = $" + str(w) + r" $\lambda = $" + str(l)
        title += label + ":" + r" $t = $" + t + r" $acc = $" + t_acc + r" $val = $" + v_acc + "\n"
        x = run['x']
        y = run['y']
        error = run['error']  # only needed by the optional error band below
        p, = pl.plot(x, y, label=label)
        #pl.fill_between(x, y - error, y + error, alpha=0.5)
        handles.append(p)
    fig = pl.gcf()
    fig.set_dpi(200)
    pl.grid(True)
    pl.xlim([-0.1, 1200])
    pl.ylim([0.9, 1])
    pl.xlabel("Seconds")
    pl.ylabel("Training Accuracy")
    pl.title(title)
    pl.legend(handles=handles)
    pl.show()
In [10]:
# Narrow the hyperparameter grid: workers 20..40 and lambdas 10..20, both in
# steps of 5. load_data() reads these module-level arrays.
workers = np.arange(20, 41, 5)
lambdas = np.arange(10, 21, 5)

# AGN training and validation metrics.
with open("agn_results.pickle", mode='rb') as handle:
    data = pickle.load(handle)
plotting_agn = load_data(data)

# ADAG training and validation metrics (this rebinds `data`).
with open("adag_results.pickle", mode="rb") as handle:
    data = pickle.load(handle)
plotting_adag = load_data(data)
In [ ]:
In [20]:
# Compare ADAG and AGN in a single figure for one fixed configuration
# (n = 30 workers, lambda = 10), including a +/- one standard deviation band.
title = "Optimizer Training Accuracy\n"
handles = []
# Select the data set by optimizer name. The lookup compares strings by
# value; the previous `o is "AGN"` relied on string interning (identity) and
# triggers a SyntaxWarning on Python 3.8+.
plotting_by_optimizer = {"ADAG": plotting_adag, "AGN": plotting_agn}
for o in ["ADAG", "AGN"]:
    for w in [30]:
        for l in [10]:
            plotting_data = plotting_by_optimizer[o]
            run = plotting_data[w][l]
            t = "{0:.2f}".format(run['training_time']) + "s"
            t_acc = "{0:.2f}".format(run['t_acc'] * 100) + "%"
            v_acc = "{0:.2f}".format(run['v_acc'] * 100) + "%"
            # One summary line per optimizer in the figure title.
            title += o + ":"
            title += r" $t = $" + t
            title += r" $acc = $" + t_acc
            title += r" $val = $" + v_acc + "\n"
            x = run['x']
            y = run['y']
            error = run['error']
            p, = pl.plot(x, y, label=o + ' ' + r"$n = $" + str(w) + r" $\lambda = $" + str(l))
            # Shaded band: mean accuracy +/- standard deviation per bin.
            pl.fill_between(x, y - error, y + error, alpha=0.5)
            handles.append(p)
fig = matplotlib.pyplot.gcf()
fig.set_dpi(200)
pl.grid(True)
pl.xlim([-.1, 1200 + 0])
pl.ylim([.7, 1])
pl.xlabel("Seconds")
pl.ylabel("Training Accuracy")
pl.title(title)
pl.legend(handles=handles)
pl.show()
In [ ]:
# Print one LaTeX table row per (workers, lambda) combination comparing AGN
# with AEASGD. The better value of each pair (shorter training time, higher
# accuracy) is set in bold; ties leave both values plain.
# NOTE(review): `plotting_easgd` is not defined in the visible cells --
# confirm it is created (e.g. via load_data on the AEASGD results) before
# this cell is run.
d_a = plotting_agn
d_e = plotting_easgd


def _latex_cell(text, unit, bold):
    """Return ``text + unit``, wrapped in ``\\textbf{...}`` when ``bold``."""
    body = text + unit
    if bold:
        return "\\textbf{" + body + "}"
    return body


for w in workers:
    for l in lambdas:
        t_a = "{0:.2f}".format(d_a[w][l]['training_time'])
        t_acc_a = "{0:.2f}".format(d_a[w][l]['t_acc'] * 100)
        v_acc_a = "{0:.2f}".format(d_a[w][l]['v_acc'] * 100)
        t_e = "{0:.2f}".format(d_e[w][l]['training_time'])
        t_acc_e = "{0:.2f}".format(d_e[w][l]['t_acc'] * 100)
        v_acc_e = "{0:.2f}".format(d_e[w][l]['v_acc'] * 100)
        # Compare the *rounded* values, i.e. exactly what is printed, so two
        # numbers that look equal in the table are never ranked.
        time_a, time_e = float(t_a), float(t_e)
        train_a, train_e = float(t_acc_a), float(t_acc_e)
        val_a, val_e = float(v_acc_a), float(v_acc_e)
        cells = [
            str(w),
            str(l),
            # AGN columns.
            _latex_cell(t_a, "s", time_a < time_e),
            _latex_cell(t_acc_a, "\\%", train_a > train_e),
            _latex_cell(v_acc_a, "\\%", val_a > val_e),
            # AEASGD columns.
            _latex_cell(t_e, "s", time_a > time_e),
            _latex_cell(t_acc_e, "\\%", train_a < train_e),
            _latex_cell(v_acc_e, "\\%", val_a < val_e),
        ]
        # Escapes are written out explicitly ("\\%", "\\\\") instead of the
        # invalid sequences "\%" and "\ ", which are deprecated in Python 3;
        # the emitted text is unchanged: row terminator `\\` then `\hline`.
        line = " & ".join(cells) + " " + " \\\\ \n\\hline"
        print(line)
In [ ]:
d_a = plotting_agn
d_e = plotting_easgd
easgd = []
agn = []

# AGN: per worker count, the highest training and validation accuracy over
# all lambdas (never below the 0.0 starting value).
for w in workers:
    best_t_acc = max([0.0] + [d_a[w][l]['t_acc'] for l in lambdas])
    best_v_acc = max([0.0] + [d_a[w][l]['v_acc'] for l in lambdas])
    agn.append((best_t_acc, best_v_acc))

# AEASGD: per worker count, the *mean* training and validation accuracy over
# all lambdas.
# NOTE(review): AGN uses the maximum while AEASGD uses the mean, so the two
# lists are not directly comparable -- confirm this asymmetry is intended.
for w in workers:
    lambda_count = float(len(lambdas))
    best_t_acc = sum((d_e[w][l]['t_acc'] for l in lambdas), 0.0) / lambda_count
    best_v_acc = sum((d_e[w][l]['v_acc'] for l in lambdas), 0.0) / lambda_count
    easgd.append((best_t_acc, best_v_acc))
In [ ]:
# Display the (training accuracy, validation accuracy) tuples collected in `agn`.
agn
In [ ]:
# Display the (training accuracy, validation accuracy) tuples collected in `easgd`.
easgd
In [ ]:
# Plot training and validation accuracy against the number of workers for
# both optimizers.
title = "Accuracy vs. Number of workers"
x_label = "Workers"
y_label = "Accuracy"
# `agn` and `easgd` hold one entry per element of `workers`, so the x axis is
# taken from `workers` itself. The previous hard-coded np.arange(10, 41, 5)
# no longer matched in length once `workers` had been narrowed.
x = np.asarray(workers)
# Each entry is a (training accuracy, validation accuracy) tuple. The loop
# variable is deliberately not called `x`, so it cannot clobber the axis.
agn_t_acc = [entry[0] for entry in agn]
agn_v_acc = [entry[1] for entry in agn]
aeasgd_t_acc = [entry[0] for entry in easgd]
aeasgd_v_acc = [entry[1] for entry in easgd]
handles = []
curves = [
    (agn_t_acc, "AGN Training"),
    (agn_v_acc, "AGN Validation"),
    (aeasgd_t_acc, "AEASGD Training"),
    (aeasgd_v_acc, "AEASGD Validation"),
]
for values, label in curves:
    h, = pl.plot(x, values, label=label, linewidth=3)
    handles.append(h)
fig = matplotlib.pyplot.gcf()
fig.set_dpi(200)
pl.grid(True)
pl.ylim([.965, 1])
pl.xlabel(x_label)
pl.ylabel(y_label)
pl.title(title)
pl.legend(handles=handles)
pl.show()
In [ ]:
from scipy.interpolate import InterpolatedUnivariateSpline
In [ ]:
# For every (workers, lambda) combination, integrate the binned accuracy
# curves of AGN and AEASGD over the time span both runs cover and count how
# often the AGN integral is the larger one.
agn_wins = 0
comparisons = 0
for w in workers:
    for l in lambdas:
        comparisons += 1
        # AGN curve; its last x value is taken as the end of the run.
        agn_x = np.asarray(plotting_agn[w][l]['x'])
        agn_y = np.asarray(plotting_agn[w][l]['y'])
        agn_t = float(agn_x[-1])
        # AEASGD curve, same treatment.
        easgd_x = np.asarray(plotting_easgd[w][l]['x'])
        easgd_y = np.asarray(plotting_easgd[w][l]['y'])
        easgd_t = float(easgd_x[-1])
        # Integrate only up to the end of the shorter run.
        m = agn_t if agn_t <= easgd_t else easgd_t
        # Area under each curve on [0, m], using a degree-1 spline (k=1).
        agn_curve = InterpolatedUnivariateSpline(agn_x, agn_y, k=1)
        s_agn = agn_curve.integral(0, m)
        easgd_curve = InterpolatedUnivariateSpline(easgd_x, easgd_y, k=1)
        s_easgd = easgd_curve.integral(0, m)
        # Ratio of the two areas; above 1 means the AGN area is larger.
        r = s_agn / s_easgd
        if r > 1:
            agn_wins += 1
        # Draw one example configuration.
        if w == 40 and l == 15:
            pl.title("Temporal Efficiency AGN vs. AEASGD")
            handles = []
            config = r" $n = $" + str(w) + r" $\lambda = $" + str(l)
            h, = pl.plot(agn_x, agn_y, label=r"AGN" + config)
            handles.append(h)
            h, = pl.plot(easgd_x, easgd_y, label=r"AEASGD" + config)
            handles.append(h)
            pl.xlim([0, m])
            pl.xlabel("Seconds")
            pl.ylabel("Accuracy")
            fig = pl.gcf()
            fig.set_dpi(200)
            pl.grid(True)
            pl.legend(handles=handles)
            pl.show()
        # Optional LaTeX row output, left disabled:
        #if r > 1:
        #    print(str(w) + " & " + str(l) + " & \\textbf{" + "{0:.3f}".format(r) + "} \\\\ \n\\hline")
        #else:
        #    print(str(w) + " & " + str(l) + " & " + "{0:.3f}".format(r) + " \\\\ \n\\hline")
# Fraction of configurations in which the AGN area exceeded the AEASGD area.
print(float(agn_wins) / float(comparisons))
In [ ]: