In [521]:
from __future__ import print_function
%matplotlib inline
%load_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
import numpy as np
import tables as t
from collections import defaultdict
import os
from cyclopts import tools, analysis
The expected table structure of a combined file looks something like the following:
$ h5ls -r combined.h5
/ Group
/Family Group
/Family/ResourceExchange Group
/Family/ResourceExchange/ExchangeArcs Dataset {1154242/Inf}
/Family/ResourceExchange/ExchangeGroups Dataset {145858/Inf}
/Family/ResourceExchange/ExchangeInstProperties Dataset {3072/Inf}
/Family/ResourceExchange/ExchangeInstSolutionProperties Dataset {0/Inf}
/Family/ResourceExchange/ExchangeInstSolutions Dataset {0/Inf}
/Family/ResourceExchange/ExchangeNodes Dataset {1531039/Inf}
/Results Dataset {0/Inf}
/Species Group
/Species/RandomRequest Group
/Species/RandomRequest/RandomRequestParameters Dataset {3072/Inf}
In [531]:
# Location of the combined results database, relative to this notebook.
prefix = '../../exec/run_results/'
fname = 'combined.h5'
dirname = 'small-random-0.5'

# Opened read-only; the handle stays open because later cells iterate over
# the tables fetched below.
f = t.open_file(os.path.join(prefix, dirname, fname), mode='r')

# Absolute paths of the groups that hold the family- and species-level tables.
fam_path = '/Family/ResourceExchange'
sp_path = '/Species/RandomRequest'

# Table handles used by the rest of the notebook.
props = f.get_node('{0}/{1}'.format(fam_path, 'ExchangeInstProperties'))
samplers = f.get_node('{0}/{1}'.format(sp_path, 'RandomRequestParameters'))
grps = f.get_node('{0}/{1}'.format(fam_path, 'ExchangeGroups'))
pref_flows = f.get_node('{0}/{1}'.format(fam_path, 'ExchangeInstSolutionProperties'))
flows = f.get_node('{0}/{1}'.format(fam_path, 'ExchangeInstSolutions'))
results = f.get_node('/Results')
In [532]:
# Report how many instances and solver runs the file contains, and which
# solvers appear in the Results table.
ninsts = props.nrows
nruns = results.nrows
print(
    "number of instances: {0}, number of runs: {1}, approx. runs per instance: {2}".format(
        ninsts, nruns, float(nruns) / ninsts))

# A set comprehension removes duplicate solver names before sorting.
solvers = sorted({row['solver'] for row in results.iterrows()})
print("solvers used: {0}".format(", ".join(solvers)))
In [535]:
# Single pass over Results:
#   soln_info  : solution id -> (instance id, solver)
#   times      : solver -> [(instance id, solve time), ...]
#   objectives : solver -> [(instance id, objective value), ...]
soln_info = {}
times = defaultdict(list)
objectives = defaultdict(list)
for x in results.iterrows():
    iid, solver = x['instid'], x['solver']
    soln_info[x['solnid']] = (iid, solver)
    times[solver].append((iid, x['time']))
    objectives[solver].append((iid, x['objective']))

# pref_flow : solver -> [(instance id, pref_flow), ...]
# Keyed by the solver (element 1 of soln_info), because the comparison cell
# further down looks the baseline up with a solver name (vals['cbc']).
pref_flow = defaultdict(list)
for x in pref_flows.iterrows():
    solver = soln_info[x['solnid']][1]
    pref_flow[solver].append((x['instid'], x['pref_flow']))

# Sum the flow of every row belonging to the same solution.
sum_flow = defaultdict(float)
for x in flows.iterrows():
    sum_flow[x['solnid']] += x['flow']

# total_flow : solver -> [(instance id, summed flow), ...]
total_flow = defaultdict(list)
for sid, flow in sum_flow.items():
    iid, solver = soln_info[sid]
    total_flow[solver].append((iid, flow))

# bytime : solve time -> (instance id, solver).
# NOTE: the time itself is the key, so runs with exactly equal times
# overwrite each other.
bytime = {run[1]: (run[0], solver) for solver, runs in times.items() for run in runs}
slowest = max(bytime.keys())
maxtime = bytime[slowest]
print('The instance with the maximum time required was {0} '
      'using solver {1}, taking {2} seconds'.format(tools.uuidhex(maxtime[0]), maxtime[1], slowest))
In [562]:
# Per-instance lookups for the cbc solver, built from the (instance id, value)
# pairs collected earlier.
cbc_flows = dict(total_flow['cbc'])
cbc_obj = dict(objectives['cbc'])
cbc_times = dict(times['cbc'])

# Keep only instances whose objective is below 1e8.
# NOTE(review): presumably this drops sentinel/unsolved objective values -- confirm.
iids = [iid for iid, obj in cbc_obj.items() if obj < 1e8]
y = [cbc_obj[iid] / cbc_flows[iid] for iid in iids]
x = [cbc_times[iid] for iid in iids]

# Scatter of objective-per-unit-flow against solve time, both axes starting at 0.
plt.scatter(x, y)
plt.xlim(0)
plt.ylim(0)
plt.ylabel('ratio of objective function to total system flow')
plt.xlabel('solution time (s)')
In [537]:
# Print every recorded solve time above 5000 together with the hex id of the
# instance it belongs to. Nothing is printed when no run exceeds the
# threshold (the former `k = keys[-1]` line was dead code that raised
# IndexError in that case).
keys = [k for k in bytime.keys() if k > 5000]
for k in keys:
    print(k, tools.uuidhex(bytime[k][0]))
In [538]:
# Solution time against the `n_u_nodes` column of the instance properties.
xhandle = 'n_u_nodes'
xname = 'Request Node'
analysis.plot_xy(props, xhandle, times)
plt.ylabel('Solution Time')
plt.xlabel('Number of ' + xname + 's')
Out[538]:
In [539]:
# Solution time against the `n_arcs` column of the instance properties.
xhandle = 'n_arcs'
xname = 'Arc'
analysis.plot_xy(props, xhandle, times)
plt.ylabel('Solution Time')
plt.xlabel('Number of ' + xname + 's')
Out[539]:
In [540]:
# Solution time against the `n_constrs` column of the instance properties.
xhandle = 'n_constrs'
xname = 'Constraint'
analysis.plot_xy(props, xhandle, times)
plt.ylabel('Solution Time')
plt.xlabel('Number of ' + xname + 's')
Out[540]:
In [541]:
# Three-axis view: arcs (x) and constraints (y) against solution time (z).
xhandle, xname = 'n_arcs', 'Arc'
yhandle, yname = 'n_constrs', 'Constraint'
ax = analysis.plot_xyz(props, xhandle, props, yhandle, times)
ax.set_zlabel('Solution Time (s)')
ax.set_ylabel('Number of ' + yname + 's')
ax.set_xlabel('Number of ' + xname + 's')
Out[541]:
In [542]:
# Three-axis view: arcs (x) and the `excl_frac` column (y) against solution time (z).
xhandle, xname = 'n_arcs', 'Arc'
yhandle, yname = 'excl_frac', 'Exclusivity'
ax = analysis.plot_xyz(props, xhandle, props, yhandle, times)
ax.set_zlabel('Solution Time (s)')
ax.set_ylabel('Fraction of Exclusive Arcs')
ax.set_xlabel('Number of ' + xname + 's')
Out[542]:
In [543]:
# Three-axis view: request nodes (x) and the `excl_frac` column (y) against
# solution time (z).
xhandle, xname = 'n_u_nodes', 'Request Node'
yhandle, yname = 'excl_frac', 'Exclusivity'
ax = analysis.plot_xyz(props, xhandle, props, yhandle, times)
ax.set_zlabel('Solution Time (s)')
ax.set_ylabel('Fraction of Exclusive Arcs')
ax.set_xlabel('Number of ' + xname + 's')
Out[543]:
In [544]:
# Three-axis view: request groups (x) and the `excl_frac` column (y) against
# solution time (z).
# The label variable must be `xname` (it was misspelled `name`), otherwise the
# x axis silently reuses the value left over from the previous cell.
xname = 'Request Group'
xhandle = 'n_u_grps'
yname = 'Exclusivity'
yhandle = 'excl_frac'
ax = analysis.plot_xyz(props, xhandle, props, yhandle, times)
ax.set_xlabel('Number of {0}s'.format(xname))
ax.set_ylabel('Fraction of Exclusive Arcs')
ax.set_zlabel('Solution Time (s)')
Out[544]:
In [545]:
# Three-axis view mixing two tables: arcs come from the instance properties,
# `req_qty_avg` from the species parameter table.
xhandle, xname = 'n_arcs', 'Arc'
yhandle, yname = 'req_qty_avg', 'Request Qty'

# Parameter id -> instance id, passed to plot_xyz together with the name of
# the key column so rows of `samplers` can be matched to instances.
param_to_inst = {row['paramid']: row['instid'] for row in props.iterrows()}

ax = analysis.plot_xyz(props, xhandle, samplers, yhandle, times,
                       toinst=param_to_inst, fromkey='paramid')
ax.set_zlabel('Solution Time (s)')
# NOTE(review): the y data is `req_qty_avg`, so a "Number of ...s" label may be
# misleading -- confirm the intended wording.
ax.set_ylabel('Number of {0}s'.format(yname))
ax.set_xlabel('Number of {0}s'.format(xname))
Out[545]:
In [546]:
# Objective value against the `n_arcs` column of the instance properties.
xhandle = 'n_arcs'
xname = 'Arc'
analysis.plot_xy(props, xhandle, objectives)
plt.ylabel('Objective Value')
plt.xlabel('Number of ' + xname + 's')
Out[546]:
In [547]:
# Objective value against the `n_constrs` column of the instance properties.
# The name is singular because the label template appends the plural "s"
# (the previous 'Constraints' rendered as "Number of Constraintss").
xname = 'Constraint'
xhandle = 'n_constrs'
analysis.plot_xy(props, xhandle, objectives)
plt.xlabel('Number of {0}s'.format(xname))
plt.ylabel('Objective Value')
Out[547]:
In [548]:
# Objective value against the `n_u_nodes` column of the instance properties.
# The label variable must be `xname` (it was misspelled `name`), otherwise the
# x axis silently reuses the value left over from the previous cell.
xname = 'Request Node'
xhandle = 'n_u_nodes'
analysis.plot_xy(props, xhandle, objectives)
plt.xlabel('Number of {0}s'.format(xname))
plt.ylabel('Objective Value')
Out[548]:
In [549]:
# Objective value against the `n_u_grps` column of the instance properties.
xhandle = 'n_u_grps'
xname = 'Request Group'
analysis.plot_xy(props, xhandle, objectives)
plt.ylabel('Objective Value')
plt.xlabel('Number of ' + xname + 's')
Out[549]:
In [550]:
# Objective value against the `n_v_grps` column of the instance properties.
xhandle = 'n_v_grps'
xname = 'Supply Group'
analysis.plot_xy(props, xhandle, objectives)
plt.ylabel('Objective Value')
plt.xlabel('Number of ' + xname + 's')
Out[550]:
In [551]:
def diff(x, y):
    """Return the signed difference ``x - y``."""
    return x - y


def rel_diff(x, y):
    """Return the signed difference ``x - y`` divided by ``y``.

    There is no guard against ``y == 0``.
    """
    return (x - y) / y


def solver_diff(vals, base, baseline, f=rel_diff):
    """Compare every solver's per-instance values against a baseline solver.

    Parameters
    ----------
    vals : mapping of solver name -> iterable of (instance id, value) entries
    base : solver name to skip (the solver the baseline was taken from)
    baseline : mapping of instance id -> baseline value
    f : callable ``f(value, baseline_value)`` producing the difference;
        defaults to ``rel_diff``

    Returns
    -------
    (diffs, max_diff, max_diff_id) : three defaultdicts keyed by solver name
        diffs       -- list of differences, in the order of ``vals[solver]``
        max_diff    -- largest difference seen; starts at 0.0, so it only
                       records differences greater than zero
        max_diff_id -- instance id at which ``max_diff`` occurred

    Raises
    ------
    KeyError
        If an instance id in ``vals`` is missing from ``baseline``.
    """
    diffs = defaultdict(list)
    max_diff = defaultdict(float)
    max_diff_id = defaultdict(str)
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only) and
    # matches the rest of the notebook.
    for solver, entries in vals.items():
        if solver == base:
            continue
        for entry in entries:
            delta = f(entry[1], baseline[entry[0]])
            diffs[solver].append(delta)
            if delta > max_diff[solver]:
                max_diff_id[solver] = entry[0]
                max_diff[solver] = delta
    return diffs, max_diff, max_diff_id
In [552]:
# Histogram of each solver's objective value relative to the cbc baseline.
vals = objectives
base = 'cbc'
# Instance id -> baseline objective.
baseline = dict(vals[base])
d, max_d, max_id = solver_diff(vals, base, baseline, f=rel_diff)
# Materialise the dict views: plt.hist needs real sequences for the data sets
# and their labels (dict views are not sequences under Python 3). Both lists
# come from the same dict, so data and labels stay aligned.
n, bins, patches = plt.hist(list(d.values()), 20, label=list(d.keys()))
plt.legend()
plt.xlabel('Relative Difference')
plt.ylabel('Number of Instances')
plt.title(r'Distribution of Objective Difference')
Out[552]:
In [553]:
# Histogram of each solver's solution time relative to the cbc baseline.
vals = times
base = 'cbc'
# Instance id -> baseline solve time.
baseline = dict(vals[base])
d, _, _ = solver_diff(vals, base, baseline, f=rel_diff)
# Materialise the dict views: plt.hist needs real sequences for the data sets
# and their labels (dict views are not sequences under Python 3).
n, bins, patches = plt.hist(list(d.values()), 20, label=list(d.keys()))
plt.legend()
plt.xlabel('Relative Difference')
plt.ylabel('Number of Instances')
plt.title(r'Distribution of Time Difference')
Out[553]:
In [555]:
# Histogram of each solver's preference-weighted flow relative to the cbc baseline.
# NOTE(review): `diff` is redefined here but the call below passes
# f=rel_diff -- confirm which of the two was intended.
diff = lambda x, y: (x - y)
vals = pref_flow
base = 'cbc'
# Instance id -> baseline pref_flow value.
baseline = dict(vals[base])
d, _, _ = solver_diff(vals, base, baseline, f=rel_diff)
# Materialise the dict views: plt.hist needs real sequences for the data sets
# and their labels (dict views are not sequences under Python 3).
n, bins, patches = plt.hist(list(d.values()), 20, label=list(d.keys()))
plt.legend()
plt.xlabel('Relative Difference')
plt.ylabel('Number of Instances')
plt.title(r'Distribution of Flow x Preference Difference')
In [563]:
# Histogram of each solver's total flow relative to the cbc baseline.
vals = total_flow
base = 'cbc'
# Instance id -> baseline total flow.
baseline = dict(vals[base])
d, _, _ = solver_diff(vals, base, baseline, f=rel_diff)
# Materialise the dict views: plt.hist needs real sequences for the data sets
# and their labels (dict views are not sequences under Python 3).
n, bins, patches = plt.hist(list(d.values()), 20, label=list(d.keys()))
plt.legend()
plt.xlabel('Relative Difference')
plt.ylabel('Number of Instances')
plt.title(r'Distribution of Total Flow Difference')
In [ ]:
In [ ]:
In [514]: