Cyclopts Output Analysis

This notebook aides an analyst in visualizing output from a series of cyclopts executions.


In [521]:
from __future__ import print_function

%matplotlib inline
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import tables as t
from collections import defaultdict
import os
from cyclopts import tools, analysis


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

An expected table structure for a combined file looks something like

$ h5ls -r combined.h5 
/                        Group
/Family                  Group
/Family/ResourceExchange Group
/Family/ResourceExchange/ExchangeArcs Dataset {1154242/Inf}
/Family/ResourceExchange/ExchangeGroups Dataset {145858/Inf}
/Family/ResourceExchange/ExchangeInstProperties Dataset {3072/Inf}
/Family/ResourceExchange/ExchangeInstSolutionProperties Dataset {0/Inf}
/Family/ResourceExchange/ExchangeInstSolutions Dataset {0/Inf}
/Family/ResourceExchange/ExchangeNodes Dataset {1531039/Inf}
/Results                 Dataset {0/Inf}
/Species                 Group
/Species/RandomRequest   Group
/Species/RandomRequest/RandomRequestParameters Dataset {3072/Inf}

In [531]:
prefix = '../../exec/run_results/'
fname = 'combined.h5'
dirname = 'small-random-0.5'
f = t.open_file(os.path.join(prefix, dirname, fname), mode='r')

fam_path = '/'.join(['', 'Family', 'ResourceExchange'])
sp_path = '/'.join(['', 'Species', 'RandomRequest'])

props = f.get_node('/'.join([fam_path, 'ExchangeInstProperties']))
samplers = f.get_node('/'.join([sp_path, 'RandomRequestParameters']))
grps = f.get_node('/'.join([fam_path, 'ExchangeGroups']))
pref_flows = f.get_node('/'.join([fam_path, 'ExchangeInstSolutionProperties']))
flows = f.get_node('/'.join([fam_path, 'ExchangeInstSolutions']))
results = f.get_node('/'.join(['', 'Results']))

In [532]:
ninsts = props.nrows
nruns = results.nrows
print("number of instances: {0}, number of runs: {1}, approx. runs per instance: {2}".format(ninsts, nruns, float(nruns) / ninsts))

solvers = sorted(list(set(x['solver'] for x in results.iterrows())))
print("solvers used: {0}".format(", ".join(solvers)))


number of instances: 3072, number of runs: 9216, approx. runs per instance: 3.0
solvers used: cbc, clp, greedy

Result Characterization


In [535]:
soln_info = {x['solnid']: (x['instid'], x['solver']) for x in results.iterrows()}

times = defaultdict(list)
for x in results.iterrows():
    times[x['solver']].append((x['instid'], x['time']))

objectives = defaultdict(list)
for x in results.iterrows():
    objectives[x['solver']].append((x['instid'], x['objective']))

pref_flow = defaultdict(list)
for x in pref_flows.iterrows():
    pref_flow[soln_info[x['solnid']][0]].append((x['instid'], x['pref_flow']))

sum_flow = defaultdict(float)
for x in flows.iterrows():
    sum_flow[x['solnid']] += x['flow']

total_flow = defaultdict(list)
for sid, flow in sum_flow.items():
    solver = solns[sid][1]
    iid = solns[sid][0]
    total_flow[solver].append((iid, flow))

bytime = {x[1]: (x[0], k) for k, l in times.items() for x in l}
maxtime = bytime[max(bytime.keys())]
print('The instance with the maximum time required was {0} '
      'using solver {1}, taking {2} seconds'.format(tools.uuidhex(maxtime[0]), maxtime[1], max(bytime.keys())))


The instance with the maximum time required was 4029cc43044143be816d6138e67bbe9b using solver cbc, taking 10890.146446 seconds

In [562]:
cbc_flows = {x[0]: x[1] for x in total_flow['cbc']}
cbc_obj = {x[0]: x[1] for x in objectives['cbc']}
cbc_times = {x[0]: x[1] for x in times['cbc']}

iids = [k for k, v in cbc_obj.items() if v < 1e8]

y = [cbc_obj[i] / cbc_flows[i] for i in iids]
x = [cbc_times[i] for i in iids]
plt.scatter(x, y)
plt.xlim(0)
plt.ylim(0)
plt.xlabel('solution time (s)')
plt.ylabel('ratio of objective function to total system flow')
#max(cbc_obj.values())


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-562-9e9805b058b5> in <module>()
      5 iids = [k for k, v in cbc_obj.items() if v < 1e8]
      6 
----> 7 y = [cbc_obj[i] / cbc_flows[i] for i in iids]
      8 x = [cbc_times[i] for i in iids]
      9 plt.scatter(x, y)

KeyError: 'w\x8c\x16Q\x82\x8aH\x83\x86\x05G\xf7r\xc6T\xeb'

In [537]:
keys = [k for k in bytime.keys() if k > 5000]
k = keys[-1]
for k in keys:
    print(k, tools.uuidhex(bytime[k][0]))


10839.964075 d6cb537f580e4bebbefeb91f967247e2
10870.718399 58c96e6f0a21439e9b33cca867da74db
10874.6638 8fb5f405a7554cdc8b0fc34d47f4536a
10850.427484 49be7f9f824b428aaaebf0cb5635fb54
10890.146446 4029cc43044143be816d6138e67bbe9b
10847.111989 2035a8de3c9647389217488a15ad14d2
10845.042303 657133f342124a38ba03f7c3f58a30c7
9120.425484 3f9fd4de5b224292ac79227e43ab7fea
10881.019833 4bfd3655bad14d8283dafa51d2c860e6
5935.635647 64599c3a3f6642e19d662de132c3b153
10856.186609 c4c765984d634591adad23ceb1067234
10838.897237 23a8491ace9c48119a20b40a2de395d7
10877.671343 bf2f481f377441c8906d57948304f604
10874.074889 b34f0046cf084e54bc311af97fe5c55b
10881.390778 e8a2c18a10f0493e9b4b5dbe1685a381
10874.141879 2488dccc8b004ca68aa09a74419d0a5e
10880.197959 d37d5358b67241178cc2e73a1b39dcee
10868.769696 40220e4561be4cdb842fbad645c2d456

In [538]:
xname = 'Request Node'
xhandle = 'n_u_nodes'
analysis.plot_xy(props, xhandle, times)
plt.xlabel('Number of {0}s'.format(xname))
plt.ylabel('Solution Time')


Out[538]:
<matplotlib.text.Text at 0x7f0d74ecb0d0>

In [539]:
xname = 'Arc'
xhandle = 'n_arcs'
analysis.plot_xy(props, xhandle, times)
plt.xlabel('Number of {0}s'.format(xname))
plt.ylabel('Solution Time')


Out[539]:
<matplotlib.text.Text at 0x7f0d74b85e10>

In [540]:
xname = 'Constraint'
xhandle = 'n_constrs'
analysis.plot_xy(props, xhandle, times)
plt.xlabel('Number of {0}s'.format(xname))
plt.ylabel('Solution Time')


Out[540]:
<matplotlib.text.Text at 0x7f0d74a93ad0>

In [541]:
xname = 'Arc'
xhandle = 'n_arcs'
yname = 'Constraint'
yhandle = 'n_constrs'
ax = analysis.plot_xyz(props, xhandle, props, yhandle, times)
ax.set_xlabel('Number of {0}s'.format(xname))
ax.set_ylabel('Number of {0}s'.format(yname))
ax.set_zlabel('Solution Time (s)')


Out[541]:
<matplotlib.text.Text at 0x7f0d74a43790>

In [542]:
xname = 'Arc'
xhandle = 'n_arcs'
yname = 'Exclusivity'
yhandle = 'excl_frac'
ax = analysis.plot_xyz(props, xhandle, props, yhandle, times)
ax.set_xlabel('Number of {0}s'.format(xname))
ax.set_ylabel('Fraction of Exclusive Arcs')
ax.set_zlabel('Solution Time (s)')


Out[542]:
<matplotlib.text.Text at 0x7f0d7471b090>

In [543]:
xname = 'Request Node'
xhandle = 'n_u_nodes'
yname = 'Exclusivity'
yhandle = 'excl_frac'
ax = analysis.plot_xyz(props, xhandle, props, yhandle, times)
ax.set_xlabel('Number of {0}s'.format(xname))
ax.set_ylabel('Fraction of Exclusive Arcs')
ax.set_zlabel('Solution Time (s)')


Out[543]:
<matplotlib.text.Text at 0x7f0d744f31d0>

In [544]:
name = 'Request Group'
xhandle = 'n_u_grps'
yname = 'Exclusivity'
yhandle = 'excl_frac'
ax = analysis.plot_xyz(props, xhandle, props, yhandle, times)
ax.set_xlabel('Number of {0}s'.format(xname))
ax.set_ylabel('Fraction of Exclusive Arcs')
ax.set_zlabel('Solution Time (s)')


Out[544]:
<matplotlib.text.Text at 0x7f0d74453090>

In [545]:
xname = 'Arc'
xhandle = 'n_arcs'
yname = 'Request Qty'
yhandle = 'req_qty_avg'

param_to_inst = {}
for x in props.iterrows():
    param_to_inst[x['paramid']] = x['instid']
    
ax = analysis.plot_xyz(props, xhandle, samplers, yhandle, times, toinst=param_to_inst, fromkey='paramid')
ax.set_xlabel('Number of {0}s'.format(xname))
ax.set_ylabel('Number of {0}s'.format(yname))
ax.set_zlabel('Solution Time (s)')


Out[545]:
<matplotlib.text.Text at 0x7f0d740e4490>

In [546]:
xname = 'Arc'
xhandle = 'n_arcs'
analysis.plot_xy(props, xhandle, objectives)
plt.xlabel('Number of {0}s'.format(xname))
plt.ylabel('Objective Value')


Out[546]:
<matplotlib.text.Text at 0x7f0d73ecb710>

In [547]:
xname = 'Constraints'
xhandle = 'n_constrs'
analysis.plot_xy(props, xhandle, objectives)
plt.xlabel('Number of {0}s'.format(xname))
plt.ylabel('Objective Value')


Out[547]:
<matplotlib.text.Text at 0x7f0d73e1a710>

In [548]:
name = 'Request Node'
xhandle = 'n_u_nodes'
analysis.plot_xy(props, xhandle, objectives)
plt.xlabel('Number of {0}s'.format(xname))
plt.ylabel('Objective Value')


Out[548]:
<matplotlib.text.Text at 0x7f0d73c6cc50>

In [549]:
xname = 'Request Group'
xhandle = 'n_u_grps'
analysis.plot_xy(props, xhandle, objectives)
plt.xlabel('Number of {0}s'.format(xname))
plt.ylabel('Objective Value')


Out[549]:
<matplotlib.text.Text at 0x7f0d73bfe1d0>

In [550]:
xname = 'Supply Group'
xhandle = 'n_v_grps'
analysis.plot_xy(props, xhandle, objectives)
plt.xlabel('Number of {0}s'.format(xname))
plt.ylabel('Objective Value')


Out[550]:
<matplotlib.text.Text at 0x7f0d73b42c50>

In [551]:
diff = lambda x, y: (x - y)
rel_diff = lambda x, y: (x - y) / y

def solver_diff(vals, base, baseline, f=rel_diff):
    diffs = defaultdict(list)
    max_diff = defaultdict(float)
    max_diff_id = defaultdict(str)
    for s, l in vals.iteritems():
        if s == base:
            continue
        for x in l:
            diff = f(x[1], baseline[x[0]])
            diffs[s].append(diff)
            if diff > max_diff[s]:
                max_diff_id[s] = x[0] 
                max_diff[s] = diff 
    return diffs, max_diff, max_diff_id

In [552]:
vals = objectives
base = 'cbc'
baseline = {x[0]: x[1] for x in vals[base]}
d, max_d, max_id = solver_diff(vals, base, baseline, f=rel_diff)
#d = {k: np.array(v) for k, v in d.items()}
#idxs = np.where(d['greedy'] < 1e3)
#d = {k: v[idxs] for k, v in d.items() if v < 1e3}
n, bins, patches = plt.hist(d.values(), 20, label=d.keys())
plt.legend()
plt.xlabel('Relative Difference')
plt.ylabel('Number of Instances')
plt.title(r'Distribution of Objective Difference')


Out[552]:
<matplotlib.text.Text at 0x7f0d79600290>

In [553]:
vals = times
base = 'cbc'
baseline = {x[0]: x[1] for x in vals[base]}
d, _, _= solver_diff(vals, base, baseline, f=rel_diff)

n, bins, patches = plt.hist(d.values(), 20, label=d.keys())
plt.legend()
plt.xlabel('Relative Difference')
plt.ylabel('Number of Instances')
plt.title(r'Distribution of Time Difference')


Out[553]:
<matplotlib.text.Text at 0x7f0d738c53d0>

In [555]:
diff = lambda x, y: (x - y)
vals = pref_flow
base = 'cbc'
baseline = {x[0]: x[1] for x in vals[base]}
d, _, _= solver_diff(vals, base, baseline, f=rel_diff)

n, bins, patches = plt.hist(d.values(), 20, label=d.keys())
plt.legend()
plt.xlabel('Relative Difference')
plt.ylabel('Number of Instances')
plt.title(r'Distribution of Flow x Preference Difference')


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-555-d875a3800b81> in <module>()
      3 base = 'cbc'
      4 baseline = {x[0]: x[1] for x in vals[base]}
----> 5 d, _, _= solver_diff(vals, base, baseline, f=rel_diff)
      6 
      7 n, bins, patches = plt.hist(d.values(), 20, label=d.keys())

<ipython-input-551-ffd49ff414c2> in solver_diff(vals, base, baseline, f)
     10             continue
     11         for x in l:
---> 12             diff = f(x[1], baseline[x[0]])
     13             diffs[s].append(diff)
     14             if diff > max_diff[s]:

KeyError: '@M\x8buR\xdfO\x1d\x8d\xf9T\x10`z\x1e\x1e'

In [563]:
vals = total_flow
base = 'cbc'
baseline = {x[0]: x[1] for x in vals[base]}
d, _, _= solver_diff(vals, base, baseline, f=rel_diff)

n, bins, patches = plt.hist(d.values(), 20, label=d.keys())
plt.legend()
plt.xlabel('Relative Difference')
plt.ylabel('Number of Instances')
plt.title(r'Distribution of Total Flow Difference')


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-563-07783751facb> in <module>()
      2 base = 'cbc'
      3 baseline = {x[0]: x[1] for x in vals[base]}
----> 4 d, _, _= solver_diff(vals, base, baseline, f=rel_diff)
      5 
      6 n, bins, patches = plt.hist(d.values(), 20, label=d.keys())

<ipython-input-551-ffd49ff414c2> in solver_diff(vals, base, baseline, f)
     10             continue
     11         for x in l:
---> 12             diff = f(x[1], baseline[x[0]])
     13             diffs[s].append(diff)
     14             if diff > max_diff[s]:

KeyError: 'w\x8c\x16Q\x82\x8aH\x83\x86\x05G\xf7r\xc6T\xeb'

In [ ]:


In [ ]:


In [514]: