In [1]:
import sqlite3
import numpy as np
import scipy.stats as st
%pylab inline
class GeomMean:
    """SQLite user-defined aggregate that returns the geometric mean of a column.

    The class follows the sqlite3 aggregate protocol: ``step`` is called once
    per row of a group and ``finalize`` once at the end of the group.
    """

    def __init__(self):
        # Values collected for the group currently being aggregated.
        self.values = []

    def step(self, value):
        """Record one row's value.

        SQL NULLs arrive as ``None`` and are skipped (like the built-in SQL
        aggregates do) so they cannot break the computation in ``finalize``.
        """
        if value is not None:
            self.values.append(value)

    def finalize(self):
        """Return the geometric mean of the recorded values.

        Returns ``None`` (SQL NULL) when no non-NULL value was recorded,
        instead of handing an empty list to ``st.gmean``.
        """
        if not self.values:
            return None
        return st.gmean(self.values)
class Wilcoxon:
    """SQLite user-defined aggregate running a Wilcoxon signed-rank test.

    Takes two columns (paired samples) and reports whether their difference
    is significant at the 5% level. The class follows the sqlite3 aggregate
    protocol: ``step`` per row, ``finalize`` once per group.
    """

    def __init__(self):
        # Paired observations; index i of both lists belongs to the same row.
        self.valuesLeft = []
        self.valuesRight = []

    def step(self, value1, value2):
        """Record one pair of observations.

        A pair is dropped entirely if either side is SQL NULL (``None``), so
        the two lists always stay aligned and contain only numbers.
        """
        if value1 is None or value2 is None:
            return
        self.valuesLeft.append(value1)
        self.valuesRight.append(value2)

    def finalize(self):
        """Return 1.0 if the paired difference is significant (p < 0.05), else 0.0.

        Returns ``None`` (SQL NULL) when no complete pair was recorded, since
        the test is undefined without data.
        """
        if not self.valuesLeft:
            return None
        # Only the p-value is needed; the test statistic is discarded.
        _, prob = st.wilcoxon(self.valuesLeft, self.valuesRight)
        return 1.0 if prob < 0.05 else 0.0
def boldify(floatStr):
    """Wrap a formatted number in LaTeX ``\\mathbf`` markup.

    The integer and fractional parts are wrapped separately so the decimal
    point itself stays outside the bold groups.

    Args:
        floatStr: the number already formatted as a string, e.g. ``"12.34"``.

    Returns:
        ``"\\mathbf{12}.\\mathbf{34}"`` for ``"12.34"``; a string without a
        decimal point (e.g. ``"12"``) is wrapped as a whole instead of
        raising ``IndexError``.
    """
    split_num = floatStr.split('.')
    if len(split_num) == 1:
        # No decimal point: there is no fractional part to wrap.
        return "\\mathbf{" + split_num[0] + "}"
    return "\\mathbf{" + split_num[0] + "}.\\mathbf{" + split_num[1] + "}"
conn = sqlite3.connect('results.db')
conn.create_aggregate("GeomMean", 1, GeomMean)
conn.create_aggregate("Wilcoxon", 2, Wilcoxon)
c = conn.cursor()
readable = { "NUMBERJACK":"Toulbar2", "GECODE":"Gecode", "OR_TOOLS":"OR-Tools", "CHOCO":"Choco",
"JACOP":"JaCoP", "G12":"G12", "GECODE_NAT" : "Native Gecode"}
readableProblems = { "on-call-rostering":"On-call Rostering", "mspsp":"MSPSP", "soft-queens":"Soft N-Queens",
"talent-scheduling":"Talent Scheduling", "photo":"Photo Placement"}
from collections import defaultdict
problemToInstance = defaultdict(list)
c.execute("SELECT Problem, Count(Distinct Instance) as Instances FROM JobResult Group By Problem")
for row in c.fetchall():
problemToInstance[row[0]] = row[1]
c.execute("SELECT COUNT(*) FROM ( SELECT Distinct Instance FROM JobResult )")
res = c.fetchone()
numberProblems = res[0]
print "We tried", numberProblems, "instances."
In [2]:
# now we do the solver comparison
problemToInstance = defaultdict(list)
c.execute("SELECT Problem, Count(Distinct Instance) as Instances FROM JobResult Group By Problem")
for row in c.fetchall():
problemToInstance[row[0]] = row[1]
c.execute("SELECT COUNT(*) FROM ( SELECT Distinct Instance FROM JobResult )")
res = c.fetchone()
numberProblems = res[0]
print "We tried", numberProblems, "instances."
scriptFile = open("query-native-solver-comparison-pure-views.sql", 'r')
script = scriptFile.read()
scriptFile.close()
c.executescript(script)
conn.commit()
scriptFile = open("query-native-solver-comparison-pure.sql",'r')
script = scriptFile.read()
scriptFile.close()
c.execute(script)
currProblem = ""
print "\\begin{tabular*}{\\textwidth}{@{\\extracolsep{\\fill} }l" + \
"".join(["d{1.5}" for i in range(0,1)]) + "cd{1.5}" + "".join(["d{1.1}" for i in range(0,2)]) + "}"
print "\\toprule"
print '''\\multicolumn{1}{c}{Solver} & \multicolumn{1}{c}{Time (secs)}
& \multicolumn{1}{c}{\\# Wins}
& \multicolumn{1}{c}{Objective}
& \multicolumn{1}{c}{\% Solved} & \multicolumn{1}{c}{\% Optimal} \\\\'''
for row in c.fetchall():
(problem, solverId, solverName, elapsed, elapsedSpan, relElapsed, \
objective, relObjective, wins, solved, optimally) = row
if currProblem != problem:
#print "Starting .... ", problem
currProblem = problem
print "\\midrule"
print "\\multicolumn{2}{l}{" + readableProblems[problem] + " ("+ str(problemToInstance[problem]) + " instances) } \\\\"
print "\\midrule"
print " ", readable[solverName], "&", '{0:.2f}'.format(elapsed),\
"\\quad ("+'{0:.2f}'.format(relElapsed)+")" "&", '{0:.0f}'.format(wins), \
"&", '{0:.2f}'.format(objective), "\\quad ("+'{0:.2f}'.format(relObjective)+")", "&", \
'{0:.2f}'.format(solved), "&",'{0:.2f}'.format(optimally), "\\\\"
print "\\bottomrule"
print "\\end{tabular*}"
In [48]:
scriptFile = open("query-native-vs-strictbab-overhead-views.sql",'r')
script = scriptFile.read()
scriptFile.close()
c.executescript(script)
conn.commit()
# now we do the solver comparison
problemToInstance = defaultdict(list)
c.execute("SELECT Problem, Count(Distinct Instance) as Instances FROM PvsNativeSummary Group By Problem")
for row in c.fetchall():
problemToInstance[row[0]] = row[1]
c.execute("SELECT COUNT(*) FROM ( SELECT Distinct Instance FROM PvsNativeSummary )")
res = c.fetchone()
numberProblems = res[0]
print "We tried", numberProblems, "instances."
scriptFile = open("query-native-vs-strictbab-overhead.sql",'r')
script = scriptFile.read()
scriptFile.close()
currProblem = ""
print "\\begin{tabular*}{\\textwidth}{@{\\extracolsep{\\fill} }l" + \
"".join(["d{1.1}" for i in range(0,5)]) + "}"
print "\\toprule"
print '''\\multicolumn{1}{c}{Solver} & \multicolumn{1}{c}{Time Smyth}
& \multicolumn{1}{c}{Time Weighted}
& \multicolumn{1}{c}{Time Toulbar2}
& \multicolumn{1}{c}{Obj. Smyth} & \multicolumn{1}{c}{Obj. Weighted} \\\\'''
c.execute(script)
def boldify(floatStr):
split_num = floatStr.split('.')
return "\\textbf{" + split_num[0]+"}.\\textbf{"+split_num[1] + "}"
for row in c.fetchall():
(problem, solverName, elapsedSmyth, elapsedWeights, absoluteOverhead, relOverhead, weightsObj, smythObj, elapsedTb) \
= row
if currProblem != problem:
#print "Starting .... ", problem
currProblem = problem
print "\\midrule"
print "\\multicolumn{2}{l}{" + readableProblems[problem] + " ("+ str(problemToInstance[problem]) + " instances) } \\\\"
print "\\midrule"
if elapsedSmyth < elapsedWeights:
elapsedSmythText = boldify('{0:.2f}'.format(elapsedSmyth))
elapsedWeightsText = '{0:.2f}'.format(elapsedWeights)
else:
elapsedWeightsText = boldify('{0:.2f}'.format(elapsedWeights))
elapsedSmythText = '{0:.2f}'.format(elapsedSmyth)
print " ", readable[solverName], \
"&", elapsedSmythText,\
"&", elapsedWeightsText, "&", \
"\\emph{-}" if (currProblem == "mspsp" or currProblem == "talent-scheduling") \
else "\\emph{" + '{0:.2f}'.format(elapsedTb) + "}", \
"&", '{0:.2f}'.format(smythObj), "&", '{0:.2f}'.format(weightsObj), "\\\\"
currProblem = ""
print "\\bottomrule"
print "\\end{tabular*}"
In [13]:
scriptFile = open("query-dom-vs-nondom-views.sql",'r')
script = scriptFile.read()
scriptFile.close()
c.executescript(script)
conn.commit()
scriptFile = open("query-dom-vs-nondom-query.sql",'r')
script = scriptFile.read()
scriptFile.close()
currProblem = ""
print "\\begin{tabular*}{\\textwidth}{@{\\extracolsep{\\fill} }l" + \
"".join(["d{1.1}" for i in range(0,4)]) + "}"
print "\\toprule"
print '''\\multicolumn{1}{c}{Problem} & \multicolumn{1}{c}{Time Non-Dominated BaB}
& \multicolumn{1}{c}{Time Strict BaB}
& \multicolumn{1}{c}{Absolute Overhead}
& \multicolumn{1}{c}{Relative Overhead} \\\\'''
print "\\midrule"
c.execute(script)
for row in c.fetchall():
(problem, nonDomElapsed, domElapsed, absoluteOverhead, relOverhead, significant) = row
if domElapsed < nonDomElapsed:
domElapsedText = boldify('{0:.2f}'.format(domElapsed))
nonDomElapsedText = '{0:.2f}'.format(nonDomElapsed)
else:
nonDomElapsedText = boldify('{0:.2f}'.format(nonDomElapsed))
domElapsedText = '{0:.2f}'.format(domElapsed)
print " ", readableProblems[problem]+("*" if significant else ""), \
"&", nonDomElapsedText,\
"&", domElapsedText, "&", \
'{0:.2f}'.format(absoluteOverhead), \
"&", '{0:.2f}'.format(relOverhead), "\\\\"
scriptFile = open("query-dom-vs-nondom-overall.sql",'r')
script = scriptFile.read()
scriptFile.close()
print "\\midrule"
c.execute(script)
for row in c.fetchall():
(problem, nonDomElapsed, domElapsed, absoluteOverhead, relOverhead, significant) = row
if domElapsed < nonDomElapsed:
domElapsedText = boldify('{0:.2f}'.format(domElapsed))
nonDomElapsedText = '{0:.2f}'.format(nonDomElapsed)
else:
nonDomElapsedText = boldify('{0:.2f}'.format(nonDomElapsed))
domElapsedText = '{0:.2f}'.format(domElapsed)
print problem+("*" if significant else ""), \
"&", nonDomElapsedText,\
"&", domElapsedText, "&", \
'{0:.2f}'.format(absoluteOverhead), \
"&", '{0:.2f}'.format(relOverhead), "\\\\"
# query-dom-vs-nondom-overall.sql
print "\\bottomrule"
print "\\end{tabular*}"
In [50]:
# Global matplotlib styling (fonts and font sizes) for the figures below.
import matplotlib.pyplot as plt

# Assigned one by one through rcParams[...] so every value goes through the
# same item assignment as before.
for rcKey, rcValue in (
        ('font.family', 'serif'),
        ('font.serif', 'Ubuntu'),
        ('font.monospace', 'Ubuntu Mono'),
        ('font.size', 10),
        ('axes.labelsize', 10),
        ('axes.labelweight', 'bold'),
        ('xtick.labelsize', 9),
        ('ytick.labelsize', 9),
        ('legend.fontsize', 10),
        ('figure.titlesize', 12)):
    plt.rcParams[rcKey] = rcValue
Attention: this table is customized afterwards.
In [51]:
scriptFile = open("query-mif-stat-prob.sql",'r')
script = scriptFile.read()
scriptFile.close()
c.execute(script)
mifElapseds = defaultdict(list)
mifStds = defaultdict(list)
normalElapseds = defaultdict(list)
normalStds = defaultdict(list)
# a dictionary for significance lookup
solversSignificant = defaultdict(list)
solvers = []
currProb = ""
for row in c.fetchall():
(problem, solver, mifElapsed, normalElapsed) = row
if not(solver in solvers):
solvers += [solver]
mifElapseds[solver] += [mifElapsed]
normalElapseds[solver] += [normalElapsed]
for s in solvers:
print s
[t, prob] = st.wilcoxon(mifElapseds[s], normalElapseds[s])
if prob < 0.05:
print "SIGNIFICANT t=", t, " prob = ", prob
solversSignificant[s] = True
else:
print "insignificant t=", t, " prob = ", prob
solversSignificant[s] = False
In [64]:
# first the views
scriptFile = open("query-mif-comp.sql",'r')
script = scriptFile.read()
c.executescript(script)
conn.commit()
scriptFile.close()
# then the highest-level aggregation
scriptFile = open("query-mif-comp-summary-couting.sql",'r')
script = scriptFile.read()
scriptFile.close()
c.execute(script)
(avgDiff, sumMifWins, insts, ratio) = c.fetchone()
print "Over all", insts, "runs across solvers, problem instances and search types, the MIF heuristic " \
"led to a faster runtime in", sumMifWins, "cases", "("+'{0:.2f}'.format(ratio)+" \%) with the average runtime reduced by "+ \
'{0:.2f}'.format(abs(avgDiff)) +" seconds."
scriptFile = open("query-mif-comp-solver.sql",'r')
script = scriptFile.read()
scriptFile.close()
c.execute(script)
timeDiffs = defaultdict(list)
relTimeDiffs = defaultdict(list)
mifElapseds = defaultdict(list)
mifStds = defaultdict(list)
normalElapseds = defaultdict(list)
normalStds = defaultdict(list)
mifWinss = defaultdict(list)
instances = defaultdict(list)
ratios = defaultdict(list)
solvers = []
for row in c.fetchall():
(solverName, mifElapsed, mifVar, normalElapsed, normalVar, timeDiff, relTimeDiff, mifWins, overall, ratio) = row
solvers += [solverName]
timeDiffs[solverName] = timeDiff
relTimeDiffs[solverName] = relTimeDiff
mifElapseds[solverName] = mifElapsed
mifStds[solverName] = np.sqrt(mifVar)
normalElapseds[solverName] = normalElapsed
normalStds[solverName] = np.sqrt(normalVar)
mifWinss[solverName] = mifWins
instances[solverName] = overall
ratios[solverName] = ratio
print solvers
print overall, "instances are included in these averages."
print "\\begin{tabular*}{\\textwidth}{@{\\extracolsep{\\fill} }l" + \
"".join(["d{1.1}" for s in [1]+solvers]) + "}"
print "\\toprule"
print " & ", " & ".join(["\\multicolumn{1}{c}{" + readable[s] + \
("*" if solversSignificant[s] else "") +"}" for s in solvers]), "\\\\"
print "\\midrule"
print "Instances & ", " & ".join(['{0:.0f}'.format(instances[s]) for s in solvers]), "\\\\"
print "Runtime difference & ", \
" & ".join(['{0:.2f}'.format(timeDiffs[s]) if timeDiffs[s] >= 0 else boldify('{0:.2f}'.format(timeDiffs[s]))\
for s in solvers]), "\\\\"
print "Rel. runtime diff. & ", " & ".join(['{0:.2f}'.format(relTimeDiffs[s]) for s in solvers]), "\\\\"
#print "# MIF wins & ", " & ".join(['{0:.2f}'.format(timeDiffs[s]) for s in solvers]), "\\\\"
print "Ratio MIF wins & ", \
" & ".join([ '{0:.2f}'.format(ratios[s]) if ratios[s] < 0.5 else boldify('{0:.2f}'.format(ratios[s]))\
for s in solvers]), "\\\\"
#print "Runtime difference & ", " & ".join(['{0:.2f}'.format(timeDiffs[s]) for s in solvers]), "\\\\"
print "\\bottomrule"
print "\\end{tabular*}"
In [53]:
"""
Bar chart demo with pairs of bars grouped for easy comparison.
"""
import numpy as np
isseorange = (1.0, 0.57647, 0.039216)
#\definecolor{issegrey}{RGB}{80,85,82}
issegrey = (80.0 / 255, 85.0 / 255, 82.0 / 255)
n_groups = len(solvers)
means_mif = [mifElapseds[s] for s in solvers]
std_mif = [mifStds[s] for s in solvers]
print means_mif
print std_mif
means_nomif = [normalElapseds[s] for s in solvers]
std_nomif = [normalStds[s] for s in solvers]
print means_nomif
print std_nomif
fig, ax = plt.subplots()
index = np.arange(n_groups)
bar_width = 0.23
opacity = 0.9
error_config = {'ecolor': '0.3'}
plt.ylim([0,250])
plt.xlim([0,7])
rects1 = plt.bar(index, means_mif, bar_width,
alpha=opacity,
color=isseorange,
error_kw=error_config,
hatch="/",
label='MIF')
rects2 = plt.bar(index + bar_width, means_nomif, bar_width,
alpha=opacity,
color=issegrey,
hatch="\\",
error_kw=error_config,
label='No-MIF')
plt.xlabel('Solver')
plt.ylabel('Avg. Runtimes (secs)')
#plt.title('Runtimes by solver and heuristic')
plt.xticks(index + bar_width , ["Choco*", "G12", "Gecode", "Gecode Nat.", "JaCoP", "Toulbar2", "OR-Tools"])
# [ s if s != "NUMBERJACK" else "TOULBAR2" for s in solvers])
plt.legend()
plt.tight_layout()
# plt.savefig('runtime-mif-solver.pdf', bbox_inches='tight')
plt.show()
In [2]:
scriptFile = open("query-mif-stat-prob.sql",'r')
script = scriptFile.read()
scriptFile.close()
c.execute(script)
mifElapseds = defaultdict(list)
mifStds = defaultdict(list)
normalElapseds = defaultdict(list)
normalStds = defaultdict(list)
# a dictionary for significance lookup
problemsSignificant = defaultdict(list)
problems = []
currProb = ""
for row in c.fetchall():
(problem, solver, mifElapsed, normalElapsed) = row
if currProb != problem:
problems += [problem]
currProb = problem
mifElapseds[problem] += [mifElapsed]
normalElapseds[problem] += [normalElapsed]
for p in problems:
print p
print len(mifElapseds[p])
print np.mean(mifElapseds[p]), " -- ", np.std(mifElapseds[p])
print np.mean(normalElapseds[p]), " -- ", np.std(normalElapseds[p])
#print [mifElapseds[p][i] < normalElapseds[p][i] | i in range(0, len(mifElapseds[p]))]
print sum(np.array(mifElapseds[p]) < np.array(normalElapseds[p]))
print sum(np.array(normalElapseds[p]) < np.array(mifElapseds[p]))
[t, prob] = st.wilcoxon(mifElapseds[p], normalElapseds[p], zero_method="wilcox")
if prob < 0.05:
print "SIGNIFICANT t=", t, " prob = ", prob
problemsSignificant[p] = True
else:
print "insignificant t=", t, " prob = ", prob
problemsSignificant[p] = False
The query for the per-problem table:
In [3]:
scriptFile = open("query-mif-comp-problems.sql",'r')
script = scriptFile.read()
scriptFile.close()
c.execute(script)
timeDiffs = defaultdict(list)
relTimeDiffs = defaultdict(list)
mifElapseds = defaultdict(list)
mifStds = defaultdict(list)
normalElapseds = defaultdict(list)
normalStds = defaultdict(list)
mifWinss = defaultdict(list)
instances = defaultdict(list)
ratios = defaultdict(list)
problems = []
for row in c.fetchall():
(problem, mifElapsed, mifVar, normalElapsed, normalVar, timeDiff, relTimeDiff, mifWins, overall, ratio) = row
problems += [problem]
timeDiffs[problem] = timeDiff
relTimeDiffs[problem] = relTimeDiff
mifElapseds[problem] = mifElapsed
mifStds[problem] = np.sqrt(mifVar)
normalElapseds[problem] = normalElapsed
normalStds[problem] = np.sqrt(normalVar)
mifWinss[problem] = mifWins
instances[problem] = overall
ratios[problem] = ratio
#print row
print problems
print overall, "instances are included in these averages."
print "\\begin{tabular*}{\\textwidth}{@{\\extracolsep{\\fill} }l" + \
"".join(["d{1.1}" for p in [1]+problems]) + "}"
print "\\toprule"
print " & ", " & ".join(["\\multicolumn{1}{c}{" + readableProblems[s] +\
("*" if problemsSignificant[s] else "") + "}" for s in problems]), "\\\\"
print "\\midrule"
print "Instances & ", " & ".join(['{0:.0f}'.format(instances[s]) for s in problems]), "\\\\"
print "Runtime difference & ",\
" & ".join(['{0:.2f}'.format(timeDiffs[s]) if timeDiffs[s] >= 0 else boldify('{0:.2f}'.format(timeDiffs[s]))\
for s in problems]), "\\\\"
print "Rel. runtime diff. & ", " & ".join(['{0:.2f}'.format(relTimeDiffs[s]) for s in problems]), "\\\\"
#print "# MIF wins & ", " & ".join(['{0:.2f}'.format(timeDiffs[s]) for s in solvers]), "\\\\"
print "Ratio MIF wins & ",\
" & ".join(['{0:.2f}'.format(ratios[s]) if ratios[s] < 0.5 else boldify('{0:.2f}'.format(ratios[s]))\
for s in problems]), "\\\\"
#print "Runtime difference & ", " & ".join(['{0:.2f}'.format(timeDiffs[s]) for s in solvers]), "\\\\"
print "\\bottomrule"
print "\\end{tabular*}"
In [ ]:
In [56]:
"""
Bar chart demo with pairs of bars grouped for easy comparison.
"""
import numpy as np
isseorange = (1.0, 0.57647, 0.039216)
#\definecolor{issegrey}{RGB}{80,85,82}
issegrey = (80.0 / 255, 85.0 / 255, 82.0 / 255)
n_groups = len(problems)
means_mif = [mifElapseds[p] for p in problems]
std_mif = [mifStds[p] for p in problems]
print means_mif
print std_mif
means_nomif = [normalElapseds[p] for p in problems]
std_nomif = [normalStds[p] for p in problems]
print means_nomif
print std_nomif
fig, ax = plt.subplots()
index = np.arange(n_groups)
bar_width = 0.2
opacity = 0.9
error_config = {'ecolor': '0.3'}
plt.ylim([0,250])
plt.xlim([0,5])
rects1 = plt.bar(index, means_mif, bar_width,
alpha=opacity,
color=isseorange,
error_kw=error_config,
hatch="/",
label='MIF')
rects2 = plt.bar(index + bar_width, means_nomif, bar_width,
alpha=opacity,
color=issegrey,
hatch="\\",
error_kw=error_config,
label='No-MIF')
plt.xlabel('Problem')
plt.ylabel('Avg. Runtimes (secs)')
#plt.title('Runtimes by problem and heuristic')
plt.xticks(index + bar_width , ["MSPSP", "On-call Rostering", "Photo", "Soft Queens", "Talent Scheduling"])
plt.legend()
plt.tight_layout()
plt.savefig('runtime-mif-problem.pdf', bbox_inches='tight')
plt.show()
In [57]:
# the overall query of significance
import numpy as np
import scipy.stats as st
scriptFile = open("query-mif-stat.sql",'r')
script = scriptFile.read()
scriptFile.close()
c.execute(script)
mifElapseds = []
normalElapseds = []
for row in c.fetchall():
(mifElapsed, normalElapsed) = row
mifElapseds += [mifElapsed]
normalElapseds += [normalElapsed]
mif = np.array(mifElapseds)
noMif = np.array(normalElapseds)
print "MIF: ", np.mean(mif), " - ", np.std(mif)
print "No MIF: ", np.mean(noMif), " - ", np.std(noMif)
[t, prob] = st.wilcoxon(mif, noMif)
if prob < 0.01:
print "SIGNIFICANT t=", t, " prob = ", prob
else:
print "insignificant t=", t, " prob = ", prob
In [58]:
#conn.close()