In [1]:
# reads a file generated by a central run to calculate split times more accurately
import numpy as np
import StringIO
import matplotlib.pyplot as plt
import analysisUtils.analysisUtils as au
import pylab as P
import scipy.stats as st
import cleanFile
def visualizeColumn(columnA, columnB, inputs, prefix, outputfile, inputLabel, outputLabel, titleFig):
    """Plot the per-size mean of two CSV columns with error bars and save the figure.

    For every entry ``i`` of ``inputs`` the file ``prefix + str(i) + '.csv'`` is
    read (semicolon separated, first row holds the column names). NaN entries
    are dropped from each column independently, then mean and standard
    deviation are computed.

    Parameters
    ----------
    columnA : str
        Column plotted with the legend label 'Central'.
    columnB : str
        Column plotted with the legend label 'Hierarchical'.
    inputs : iterable
        Values used both as file-name suffix and as x coordinate.
    prefix : str
        Path prefix of the CSV files.
    outputfile : str
        Path the figure is saved to.
    inputLabel, outputLabel : str
        Axis labels for x and y.
    titleFig : str
        Accepted for interface compatibility; no title is drawn.

    Notes
    -----
    Only names imported by the notebook (``np``, ``plt``) are used, so the
    function does not depend on a ``%pylab`` session having injected ``numpy``,
    ``matplotlib``, ``xlabel``, ``ylabel`` or ``savefig``.
    """
    def _meanAndError(values):
        # Drop NaNs before computing the statistics.
        values = values[~np.isnan(values)]
        # NOTE(review): the error bar is std / 6 -- presumably the standard
        # error for 36 samples per file (sqrt(36) == 6); confirm.
        return values, np.mean(values), np.std(values) / 6

    central = []
    hierarchical = []
    for size in inputs:
        data = np.genfromtxt(prefix + str(size) + '.csv', dtype=float, delimiter=';',
                             skip_header=0, names=True, autostrip=True)

        valuesCentral, meanCentral, errCentral = _meanAndError(data[columnA])
        # Same text as the former Python 2 print statement (items joined by one space).
        print(' '.join(['mean: ', str(meanCentral), ' std: ', str(np.std(valuesCentral))]))
        print(valuesCentral)
        central.append((size, meanCentral, errCentral))

        _, meanRegio, errRegio = _meanAndError(data[columnB])
        hierarchical.append((size, meanRegio, errRegio))

    # reshape keeps the (n, 3) layout even when ``inputs`` is empty, so the
    # column slicing below cannot raise IndexError.
    central = np.array(central, dtype=float).reshape(-1, 3)
    hierarchical = np.array(hierarchical, dtype=float).reshape(-1, 3)
    print(central)
    print('-------')
    print(hierarchical)

    # A single figure is created; the former extra plt.figure() call opened an
    # empty figure that was never drawn on.
    fig, ax = plt.subplots()
    ax.errorbar(central[:, 0], central[:, 1], central[:, 2], label='Central', fmt='.-')
    ax.errorbar(hierarchical[:, 0], hierarchical[:, 1], hierarchical[:, 2],
                label='Hierarchical', fmt='x--')
    ax.legend(loc=2)
    ax.set_xlabel(inputLabel)
    ax.set_ylabel(outputLabel)
    fig.savefig(outputfile)
In [2]:
def visualizeLSP(columnA, columnB, columnC, inputs, prefix, outputfile, inputLabel, outputLabel, titleFig):
    """Plot per-size means of two CSV columns plus the maximum of a third and save the figure.

    For every entry ``i`` of ``inputs`` the file ``prefix + str(i) + '.csv'`` is
    read (semicolon separated, first row holds the column names). NaN entries
    are dropped from each column independently.

    Parameters
    ----------
    columnA : str
        Column whose mean is plotted with the legend label 'Central'.
    columnB : str
        Column whose mean is plotted with the legend label
        'Hierarchical Sequential'.
    columnC : str
        Column whose maximum (after scaling by 1e-9) is plotted with the
        legend label 'Longest Seq. Path'.
    inputs : iterable
        Values used both as file-name suffix and as x coordinate.
    prefix : str
        Path prefix of the CSV files.
    outputfile : str
        Path the figure is saved to.
    inputLabel, outputLabel : str
        Axis labels for x and y.
    titleFig : str
        Accepted for interface compatibility; no title is drawn.

    Notes
    -----
    Only names imported by the notebook (``np``, ``plt``) are used, so the
    function does not depend on a ``%pylab`` session having injected ``numpy``,
    ``matplotlib``, ``xlabel``, ``ylabel`` or ``savefig``.
    """
    def _withoutNaN(values):
        return values[~np.isnan(values)]

    central = []
    hierarchical = []
    longestPath = []
    for size in inputs:
        data = np.genfromtxt(prefix + str(size) + '.csv', dtype=float, delimiter=';',
                             skip_header=0, names=True, autostrip=True)
        valuesCentral = _withoutNaN(data[columnA])
        valuesRegio = _withoutNaN(data[columnB])
        # NOTE(review): the 1e-9 factor presumably converts nanoseconds to
        # seconds to match the other two series -- confirm against the writer.
        valuesLsp = _withoutNaN(data[columnC]) * 1e-9

        # NOTE(review): error bars are std / 6 -- presumably the standard error
        # for 36 samples per file (sqrt(36) == 6); confirm.
        central.append((size, np.mean(valuesCentral), np.std(valuesCentral) / 6))
        hierarchical.append((size, np.mean(valuesRegio), np.std(valuesRegio) / 6))
        longestPath.append((size, np.max(valuesLsp)))

    # reshape keeps the 2-D layout even when ``inputs`` is empty, so the column
    # slicing below cannot raise IndexError.
    central = np.array(central, dtype=float).reshape(-1, 3)
    hierarchical = np.array(hierarchical, dtype=float).reshape(-1, 3)
    longestPath = np.array(longestPath, dtype=float).reshape(-1, 2)
    print(central)
    print('-------')
    print(hierarchical)

    # A single figure is created; the former extra plt.figure() call opened an
    # empty figure that was never drawn on.
    fig, ax = plt.subplots()
    ax.errorbar(central[:, 0], central[:, 1], central[:, 2], label='Central', fmt='.-')
    ax.errorbar(hierarchical[:, 0], hierarchical[:, 1], hierarchical[:, 2],
                label='Hierarchical Sequential', fmt='x--')
    ax.plot(longestPath[:, 0], longestPath[:, 1], 'o:', label='Longest Seq. Path')
    ax.legend(loc=2)
    ax.set_xlabel(inputLabel)
    ax.set_ylabel(outputLabel)
    fig.savefig(outputfile)
In [3]:
from pylab import arange,pi,sin,cos,sqrt
# Figure geometry for inclusion in a LaTeX document: the width is given in TeX
# points and the height follows from the golden ratio.
fig_width_pt = 400  # Get this from LaTeX using \showthe\columnwidth
inches_per_pt = 1.0/72.27  # Convert pt to inch
golden_mean = (sqrt(5)-1.0)/2.0  # Aesthetic ratio
fig_width = fig_width_pt*inches_per_pt  # width in inches
fig_height = fig_width*golden_mean  # height in inches
fig_size = [fig_width, fig_height]
params = {'backend': 'ps',
          'axes.labelsize': 12,
          # 'font.size' replaces the former 'text.fontsize' key, which current
          # Matplotlib rejects as an unknown rcParam.
          'font.size': 12,
          'legend.fontsize': 12,
          'xtick.labelsize': 10,
          'ytick.labelsize': 10,
          # Labels are typeset by LaTeX, which is why label strings elsewhere
          # in the notebook escape '#' as '\#'.
          'text.usetex': True,
          'figure.figsize': fig_size}
# plt is imported at the top of the notebook; the bare name `pylab` is not
# (only `import pylab as P`), so it existed only inside a %pylab session.
plt.rcParams.update(params)
In [4]:
# Runtime-per-step comparison (central vs. hierarchical) over the number of plants.
prefix = 'scalability/scalability'
column = 'runtimePerStepCentral'
columnRegio = 'runtimePerStepRegioCentral'
# Raw strings: '\#' is not a Python escape sequence; the backslash is meant
# for LaTeX, which renders the labels.
inputLabel = r'\# plants'
outputLabel = 'runtime per step (secs)'
titleFig = r'Mean runtimes per step for \#plants'
inputs = [50, 100, 150, 200, 250, 300, 350, 400, 700, 800, 900]
outputfile = 'meantimesComparison.pdf'
visualizeColumn(column, columnRegio, inputs, prefix, outputfile, inputLabel, outputLabel, titleFig)

# Settings for the cost comparison. The call itself is disabled below, so these
# assignments only change the module-level variables.
column = 'Top_level_costs_central'
columnRegio = 'Top_level_costs_regio_central'
outputfile = 'costsComparison.pdf'
inputLabel = r'\# plants'
outputLabel = 'Costs (EUR)'
titleFig = r'Mean costs per \#plants'
#visualizeColumn(column,columnRegio, inputs, prefix, outputfile, inputLabel, outputLabel, titleFig)
In [5]:
# Runtime-per-step comparison including the longest sequential path.
prefix = 'scalability/scalability'
column = 'runtimePerStepCentral'
columnRegio = 'runtimePerStepRegioCentral'
columnLSP = 'Longest_serial_path'
# Raw strings: '\#' is not a Python escape sequence; the backslash is meant
# for LaTeX, which renders the labels.
inputLabel = r'\# plants'
outputLabel = 'runtime per step (secs)'
titleFig = r'Mean runtimes per step for \#plants'
# NOTE(review): 800 is absent here although the other cells include it --
# presumably that file has no usable longest-path column; confirm.
inputs = [50, 100, 150, 200, 250, 300, 350, 400, 700, 900]
outputfile = 'meantimesComparisonLSP.pdf'
visualizeLSP(column, columnRegio, columnLSP, inputs, prefix, outputfile, inputLabel, outputLabel, titleFig)

# Settings for the cost comparison. The call itself is disabled below, so these
# assignments only change the module-level variables.
column = 'Top_level_costs_central'
columnRegio = 'Top_level_costs_regio_central'
outputfile = 'costsComparison.pdf'
inputLabel = r'\# plants'
outputLabel = 'Costs (EUR)'
titleFig = r'Mean costs per \#plants'
#visualizeColumn(column,columnRegio, inputs, prefix, outputfile, inputLabel, outputLabel, titleFig)
In [6]:
# just a playground
import numpy as np
def printTable(X, cols, rows, rowLabel):
    """Print ``X`` as a LaTeX ``tabular*`` environment with booktabs rules.

    Parameters
    ----------
    X : iterable of iterables
        Table body; every cell is converted with ``str``.
    cols : sequence of str
        Column headers; each is wrapped in ``\\multicolumn{1}{c}{...}``.
    rows : sequence
        Row labels; ``rows[k]`` is printed in front of the k-th row of ``X``.
    rowLabel : str
        Header of the label column.

    Raises
    ------
    IndexError
        If ``X`` has more rows than ``rows`` has labels.

    Notes
    -----
    The emitted text is identical to what the former Python 2 print
    statements produced, including the space a trailing-comma ``print``
    inserted between consecutive items of a body row.
    """
    # One 'l' column for the labels followed by len(cols) + 1 'd{3}' columns.
    # NOTE(review): that is one more 'd{3}' than there are headers; kept as in
    # the original -- confirm whether the spare column is intentional.
    # 'd' is not a built-in LaTeX column type; presumably the document defines it.
    columnSpec = "l" + "d{3}" * (len(cols) + 1)
    print("\\begin{tabular*}{\\textwidth}{@{\\extracolsep{\\fill} }" + columnSpec + "}")
    print("\\toprule")

    headerCells = ["\\multicolumn{1}{c}{" + header + "}" for header in cols]
    print(rowLabel + " & " + " & ".join(headerCells) + "\\\\")
    print("\\midrule")

    for rowIndex, row in enumerate(X):
        pieces = [str(rows[rowIndex])]
        pieces.extend(" & " + str(cell) for cell in row)
        pieces.append("\\\\")
        print(" ".join(pieces))

    print("\\bottomrule")
    print("\\end{tabular*}")
In [7]:
# Prepare the LaTeX table: per problem size, mean~(std) of runtime per step and
# of top-level costs for the central and the hierarchical run, plus the
# relative runtime and the mean relative extra costs.
inputs = [50, 100, 150, 200, 250, 300, 350, 400, 700, 800, 900]
prefix = 'scalability/scalability'
# `columns` and `b` are not used below; they are kept because they are
# module-level names that other cells may read.
columns = ['runtimePerStepCentral', 'runtimePerStepRegioCentral', 'Top_level_costs_central', 'Top_level_costs_regio_central']
# Backslashes are doubled throughout: '\G' is not a Python escape sequence.
colHeaders = ['$T_{\\text{centr}}$', '$T_{\\text{hier}}$', 'Rel.', '$\\Gamma_{\\text{centr}}$', '$\\Gamma_{\\text{hier}}$', 'Rel.']
column = 'Top_level_costs_central'
columnRegio = 'Top_level_costs_regio_central'
columnTime = 'runtimePerStepCentral'
columnRegioTime = 'runtimePerStepRegioCentral'
rows = [str(i) for i in inputs]
b = [[i, ' '] for i in rows]


def _meanStdCell(values):
    """Format ``values`` as 'mean~(std)', both rounded to two decimals."""
    return str(np.around(np.mean(values), 2)) + "~" + "(" + str(np.around(np.std(values), 2)) + ")"


X = []
for inp in inputs:
    fileName = prefix + str(inp) + '.csv'
    data = np.genfromtxt(fileName, dtype=float, delimiter=';', skip_header=0, names=True, autostrip=True)

    # au.readCol is a project helper -- presumably it returns the named column
    # as an array with missing entries removed; confirm.
    timeCentral = au.readCol(data, columnTime)
    timeRegio = au.readCol(data, columnRegioTime)
    # Hierarchical runtime as a percentage of the central runtime.
    relRuntime = 100 * (np.mean(timeRegio) / np.mean(timeCentral))

    costsCentral = au.readCol(data, column)
    costsRegio = au.readCol(data, columnRegio)
    # Element-wise relative extra costs in percent; requires both cost arrays
    # to have the same length.
    extraCosts = (-1.0 + (costsRegio / costsCentral)) * 100.0

    newLine = [
        _meanStdCell(timeCentral),
        _meanStdCell(timeRegio),
        str(np.around(relRuntime, 2)) + " \\%",
        _meanStdCell(costsCentral),
        # Fixed: this cell used to lack the "~" between mean and "(std)",
        # unlike the other three mean/std cells.
        _meanStdCell(costsRegio),
        "+ " + str(np.around(np.mean(extraCosts), 2)) + " \\%",
    ]
    X += [newLine]

# Same debug text the Python 2 prints produced: the processed sizes followed by
# the raw table, all on one line.
print(' '.join([str(inp) for inp in inputs] + [str(X)]))
printTable(X, colHeaders, rows, '\\# plants')
In [8]:
# Paired t-tests per problem size: central vs. hierarchical, once for the
# runtime per step and once for the top-level costs.
inputs = [50, 100, 150, 200, 250, 300, 350, 400, 700, 800, 900]
prefix = 'scalability/scalability'
column = 'Top_level_costs_central'
columnRegio = 'Top_level_costs_regio_central'
columnTime = 'runtimePerStepCentral'
columnRegioTime = 'runtimePerStepRegioCentral'
# The table scaffolding copied from the previous cell (columns, colHeaders,
# rows, b, X, newLine, stdDevLine) was unused here and is gone; in particular
# `X = []` no longer discards the table built by that cell.

# A difference is reported as significant when p falls below this level.
SIGNIFICANCE_LEVEL = 0.01


def _reportPairedTTest(label, sampleA, sampleB):
    """Run a paired t-test on two equally long samples and print the verdict.

    The printed line has the same text the former pair of Python 2 print
    statements produced (items joined by single spaces).
    """
    t, prob = st.ttest_rel(sampleA, sampleB)
    if prob < SIGNIFICANCE_LEVEL:
        verdict = ' SIGNIFICANT t='
    else:
        verdict = ' insignificant t='
    print(' '.join([label, verdict, str(t), ' prob = ', str(prob)]))
    return t, prob


for inp in inputs:
    fileName = prefix + str(inp) + '.csv'
    data = np.genfromtxt(fileName, dtype=float, delimiter=';', skip_header=0, names=True, autostrip=True)

    # au.readCol is a project helper -- presumably it returns the named column
    # as an array with missing entries removed; confirm.
    timeCentral = au.readCol(data, columnTime)
    timeRegio = au.readCol(data, columnRegioTime)
    print(' '.join([' Len, ', str(timeCentral.shape)]))
    print(' '.join(['For ', str(inp), ' plants: ']))
    _reportPairedTTest(' Times: ', timeCentral, timeRegio)

    costsCentral = au.readCol(data, column)
    costsRegio = au.readCol(data, columnRegio)
    _reportPairedTTest(' Costs: ', costsCentral, costsRegio)
In [9]:
# Does the abstraction time amortize?
# Per problem size, compare per-step and total runtimes of the central and the
# hierarchical run, and report the part of the hierarchical total that is not
# explained by its per-step runtime.
inputs = [50, 100, 150, 200, 250, 300, 350, 400, 700, 800, 900]
prefix = 'scalability/scalability'
columnTotalTime = 'Total_runtime_central'
columnTotalTimeRegio = 'Total_runtime_regio_central'
columnTime = 'runtimePerStepCentral'
columnRegioTime = 'runtimePerStepRegioCentral'

# NOTE(review): 48 was a bare literal -- presumably the number of steps per
# run; confirm.
STEPS_PER_RUN = 48

for inp in inputs:
    fileName = prefix + str(inp) + '.csv'
    data = np.genfromtxt(fileName, dtype=float, delimiter=';', skip_header=0, names=True, autostrip=True)

    # au.readCol is a project helper -- presumably it returns the named column
    # as an array with missing entries removed; confirm.
    timeCentral = au.readCol(data, columnTime)
    timeRegio = au.readCol(data, columnRegioTime)
    totalTimeCentral = au.readCol(data, columnTotalTime)
    totalTimeRegio = au.readCol(data, columnTotalTimeRegio)

    meanStepCentral = np.mean(timeCentral)
    meanStepRegio = np.mean(timeRegio)
    meanTotalCentral = np.mean(totalTimeCentral)
    meanTotalRegio = np.mean(totalTimeRegio)

    # Each line reproduces the text of the former Python 2 print statement
    # (items joined by single spaces).
    print(' '.join([str(inp), ': ']))
    print(' '.join([' time / step central ', str(meanStepCentral),
                    ' time / step regio ', str(meanStepRegio)]))
    print(' '.join([' total time central ', str(meanTotalCentral),
                    ' total time regio ', str(meanTotalRegio)]))
    print(' '.join([' total ratio: ', str(meanTotalRegio / meanTotalCentral)]))

    # Variable part of the hierarchical total: per-step runtime times the
    # number of steps. The remainder is reported as the abstraction duration.
    varCosts = STEPS_PER_RUN * meanStepRegio
    diff = meanTotalRegio - varCosts
    print(' '.join([' abstraction duration: ', str(diff), ' rel: ', str(diff / meanTotalRegio)]))
In [9]: