In [1]:
import numpy as np
import matplotlib.pyplot as plt
import analysisUtils.analysisUtils as au
import pylab
import scipy.stats as st
import GPy
In [2]:
# read test file
prefix = 'sampling/'
testFile = prefix + 'test.csv'
data = np.genfromtxt(testFile,dtype=float, delimiter=';', skip_header=0,names=True)
print data['column_B']
In [3]:
cps = [50, 200, 500]
sps = [5, 10, 15, 25, 35]
costsColumn = 'Top_level_costs_regio_central'
runtimeColumn = 'Total_runtime_regio_central'
for cp in cps:
# load central file
dataCentral = np.genfromtxt(prefix+'cp_'+str(cp)+'_.csv', dtype=float, delimiter=';', skip_header=0,names=True)
costsCentral = au.readCol(dataCentral, 'Top_level_costs_central')
meanCostsCentral = np.mean(costsCentral)
runtimeCentral = au.readCol(dataCentral, 'Total_runtime_central')
meanTimeCentral = np.mean(runtimeCentral)
print 'For ' + str(cp) + ' plants: Opt. Costs ', meanCostsCentral, ' Runtime Central ', meanTimeCentral
allSpCosts = []
for sp in sps:
fileName = prefix + 'sps_'+str(sp)+ '_cp_'+str(cp)+'_.csv'
data = np.genfromtxt(fileName,dtype=float, delimiter=';', skip_header=0,names=True)
costs = au.readCol(data, costsColumn)
meanCosts = np.mean(costs)
overShoot = meanCosts / meanCostsCentral
additionalCosts = meanCosts - meanCostsCentral
runtime = au.readCol(data, runtimeColumn)
meanTime = np.mean(runtime)
allSpCosts += [additionalCosts / cp]
print ' '+str(sp) + ' Sps: ' + str(meanCosts) + ' (+ ' + str(overShoot) + ' ; + ' + str(additionalCosts) + ' € ) / ' + str(meanTime)
y_pos = np.arange(len(sps))
plt.bar(y_pos, allSpCosts, align='center', alpha=0.4)
plt.xticks(y_pos, sps)
plt.title(str(cp) + ' plants ')
plt.figure()
In [12]:
cps = [50, 200, 500]
sps = [5, 10, 15, 25, 35]
costsColumn = 'Top_level_costs_regio_central'
runtimeColumn = 'Total_runtime_regio_central'
allOffSetCosts = { 5 : [],
10: [],
15 : [],
25 : [],
35 : [] }# for sp = 5 a list (50, 200, 500), for sp = 10 a list (50, 200, 500) ...
allOffSetCostsStd = { 5 : [],
10: [],
15: [],
25 : [],
35 : [] }# for sp = 5 a list (50, 200, 500), for sp = 10 a list (50, 200, 500) ...
allRuntimes = { 5 : [],
10: [],
15 : [],
25 : [],
35 : [] }# for sp = 5 a list (50, 200, 500), for sp = 10 a list (50, 200, 500) ...
for cp in cps:
# load central file
dataCentral = np.genfromtxt(prefix+'cp_'+str(cp)+'_.csv', dtype=float, delimiter=';', skip_header=0,names=True)
costsCentral = au.readCol(dataCentral, 'Top_level_costs_central')
meanCostsCentral = np.mean(costsCentral)
runtimeCentral = au.readCol(dataCentral, 'Total_runtime_central')
meanTimeCentral = np.mean(runtimeCentral)
print 'For ' + str(cp) + ' plants: Opt. Costs ', meanCostsCentral, ' Runtime Central ', meanTimeCentral
allSpCosts = []
for sp in sps:
fileName = prefix + 'sps_'+str(sp)+ '_cp_'+str(cp)+'_.csv'
data = np.genfromtxt(fileName,dtype=float, delimiter=';', skip_header=0,names=True)
costs = au.readCol(data, costsColumn)
allExtraCosts = costs - costsCentral
meanExtraCosts = np.mean(allExtraCosts)
overShoot = meanExtraCosts / meanCostsCentral
stdExtraCosts = np.sqrt(np.std(allExtraCosts))
stdExtraCosts = 0.25 * np.std(allExtraCosts)
allOffSetCosts[sp] += [meanExtraCosts / cp]
allOffSetCostsStd[sp] += [stdExtraCosts / cp]
runtime = au.readCol(data, runtimeColumn)
meanTime = np.mean(runtime)
print ' '+str(sp) + ' Sps: ' + str(meanExtraCosts) + ' (+ ' + str(overShoot) + ' ; + ' + str(meanExtraCosts) + ' € ) / ' + str(meanTime)
#y_pos = np.arange(len(sps))
#plt.bar(y_pos, allSpCosts, align='center', alpha=0.4)
#plt.xticks(y_pos, sps)
#plt.title(str(cp) + ' plants ')
#plt.figure()
In [13]:
# Grouped bar chart of the extra costs per plant: one group per problem size,
# one bar per sampling-point setting, error bars from allOffSetCostsStd.
# Reads cps, sps, allOffSetCosts and allOffSetCostsStd left behind by an
# earlier cell; every list in the two dicts must have len(cps) entries.
N = len(cps)
ind = np.arange(N)   # x position of the first bar of every group
width = 0.15         # width of a single bar

# Gray level and hatch pattern per sampling-point setting.
colors = {5: '0.95', 10: '0.8', 15: '0.65', 25: '0.45', 35: '0.25'}
hatches = {5: '//', 10: '\\\\', 15: 'x', 25: 'o', 35: '-'}

fig, ax = plt.subplots()
for i, sp in enumerate(sps):
    # align is given explicitly so the bar positions do not depend on the
    # matplotlib version's default.
    ax.bar(ind + i * width, allOffSetCosts[sp], width,
           color=colors[sp], yerr=allOffSetCostsStd[sp], ecolor='black',
           hatch=hatches[sp], label=str(sp) + ' sps', align='center')

ax.set_ylabel('Extra costs per plant (EUR)')
# Centre each tick under its group of len(sps) bars. The former `ind + width`
# is only the centre of a two-bar group.
ax.set_xticks(ind + width * (len(sps) - 1) / 2.0)
ax.set_xticklabels(cps)
ax.set_xlabel('Problem sizes (\\# plants)')
ax.legend(loc=2)

# `savefig` was called as a bare name, which is undefined with this notebook's
# imports; save through the figure object instead.
fig.savefig('SamplingCosts.pdf')
plt.show()
In [6]:
# Matplotlib settings for figures embedded in a LaTeX document: the figure is
# as wide as the given column width, with a golden-ratio height.
from pylab import arange,pi,sin,cos,sqrt  # kept as-is: also exposes these names to later cells

fig_width_pt = 400                        # Get this from LaTeX using \showthe\columnwidth
inches_per_pt = 1.0 / 72.27               # Convert pt to inch
golden_mean = (sqrt(5) - 1.0) / 2.0       # Aesthetic ratio
fig_width = fig_width_pt * inches_per_pt  # width in inches
fig_height = fig_width * golden_mean      # height in inches
fig_size = [fig_width, fig_height]

params = {
    'backend': 'ps',
    'axes.labelsize': 12,
    # 'text.fontsize' is a deprecated alias of 'font.size' that newer
    # matplotlib releases no longer accept; 'font.size' works everywhere.
    'font.size': 12,
    'legend.fontsize': 12,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'text.usetex': True,
    'figure.figsize': fig_size,
}
pylab.rcParams.update(params)
In [7]:
cps = [50, 200, 500]
sps = [5, 10, 15, 25, 35]
costsColumn = 'Top_level_costs_regio_central'
runtimeColumn = 'Total_runtime_regio_central'
topString = "\\begin{tabular*}{\\textwidth}{@{\extracolsep{\\fill} }l"
cols = 5
singleCol = "d{3}"
colString = singleCol * cols
topString = topString + colString + "}"
print topString
print "\\toprule"
colHeaders = ['\# sampling points', 'costs per step', 'rel. overhead', 'abs. overhead', 'runtime']
headers = ""
first = True
for h in colHeaders:
if(first):
first = False
else:
headers = headers + " & "
headers = headers + "\multicolumn{1}{c}{"+h+"}"
print headers + "\\\\"
for cp in cps:
# load central file
dataCentral = np.genfromtxt(prefix+'cp_'+str(cp)+'_.csv', dtype=float, delimiter=';', skip_header=0,names=True)
costsCentral = au.readCol(dataCentral, 'Top_level_costs_central')
meanCostsCentral = np.mean(costsCentral)
stdCostsCentral = np.std(costsCentral)
runtimeCentral = au.readCol(dataCentral, 'Total_runtime_central')
meanTimeCentral = np.mean(runtimeCentral)
stdTimeCentral = np.std(runtimeCentral)
print '\midrule'
print '\multicolumn{'+str(cols)+'}{l}{'+str(cp) + ' power plants : } \\\\' # (Mean Opt. Costs ', np.around(meanCostsCentral, 2),' ): \\\\' # Opt. Costs ', meanCostsCentral, ' Runtime Central ', meanTimeCentral
print '\midrule'
allSpCosts = []
for sp in sps:
fileName = prefix + 'sps_'+str(sp)+ '_cp_'+str(cp)+'_.csv'
data = np.genfromtxt(fileName,dtype=float, delimiter=';', skip_header=0,names=True)
costs = au.readCol(data, costsColumn)
allExtraCosts = costs - costsCentral
meanExtraCosts = np.mean(allExtraCosts)
allOverShoots = ((costs / costsCentral ) - 1) * 100
stdOvershoot = np.std(allOverShoots)
overShoot = meanExtraCosts / meanCostsCentral
stdExtraCosts = np.sqrt(np.std(allExtraCosts))
meanCosts = np.mean(costs)
stdCosts = np.std(costs)
overShoot = ((meanCosts / meanCostsCentral) - 1) * 100
additionalCosts = meanCosts - meanCostsCentral
runtime = au.readCol(data, runtimeColumn)
meanTime = np.mean(runtime)
stdTime = np.std(runtime)
allSpCosts += [additionalCosts / cp]
print ' '+str(sp) + ' sps & ' + str(np.around(meanCosts, 2)) + '~$\euro$ ~('+str(np.around(stdCosts, 2))+')& + ' + str(np.around(overShoot, 2)) + ' \% ~('+str(np.around(stdOvershoot, 2))+')& + ' + str(np.around(additionalCosts, 2)) + ' ~$\euro$ ~('+str(np.around(stdExtraCosts, 2))+') & ' + str(np.around(meanTime, 2)) + " ~\\text{sec} ~("+str(np.around(stdTime, 2)) +") \\\\"
print ' \\emph{Optimum} & ' + au.boldify(str(np.around(meanCostsCentral, 2))) + "~$\euro$ ~("+str(np.around(stdCostsCentral, 2))+ ") \\\\"
print '\\bottomrule'
print '\end{tabular*}'
In [8]:
cps = [50, 200, 500]
sps = [5, 10, 15, 25, 35]
costsColumn = 'Top_level_costs_regio_central'
runtimeColumn = 'Total_runtime_regio_central'
for cp in cps:
# load central file
allSpCosts = []
allCosts = { 5 : [],
15 : [],
25 : [],
35 : [] }
print '--------------------------'
print 'Considering ', str(cp), ' plants'
for sp in sps:
fileName = prefix + 'sps_'+str(sp)+ '_cp_'+str(cp)+'_.csv'
data = np.genfromtxt(fileName,dtype=float, delimiter=';', skip_header=0,names=True)
costs = au.readCol(data, costsColumn)
allCosts[sp] = costs
for sp in sps:
print 'Comparing ',sp,' to optimal solution: '
dataCentral = np.genfromtxt(prefix+'cp_'+str(cp)+'_.csv', dtype=float, delimiter=';', skip_header=0,names=True)
costsCentral = au.readCol(dataCentral, 'Top_level_costs_central')
costsSp = np.array(allCosts[sp])
[t, prob] = st.ttest_rel(np.array(allCosts[sp]), np.array(costsCentral))
print 't: ', t, ' prob: ', prob
if prob < 0.01:
print ' SIGNIFICANT'
else:
print ' insignficant'
for otherSp in sps:
if otherSp > sp:
costsOtherSp = np.array(allCosts[otherSp])
np.array(allCosts[sp])
print 'testing : ', sp, ' and ', otherSp
#print ' var ', sp, ' : ', np.var(costsSp)
#print ' var ', otherSp, ' : ', np.var(np.array(allCosts[otherSp]))
#print ' bartlett, ', st.bartlett(costsOtherSp, costsSp)
[t, prob] = st.ttest_rel(np.array(allCosts[sp]), np.array(allCosts[otherSp]))
print ' t: ', t, ' prob: ', prob
if prob < 0.01:
print ' SIGNIFICANT'
else:
print ' insignficant'
print '------------------------'
In [5]:
cps = [500]
sps = [5, 10, 15, 25, 35]
costsColumn = 'Top_level_costs_regio_central'
runtimeColumn = 'Total_runtime_regio_central'
allOffSetCosts = { 5 : [],
10: [],
15 : [],
25 : [],
35 : [] }# for sp = 5 a list (50, 200, 500), for sp = 10 a list (50, 200, 500) ...
allOffSetCostsStd = { 5 : [],
10: [],
15: [],
25 : [],
35 : [] }# for sp = 5 a list (50, 200, 500), for sp = 10 a list (50, 200, 500) ...
allRuntimes = { 5 : [],
10: [],
15 : [],
25 : [],
35 : [] }# for sp = 5 a list (50, 200, 500), for sp = 10 a list (50, 200, 500) ...
for cp in cps:
# load central file
dataCentral = np.genfromtxt(prefix+'cp_'+str(cp)+'_.csv', dtype=float, delimiter=';', skip_header=0,names=True)
costsCentral = au.readCol(dataCentral, 'Top_level_costs_central')
meanCostsCentral = np.mean(costsCentral)
runtimeCentral = au.readCol(dataCentral, 'Total_runtime_central')
meanTimeCentral = np.mean(runtimeCentral)
print 'For ' + str(cp) + ' plants: Opt. Costs ', meanCostsCentral, ' Runtime Central ', meanTimeCentral
allSpCosts = []
allExcessCosts = []
for sp in sps:
fileName = prefix + 'sps_'+str(sp)+ '_cp_'+str(cp)+'_.csv'
data = np.genfromtxt(fileName,dtype=float, delimiter=';', skip_header=0,names=True)
costs = au.readCol(data, costsColumn)
allExtraCosts = costs - costsCentral
meanExtraCosts = np.mean(allExtraCosts)
overShoot = meanExtraCosts / meanCostsCentral
stdExtraCosts = np.sqrt(np.std(allExtraCosts))
stdExtraCosts = 0.25 * np.std(allExtraCosts)
allOffSetCosts[sp] += [meanExtraCosts / cp]
allOffSetCostsStd[sp] += [stdExtraCosts / cp]
allExcessCosts += [meanExtraCosts]
runtime = au.readCol(data, runtimeColumn)
meanTime = np.mean(runtime)
print ' '+str(sp) + ' Sps: ' + str(meanExtraCosts) + ' (+ ' + str(overShoot) + ' ; + ' + str(meanExtraCosts) + ' € ) / ' + str(meanTime)
kernel = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.)
m = GPy.models.GPRegression(np.atleast_2d(sps).T, np.atleast_2d(allExcessCosts).T, kernel)
m.optimize_restarts(num_restarts=10)
m.plot()
xlabel('\# sampling points')
ylabel('Extra costs per plant (EUR)')
#title('Extra costs for \#sampling points')
savefig('sampling-extra-costs'+str(cp)+'.pdf')
figure()
In [10]:
# Matplotlib settings for figures embedded in a LaTeX document (smaller fonts):
# the figure is as wide as the given column width, with a golden-ratio height.
from pylab import arange,pi,sin,cos,sqrt  # kept as-is: also exposes these names to later cells

fig_width_pt = 400                        # Get this from LaTeX using \showthe\columnwidth
inches_per_pt = 1.0 / 72.27               # Convert pt to inch
golden_mean = (sqrt(5) - 1.0) / 2.0       # Aesthetic ratio
fig_width = fig_width_pt * inches_per_pt  # width in inches
fig_height = fig_width * golden_mean      # height in inches
fig_size = [fig_width, fig_height]

params = {
    'backend': 'ps',
    'axes.labelsize': 10,
    # 'text.fontsize' is a deprecated alias of 'font.size' that newer
    # matplotlib releases no longer accept; 'font.size' works everywhere.
    'font.size': 10,
    'legend.fontsize': 10,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'text.usetex': True,
    'figure.figsize': fig_size,
}
pylab.rcParams.update(params)
In [10]:
cps = [500]
sps = [5, 10, 15, 25, 35]
costsColumn = 'Top_level_costs_regio_central'
runtimeColumn = 'Total_runtime_regio_central'
allOffSetCosts = { 5 : [],
10: [],
15 : [],
25 : [],
35 : [] }# for sp = 5 a list (50, 200, 500), for sp = 10 a list (50, 200, 500) ...
allOffSetCostsStd = { 5 : [],
10: [],
15: [],
25 : [],
35 : [] }# for sp = 5 a list (50, 200, 500), for sp = 10 a list (50, 200, 500) ...
allRuntimes = { 5 : [],
10: [],
15 : [],
25 : [],
35 : [] }# for sp = 5 a list (50, 200, 500), for sp = 10 a list (50, 200, 500) ...
for cp in cps:
# load central file
dataCentral = np.genfromtxt(prefix+'cp_'+str(cp)+'_.csv', dtype=float, delimiter=';', skip_header=0,names=True)
costsCentral = au.readCol(dataCentral, 'Top_level_costs_central')
meanCostsCentral = np.mean(costsCentral)
runtimeCentral = au.readCol(dataCentral, 'Total_runtime_central')
meanTimeCentral = np.mean(runtimeCentral)
print 'For ' + str(cp) + ' plants: Opt. Costs ', meanCostsCentral, ' Runtime Central ', meanTimeCentral
allSpCosts = []
allExcessCosts = []
allInputTimes = []
for sp in sps:
fileName = prefix + 'sps_'+str(sp)+ '_cp_'+str(cp)+'_.csv'
data = np.genfromtxt(fileName,dtype=float, delimiter=';', skip_header=0,names=True)
costs = au.readCol(data, costsColumn)
allExtraCosts = costs - costsCentral
meanExtraCosts = np.mean(allExtraCosts)
overShoot = meanExtraCosts / meanCostsCentral
stdExtraCosts = np.sqrt(np.std(allExtraCosts))
stdExtraCosts = 0.25 * np.std(allExtraCosts)
allOffSetCosts[sp] += [meanExtraCosts / cp]
allOffSetCostsStd[sp] += [stdExtraCosts / cp]
allExcessCosts += [meanExtraCosts]
runtime = au.readCol(data, runtimeColumn)
meanTime = np.mean(runtime)
allInputTimes += [meanTime]
print ' '+str(sp) + ' Sps: ' + str(meanExtraCosts) + ' (+ ' + str(overShoot) + ' ; + ' + str(meanExtraCosts) + ' € ) / ' + str(meanTime)
kernel = GPy.kern.Brownian(input_dim=1)
m = GPy.models.GPRegression(np.atleast_2d(allInputTimes).T, np.atleast_2d(allExcessCosts).T, kernel)
m.optimize_restarts(num_restarts=10)
m.plot()
xlabel('\# sampling points')
ylabel('Extra costs per plant (EUR)')
#title('Extra costs for \#sampling points')
savefig('sampling-extra-time'+str(cp)+'.pdf')
figure()
In [ ]: