Mark Santcroos mark.santcroos@rutgers.edu
In [73]:
import pandas as pd
import pprint
import os
from radical.pilot import utils as rpu
import radical.utils as ru
import numpy as np
import matplotlib as mp
In [3]:
# 'Magic' commands
%matplotlib inline
#%pylab inline
format='png' # pdf, png, svg
%config InlineBackend.figure_formats=[format]
#mp.style.use('fivethirtyeight')
mp.style.use('ggplot')
mp.pylab.rcParams['figure.figsize'] = (13, 9)
#Colormap possible values are:
#Spectral, summer, coolwarm, Wistia_r, pink_r, Set1, Set2, Set3, brg_r, Dark2, prism, PuOr_r, afmhot_r,
#terrain_r, PuBuGn_r, RdPu, gist_ncar_r, gist_yarg_r, Dark2_r, YlGnBu, RdYlBu, hot_r, gist_rainbow_r,
#gist_stern, PuBu_r, cool_r, cool, gray, copper_r, Greens_r, GnBu, gist_ncar, spring_r, gist_rainbow,
#gist_heat_r, Wistia, OrRd_r, CMRmap, bone, gist_stern_r, RdYlGn, Pastel2_r, spring, terrain, YlOrRd_r,
#Set2_r, winter_r, PuBu, RdGy_r, spectral, rainbow, flag_r, jet_r, RdPu_r, gist_yarg, BuGn, Paired_r,
#hsv_r, bwr, cubehelix, Greens, PRGn, gist_heat, spectral_r, Paired, hsv, Oranges_r, prism_r, Pastel2,
#Pastel1_r, Pastel1, gray_r, jet, Spectral_r, gnuplot2_r, gist_earth, YlGnBu_r, copper, gist_earth_r,
#Set3_r, OrRd, gnuplot_r, ocean_r, brg, gnuplot2, PuRd_r, bone_r, BuPu, Oranges, RdYlGn_r, PiYG,
#CMRmap_r, YlGn, binary_r, gist_gray_r, Accent, BuPu_r, gist_gray, flag, bwr_r, RdBu_r, BrBG, Reds,
#Set1_r, summer_r, GnBu_r, BrBG_r, Reds_r, RdGy, PuRd, Accent_r, Blues, autumn_r, autumn, cubehelix_r,
#nipy_spectral_r, ocean, PRGn_r, Greys_r, pink, binary, winter, gnuplot, RdYlBu_r, hot, YlOrBr,
#coolwarm_r, rainbow_r, Purples_r, PiYG_r, YlGn_r, Blues_r, YlOrBr_r, seismic, Purples, seismic_r, RdBu,
#Greys, BuGn_r, YlOrRd, PuOr, PuBuGn, nipy_spectral, afmhot
In [4]:
#
# exp1 with 3 iterations
#
exp1 = {
    'rp.session.netbook.mark.016591.0006': {
        'pilot_cores': 256,
        'cu_cores': 1,
        'profiling': True,
        'cu_count': 512,
        'cu_runtime': 0,
        'number_of_workers': 1,
        'pilot_runtime': 30,
        'iteration': 2,
        'backend': 'ORTE'
    },
'rp.session.netbook.mark.016591.0007': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 1, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0004': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 300, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0005': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 600, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0002': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0003': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 120, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0000': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 10, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0001': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 30, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0008': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 10, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0009': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 30, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.1002': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 1, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.1003': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 10, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.1001': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.1006': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 120, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.1007': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 300, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.1004': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 30, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.1005': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.1008': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 600, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 
'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.1009': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.1010': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 1, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0011': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 120, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0010': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0013': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 600, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0012': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 300, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}}
','.join(exp1.keys())
Out[4]:
In [5]:
#
# exp2 with 3 iterations
#
exp2 = {
    'rp.session.netbook.mark.016590.0025': {
        'pilot_cores': 256,
        'cu_cores': 128,
        'profiling': True,
        'cu_count': 8,
        'cu_runtime': 60,
        'number_of_workers': 1,
        'pilot_runtime': 30,
        'iteration': 2,
        'backend': 'ORTE'
    },
'rp.session.netbook.mark.016590.0024': {'pilot_cores': 256, 'cu_cores': 64, 'profiling': True, 'cu_count': 16, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0018': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 1024, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0019': {'pilot_cores': 256, 'cu_cores': 2, 'profiling': True, 'cu_count': 512, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0012': {'pilot_cores': 256, 'cu_cores': 8, 'profiling': True, 'cu_count': 128, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0013': {'pilot_cores': 256, 'cu_cores': 16, 'profiling': True, 'cu_count': 64, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0010': {'pilot_cores': 256, 'cu_cores': 2, 'profiling': True, 'cu_count': 512, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0011': {'pilot_cores': 256, 'cu_cores': 4, 'profiling': True, 'cu_count': 256, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0016': {'pilot_cores': 256, 'cu_cores': 128, 'profiling': True, 'cu_count': 8, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0017': {'pilot_cores': 256, 'cu_cores': 256, 'profiling': True, 'cu_count': 4, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0014': {'pilot_cores': 256, 'cu_cores': 32, 'profiling': True, 'cu_count': 32, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0015': {'pilot_cores': 256, 'cu_cores': 64, 'profiling': True, 'cu_count': 16, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0005': {'pilot_cores': 256, 'cu_cores': 32, 'profiling': True, 'cu_count': 32, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0004': {'pilot_cores': 256, 'cu_cores': 16, 'profiling': True, 'cu_count': 64, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0021': {'pilot_cores': 256, 'cu_cores': 8, 'profiling': True, 'cu_count': 128, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0020': {'pilot_cores': 256, 'cu_cores': 4, 'profiling': True, 'cu_count': 256, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0001': {'pilot_cores': 256, 'cu_cores': 2, 'profiling': True, 'cu_count': 512, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0000': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 1024, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 
'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0003': {'pilot_cores': 256, 'cu_cores': 8, 'profiling': True, 'cu_count': 128, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0002': {'pilot_cores': 256, 'cu_cores': 4, 'profiling': True, 'cu_count': 256, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0009': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 1024, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0008': {'pilot_cores': 256, 'cu_cores': 256, 'profiling': True, 'cu_count': 4, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0023': {'pilot_cores': 256, 'cu_cores': 32, 'profiling': True, 'cu_count': 32, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0022': {'pilot_cores': 256, 'cu_cores': 16, 'profiling': True, 'cu_count': 64, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0007': {'pilot_cores': 256, 'cu_cores': 128, 'profiling': True, 'cu_count': 8, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0006': {'pilot_cores': 256, 'cu_cores': 64, 'profiling': True, 'cu_count': 16, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016590.0026': {'pilot_cores': 256, 'cu_cores': 256, 'profiling': True, 'cu_count': 4, 'cu_runtime': 60, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}}
#pprint.pprint(exp2)
','.join(exp2.keys())
Out[5]:
In [6]:
#
# exp3 with 3 iterations
#
exp3 = {
    'rp.session.netbook.mark.016591.0028': {
        'pilot_cores': 256,
        'cu_cores': 1,
        'profiling': True,
        'cu_count': 512,
        'cu_runtime': 0,
        'number_of_workers': 7,
        'pilot_runtime': 30,
        'iteration': 1,
        'backend': 'ORTE'},
'rp.session.netbook.mark.016591.0029': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 8, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0024': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 3, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0025': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 4, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0026': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 5, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0027': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 6, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0020': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 7, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0021': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 8, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0022': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0023': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 2, 'pilot_runtime': 30, 'iteration': 1, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0015': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 2, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0014': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0017': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 4, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0016': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 3, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0019': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 6, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0018': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 5, 'pilot_runtime': 30, 'iteration': 0, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0037': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 8, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0036': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 7, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 
'rp.session.netbook.mark.016591.0035': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 6, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0034': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 5, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0033': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 4, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0032': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 3, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0031': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 2, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}, 'rp.session.netbook.mark.016591.0030': {'pilot_cores': 256, 'cu_cores': 1, 'profiling': True, 'cu_count': 512, 'cu_runtime': 0, 'number_of_workers': 1, 'pilot_runtime': 30, 'iteration': 2, 'backend': 'ORTE'}}
#pprint.pprint(exp3)
','.join(exp3.keys())
Out[6]:
In [7]:
#
# List of experiments that is used by the data wrangling functions
#
experiments = [exp1, exp2, exp3]
In [9]:
#
# Function for extracting a selection of experiments from the experiment repo
#
def select_exp_from_repo(repo, filter={}):
    exp_ids = []
    for exp_name, exp_config in repo.iteritems():
        match = True
        for filter_key, filter_val in filter.iteritems():
            if filter_val != exp_config[filter_key]:
                match = False
                break
        if match:
            exp_ids.append(exp_name)
    return exp_ids

def _test_get_experiments():
    print select_exp_from_repo(exp3, {
        # 'cu_cores': 1,
        # 'cu_count': 500,
        # 'nodes': 10,
        # 'exec_workers': 1,
        # 'spawner': 'shell',
        # 'launcher': 'orte'
    })
#_test_get_experiments()
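For reference, a minimal usage sketch of the selection helper above; the filter keys come from the experiment dictionaries defined earlier, and the specific values are only illustrative:
# Select all exp1 sessions from iteration 0 that ran 60-second CUs.
example_ids = select_exp_from_repo(exp1, {'iteration': 0, 'cu_runtime': 60})
print example_ids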
In [10]:
#
# Turn ID into a name that can be used as a python identifier.
#
def normalize_id(sid):
    return sid.replace('.', '_')
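For example, the mapping this produces for a session ID:
# normalize_id('rp.session.netbook.mark.016591.0006')
# -> 'rp_session_netbook_mark_016591_0006'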
In [11]:
#
# Write session into HDF5 store
#
def stat_session(store, db, cachedir, session_id):
    session_frame, pilot_frame, unit_frame = rpu.get_session_frames(db, session_id, cachedir)
    norm_session_id = normalize_id(session_id)
    store.put('%s/session' % norm_session_id, session_frame)
    store.put('%s/pilots' % norm_session_id, pilot_frame)
    store.put('%s/units' % norm_session_id, unit_frame)
In [12]:
#
# MongoDB URL
#
import radical.utils as ru
dburl = ru.Url(os.environ['RADICAL_PILOT_DBURL'])
#dburl = ru.Url('mongodb://ec2-54-221-194-147.compute-1.amazonaws.com:24242/')
if not dburl.path or '/' == dburl.path:
    dburl.path = 'radicalpilot'
print 'Using MongoDB at: %s' % dburl
In [13]:
#
# Location to store raw json session data
#
cachedir = '/Users/mark/Documents/experiments/bluewaters/data/json'
if not os.path.isdir(cachedir):
    raise RuntimeError("No valid cache dir: %s" % cachedir)
In [14]:
#
# Location where raw profiling data is stored
#
profdir = '/Users/mark/Documents/experiments/bluewaters/data/profiling'
In [16]:
#
# Store JSON session data in HDF5 database
#
# TODO: prevent duplication
# TODO: Look into performance degradation warning
#
hdf5dir = '/Users/mark/Documents/experiments/bluewaters/data/hdf5'
store = pd.HDFStore(os.path.join(hdf5dir, 'store.h5'))
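The "prevent duplication" TODO above could be handled with a small guard; a sketch assuming the keys written by stat_session, not something the notebook currently does:
# Sketch: only ingest a session if it is not yet present in the HDF5 store.
# HDFStore supports membership tests on its keys.
def stat_session_once(store, db, cachedir, session_id):
    key = '/%s/session' % normalize_id(session_id)
    if key in store:
        return  # already ingested, skip to prevent duplication
    stat_session(store, db, cachedir, session_id)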
In [17]:
mongo, db, dbname, cname, pname = ru.mongodb_connect(dburl)
for exp in experiments:
    for session_id in select_exp_from_repo(exp):
        stat_session(store, db, cachedir, session_id)
#store.close()
mongo.disconnect()
In [18]:
session_names = [r._v_name for r in store.root]
#session_names = list(set([x.split('/', 2)[1] for x in store.keys()]))
session_names
Out[18]:
In [19]:
sessions = [store['%s/session' % s_name] for s_name in session_names]
In [74]:
#
# Transpose raw DF into a CU-oriented data structure
# TODO: This needs to be converted to a DF straight away
# (Needed by the profiling ingest cell below, which calls prof2uids().)
#
def prof2uids(rawdf):
    units = {}
    # Using "native" Python
    #units[exp]['all'] = [x for x in df.uid[df.uid > 0].unique() if x.startswith('unit')]
    units['all']    = [x for x in rawdf.uid.dropna().unique() if x.startswith('unit')]
    units['cloned'] = [x for x in units['all'] if 'clone' in x]
    units['real']   = list(set(units['all']) - set(units['cloned']))
    # Or alternatively, with Pandas
    #uids_s = df['uid']
    #all_units_s = uids_s.loc[uids_s.str.startswith('unit.', na=False)].drop_duplicates()
    #units[exp]['all'] = set(all_units_s)
    #cloned_units_s = all_units_s.loc[all_units_s.str.contains('clone')]
    #units[exp]['cloned'] = set(cloned_units_s)
    #units[exp]['real'] = units[exp]['all'] - units[exp]['cloned']
    return units

# for exp, u in units.iteritems():
#     print "Experiment:'%s', all:'%d', real:'%d', cloned:'%d'" % \
#           (exp, len(u['all']), len(u['real']), len(u['cloned']))
In [72]:
# "label", "component", "event", "message"
# elements = [
# ('a_get_u', 'MainThread', 'get', 'MongoDB to Agent (PendingExecution)'),
# ('a_build_u', 'MainThread', 'Agent get unit meta', ''),
# ('a_mkdir_u', 'MainThread', 'Agent get unit mkdir', ''),
# ('a_notify_alloc', 'MainThread', 'put', 'Agent to update_queue (Allocating)'),
# ('a_to_s', 'MainThread', 'put', 'Agent to schedule_queue (Allocating)'),
# ('s_get_alloc', 'CONTINUOUS', 'get', 'schedule_queue to Scheduler (Allocating)'),
# ('s_alloc_failed', 'CONTINUOUS', 'schedule', 'allocation failed'),
# ('s_allocated', 'CONTINUOUS', 'schedule', 'allocated'),
# ('s_to_ewo', 'CONTINUOUS', 'put', 'Scheduler to execution_queue (Allocating)'),
# ('s_unqueue', 'CONTINUOUS', 'unqueue', 're-allocation done'),
# ('ewo_get', 'ExecWorker-', 'get', 'executing_queue to ExecutionWorker (Executing)'),
# ('ewo_launch', 'ExecWorker-', 'ExecWorker unit launch', ''),
# ('ewo_spawn', 'ExecWorker-', 'ExecWorker spawn', ''),
# ('ewo_script', 'ExecWorker-', 'launch script constructed', ''),
# ('ewo_pty', 'ExecWorker-', 'spawning passed to pty', ''),
# ('ewo_notify_exec', 'ExecWorker-', 'put', 'ExecWorker to update_queue (Executing)'),
# ('ewo_to_ewa', 'ExecWorker-', 'put', 'ExecWorker to watcher (Executing)'),
# ('ewa_get', 'ExecWatcher-', 'get', 'ExecWatcher picked up unit'),
# ('ewa_complete', 'ExecWatcher-', 'execution complete', ''),
# ('ewa_notify_so', 'ExecWatcher-', 'put', 'ExecWatcher to update_queue (StagingOutput)'),
# ('ewa_to_sow', 'ExecWatcher-', 'put', 'ExecWatcher to stageout_queue (StagingOutput)'),
# ('sow_get_u', 'StageoutWorker-', 'get', 'stageout_queue to StageoutWorker (StagingOutput)'),
# ('sow_u_done', 'StageoutWorker-', 'final', 'stageout done'),
# ('sow_notify_done', 'StageoutWorker-', 'put', 'StageoutWorker to update_queue (Done)'),
# ('uw_get_alloc', 'UpdateWorker-', 'get', 'update_queue to UpdateWorker (Allocating)'),
# ('uw_push_alloc', 'UpdateWorker-', 'unit update pushed (Allocating)', ''),
# ('uw_get_exec', 'UpdateWorker-', 'get', 'update_queue to UpdateWorker (Executing)'),
# ('uw_push_exec', 'UpdateWorker-', 'unit update pushed (Executing)', ''),
# ('uw_get_so', 'UpdateWorker-', 'get', 'update_queue to UpdateWorker (StagingOutput)'),
# ('uw_push_so', 'UpdateWorker-', 'unit update pushed (StagingOutput)', ''),
# ('uw_get_done', 'UpdateWorker-', 'get', 'update_queue to UpdateWorker (Done)'),
# ('uw_push_done', 'UpdateWorker-', 'unit update pushed (Done)', '')
# ]
# print "Number of entries: %d" % len(elements)
# [e[0] for e in elements]
#edf = pd.DataFrame(elements, columns=["label", "component", "event", "message"])
In [22]:
#
# Lookup tuples in dataframe based on uid and the tuple from the elements list
#
def tup2ts(df, uid, tup):
    #print uid
    #print tup
    all_for_uid = df[df.uid == uid].fillna('')
    val = all_for_uid[(all_for_uid.component.str.startswith(tup[1])) &
                      (all_for_uid.event == tup[2]) &
                      (all_for_uid.message == tup[3])].time
    try:
        return val.iloc[0]
    except Exception as e:
        return np.NaN
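As an illustration, tup2ts is called per unit with one of the (label, component, event, message) tuples like those in the commented list above; the unit ID is hypothetical and raw_prof_data is the raw profile frame read in the ingest cell below:
# Hypothetical example: timestamp at which 'unit.000000' was launched by an ExecWorker.
launch_ts = tup2ts(raw_prof_data, 'unit.000000',
                   ('ewo_launch', 'ExecWorker-', 'ExecWorker unit launch', ''))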
In [71]:
#
# Construct a unit based dataframe from a raw dataframe
#
def prof2df(rawdf, units):
    # TODO: create skip logic
    #if exp in indices and exp in info:
    #    continue
    indices = [unit for unit in units['real']]
    info = [{t[0]: tup2ts(rawdf, unit, t) for t in rpu.prof_entries} for unit in units['real']]
    # TODO: Also do this for cloned units
    return pd.DataFrame(info)  # , index=indices[exp]).sort_index()
In [24]:
#
# Method to create a column based on two other columns using an operator
#
import operator
def create_column(df, lhs, rhs, operator):
    return operator(df[lhs], df[rhs])
In [25]:
#
# Add additional (derived) columns to dataframes
#
def add_derived(df):
    df['executor_queue'] = create_column(df, 'ewo_get', 's_to_ewo', operator.sub)
    df['raw_runtime']    = create_column(df, 'ewa_complete', 'ewo_launch', operator.sub)
    df['full_runtime']   = create_column(df, 'uw_push_done', 's_to_ewo', operator.sub)
    df['watch_delay']    = create_column(df, 'ewa_get', 'ewo_to_ewa', operator.sub)
    df['allocation']     = create_column(df, 's_allocated', 'a_to_s', operator.sub)
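Note that add_derived is not invoked in the cells below; a minimal usage sketch, assuming it would be applied to the per-pilot frame built by prof2df before it is stored:
# Hypothetical usage inside the ingest loop below:
#   df = prof2df(raw_prof_data, units)
#   add_derived(df)   # adds executor_queue, raw_runtime, full_runtime, watch_delay, allocation
#   store.put('%s/prof/%s' % (norm_sid, normalize_id(pid)), df)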
In [26]:
for exp in experiments:
    #
    # Get raw CSV datasets as DataFrames based on selection filter
    #
    session_ids = select_exp_from_repo(
        exp,
        {
            # 'cu_cores': 128,
            # 'cu_count': 500,
            # 'nodes': 10,
            # 'exec_workers': 1,
            # 'spawner': 'shell',
            # 'launcher': 'orte'
        }
    )
    for sid in session_ids:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            prof_file = os.path.join(profdir, sid + '-' + pid + '.prof')
            #print prof_file
            raw_prof_data = pd.read_csv(prof_file)
            #print raw_prof_data
            units = prof2uids(raw_prof_data)
            df = prof2df(raw_prof_data, units)
            store.put('%s/prof/%s' % (norm_sid, normalize_id(pid)), df)
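A defensive variant of the profile read above (an assumption, not part of the original flow): skip pilots whose .prof file is absent instead of aborting the whole loop.
def read_prof(profdir, sid, pid):
    # Return the raw profile DataFrame for one pilot, or None if the file is missing.
    prof_file = os.path.join(profdir, '%s-%s.prof' % (sid, pid))
    if not os.path.isfile(prof_file):
        print 'Missing profile: %s' % prof_file
        return None
    return pd.read_csv(prof_file)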
In [1187]:
orte0 = store.get('rp.session.netbook.mark.016525.0027/units')
aprun0 = store.get('rp.session.netbook.mark.016525.0028/units')
ccm0 = store.get('rp.session.netbook.mark.016525.0030/units')
In [ ]:
orte0_ttc = max(orte0['Done']) - min(orte0['Executing'])
ccm0_ttc = max(ccm0['Done']) - min(ccm0['Executing'])
aprun0_ttc = max(aprun0['Done']) - min(aprun0['Executing'])
#df = pandas.DataFrame([aprun0_ttc, ccm0_ttc, orte0_ttc], ['aprun', 'ccm', 'orte'])
#df = pandas.DataFrame([[1,2,3]])
df = pd.DataFrame([{'APRUN': aprun0_ttc, 'CCM/SSH': ccm0_ttc, 'ORTE': orte0_ttc}])
ax = df.plot(kind='bar', )
ax.set_ylabel("TTC (s)")
#ax.set_xlabel('Method')
ax.set_title("Time to Completion (TTC)\n10 nodes, 100 x 32 core \"sleep 0\" tasks.\n")
#grid('off')
ax.set_xticklabels("")
ax.set_ylim(0,70)
#savefig('ttc.pdf')
In [ ]:
orte0['Runtime'] = orte0['Done'] - orte0['Executing']
aprun0['Runtime'] = aprun0['Done'] - aprun0['Executing']
ccm0['Runtime'] = ccm0['Done'] - ccm0['Executing']
In [ ]:
ax = aprun0['Runtime'].plot(kind='hist', title='APRUN, 10 nodes, 320 cores, 100x sleep 0, 32 cores')
ax.set_ylabel('Occurrences')
ax.set_xlabel('Runtime (s)')
In [ ]:
ax = ccm0['Runtime'].plot(kind='hist', title='CCM/SSH, 10 nodes, 320 cores, 100x sleep 0, 32 cores')
ax.set_ylabel('Occurrences')
ax.set_xlabel('Runtime (s)')
In [ ]:
ax = orte0['Runtime'].plot(kind='hist', title='ORTE, 10 nodes, 320 cores, 100x sleep 0, 32 cores')
ax.set_ylabel('Occurrences')
ax.set_xlabel('Runtime (s)')
In [49]:
#
# Calculate TTC
#
cu_runtime = [0, 1, 10, 30, 60, 120, 300, 600]
orte_ids = {}
orte_ttc = {}
orte_execq = {}
for runtime in cu_runtime:
    orte_ids[runtime] = select_exp_from_repo(
        exp1,
        {
            'cu_runtime': runtime
        }
    )
    orte_ttc[runtime] = []
    orte_execq[runtime] = []
    for sid in orte_ids[runtime]:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            df = store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))
            # TTC
            orte_ttc[runtime].append(df['ewa_complete'].max() - df['a_to_s'].min())

orte_df = pd.DataFrame(orte_ttc)
stddev = orte_df.std()
ax = orte_df.mean().plot(kind='bar', yerr=stddev)
#ax.set_title('TTC to execute 512 x single core CUs of X seconds.')
ax.set_ylabel('TTC (s)')
ax.set_xlabel('CU duration (s)')
mp.pyplot.savefig('../plots/exp1_ttc_var-cu-duration.pdf')
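The select/normalize/get pattern above repeats in every analysis cell below; a small generator could factor it out. This is a refactoring sketch only and is not used by the cells that follow:
def iter_prof_frames(exp, filter={}):
    # Yield (session_id, per-pilot profiling DataFrame) for all sessions matching the filter.
    for sid in select_exp_from_repo(exp, filter):
        norm_sid = normalize_id(sid)
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            yield sid, store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))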
In [50]:
#
# Calculate TTC, in relation to execution time / optimal duration
#
cu_runtime = [0, 1, 10, 30, 60, 120, 300, 600]
orte_ids = {}
orte_ttc = {}
#orte_optimal = {}
orte_run = {}
for runtime in cu_runtime:
    orte_ids[runtime] = select_exp_from_repo(
        exp1,
        {
            'cu_runtime': runtime
        }
    )
    orte_ttc[runtime] = []
    #orte_optimal[runtime] = []
    orte_run[runtime] = []
    for sid in orte_ids[runtime]:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            df = store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))
            # TTC
            orte_ttc[runtime].append(df['ewa_complete'].max() - df['a_to_s'].min())
            # Cumulative runtime
            orte_run[runtime].append(
                (exp1[sid]['cu_count'] * (df['ewa_complete'] - df['ewo_launch']).mean()) /
                exp1[sid]['pilot_cores'])
            # Optimal runtime
            #orte_optimal[runtime].append(
            #    (exp1[sid]['cu_count'] * exp1[sid]['cu_runtime']) /
            #    exp1[sid]['pilot_cores'])

orte_df = pd.DataFrame()
orte_df['ttc'] = pd.DataFrame(orte_ttc).mean()
#orte_df['optimal'] = pd.DataFrame(orte_optimal).mean()
#orte_df['overhead'] = orte_df['ttc'] - orte_df['optimal']
#orte_df[['optimal', 'overhead']].plot(kind='bar', stacked=True)
orte_df['Execution'] = pd.DataFrame(orte_run).mean()
orte_df['ExecWorker Queue'] = orte_df['ttc'] - orte_df['Execution']
ax = orte_df[['ExecWorker Queue', 'Execution']].plot(kind='bar', stacked=True)
#ax.set_title('TTC of 512 single core CUs with varying runtimes.')
ax.set_ylabel('TTC (s)')
ax.set_xlabel('CU duration (s)')
mp.pyplot.savefig('../plots/exp1_ttc_var-cu-duration_split.pdf')
In [55]:
#
# Calculate efficiency
#
cu_runtime = [0, 1, 10, 30, 60, 120, 300, 600]
orte_ids = {}
orte_ttc = {}
orte_run = {}
for runtime in cu_runtime:
    orte_ids[runtime] = select_exp_from_repo(
        exp1,
        {
            'cu_runtime': runtime
        }
    )
    orte_ttc[runtime] = []
    orte_run[runtime] = []
    for sid in orte_ids[runtime]:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            df = store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))
            # TTC
            orte_ttc[runtime].append(df['ewa_complete'].max() - df['a_to_s'].min())
            # Cumulative runtime
            orte_run[runtime].append(
                (exp1[sid]['cu_count'] * (df['ewa_complete'] - df['ewo_launch']).mean()) /
                exp1[sid]['pilot_cores'])

orte_df = pd.DataFrame()
orte_df['ttc'] = pd.DataFrame(orte_ttc).mean()
orte_df['Execution'] = pd.DataFrame(orte_run).mean()
orte_df['ExecWorker Queue'] = orte_df['ttc'] - orte_df['Execution']
orte_df['Efficiency'] = orte_df['Execution'] / orte_df['ttc'] * 100
# TODO: error bars
ax = orte_df['Efficiency'].plot(kind='bar')
#ax.set_title('Efficiency of core utilization for single core CUs with various runtimes.')
ax.set_ylabel('Core Utilisation Efficiency (%)')
ax.set_xlabel('CU duration (s)')
mp.pyplot.savefig('../plots/exp1_var-cu-duration_eff.pdf')
In [69]:
#
# Calculate Exec Queue overhead
#
cu_runtime = [0, 1, 10, 30, 60, 120, 300, 600]
orte_ids = {}
orte_ttc = {}
orte_execq = {}
for runtime in cu_runtime:
    orte_ids[runtime] = select_exp_from_repo(
        exp1,
        {
            'cu_runtime': runtime
        }
    )
    orte_ttc[runtime] = []
    orte_execq[runtime] = []
    for sid in orte_ids[runtime]:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            df = store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))
            # Exec Queue time
            orte_execq[runtime].append((df['ewo_get'] - df['s_to_ewo']).mean())

orte_df = pd.DataFrame(orte_execq)
stddev = orte_df.std()
ax = orte_df.mean().plot(kind='bar', yerr=stddev)
#ax.set_title('ExecWorker Queuing time for varying CU duration.')
ax.set_ylabel('Mean ExecWorker Queuing (s)')
ax.set_xlabel('CU duration (s)')
mp.pyplot.savefig('../plots/exp1_var-cu-duration_queueing.pdf')
In [70]:
#
# Calculate TTC
#
cu_cores = [1, 2, 4, 8, 16, 32, 64, 128, 256]
orte_ids = {}
orte_ttc = {}
for cores in cu_cores:
    orte_ids[cores] = select_exp_from_repo(
        exp2,
        {
            'cu_cores': cores
        }
    )
    orte_ttc[cores] = []
    for sid in orte_ids[cores]:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            df = store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))
            orte_ttc[cores].append(df['ewa_complete'].max() - df['a_to_s'].min())

orte_df = pd.DataFrame(orte_ttc)
stddev = orte_df.std()
ax = orte_df.mean().plot(kind='bar', yerr=stddev)
#ax.set_title('TTC to consume 256 cores (8*32 core nodes),\nfor 4x60 seconds, with varying cores per CU.')
ax.set_ylabel('TTC (s)')
ax.set_xlabel('# cores per CU')
#ax.set_ylim(200, 400)
mp.pyplot.savefig('../plots/exp2_ttc_var-cu-cores.pdf')
In [58]:
#
# Calculate TTC, split up
#
cu_cores = [1, 2, 4, 8, 16, 32, 64, 128, 256]
orte_ids = {}
orte_run = pd.DataFrame()
orte_alloc = pd.DataFrame()
orte_execq = pd.DataFrame()
orte_total = pd.DataFrame()
for cores in cu_cores:
    orte_ids[cores] = select_exp_from_repo(
        exp2,
        {
            'cu_cores': cores,
        }
    )
    for sid in orte_ids[cores]:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            df = store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))
            dfr = df['ewa_complete'] - df['ewo_launch']
            if cores in orte_run:
                orte_run[cores] = pd.concat([orte_run[cores], dfr], ignore_index=True)
            else:
                orte_run[cores] = dfr
            dfq = df['ewo_get'] - df['s_to_ewo']
            if cores in orte_execq:
                orte_execq[cores] = pd.concat([orte_execq[cores], dfq], ignore_index=True)
            else:
                orte_execq[cores] = dfq

orte_df = pd.DataFrame()
orte_df['ExecWorker Queue'] = orte_execq.mean()
orte_df['Execution'] = orte_run.mean()
ax = orte_df.plot(kind='bar', stacked=True)
#ax.set_title('CU WorkQueue and Execution time with varying cores per CU.')
ax.set_ylabel('time (s)')
ax.set_xlabel('# cores per CU')
mp.pyplot.savefig('../plots/exp2_var-cu-cores_split.pdf')
In [59]:
#
# Core utilization for varying core counts
#
cu_cores = [1, 2, 4, 8, 16, 32, 64, 128, 256]
orte_ids = {}
orte_run = {}
orte_ttc = {}
for cores in cu_cores:
    orte_ids[cores] = select_exp_from_repo(
        exp2,
        {
            'cu_cores': cores,
        }
    )
    orte_ttc[cores] = []
    orte_run[cores] = []
    for sid in orte_ids[cores]:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            df = store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))
            # Cumulative runtime
            orte_run[cores].append(
                (df['ewa_complete'] - df['ewo_launch']).mean() *
                (exp2[sid]['cu_count'] * exp2[sid]['cu_cores']) / exp2[sid]['pilot_cores'])
            orte_ttc[cores].append(df['ewa_complete'].max() - df['a_to_s'].min())

orte_df = pd.DataFrame()
orte_df['ttc'] = pd.DataFrame(orte_ttc).mean()
orte_df['Execution'] = pd.DataFrame(orte_run).mean()
orte_df['Efficiency'] = orte_df['Execution'] / orte_df['ttc'] * 100
# TODO: error bars
ax = orte_df['Efficiency'].plot(kind='bar', stacked=True)
#ax.set_title('Core utilization for varying core counts per CU.')
ax.set_ylabel('Core Utilization Efficiency (%)')
ax.set_xlabel('# cores per CU')
mp.pyplot.savefig("../plots/exp2_var-cu-cores_eff.pdf")
In [65]:
#
# Calculate TTC
#
num_workers = range(1, 9)
orte_ids = {}
orte_ttc = {}
orte_execq = {}
for workers in num_workers:
    orte_ids[workers] = select_exp_from_repo(
        exp3,
        {
            'number_of_workers': workers
        }
    )
    orte_ttc[workers] = []
    orte_execq[workers] = []
    for sid in orte_ids[workers]:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            df = store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))
            # TTC
            orte_ttc[workers].append(df['ewa_complete'].max() - df['a_to_s'].min())

orte_df = pd.DataFrame(orte_ttc)
stddev = orte_df.std()
ax = orte_df.mean().plot(kind='bar', yerr=stddev)
#ax.set_title('TTC to execute 512 single core CUs of 0 seconds.')
ax.set_ylabel('time (s)')
ax.set_xlabel('# ExecWorkers')
mp.pyplot.savefig("../plots/exp3_ttc_var-exec-workers.pdf")
In [66]:
#
# Launch rate
#
num_workers = range(1, 9)
orte_ids = {}
orte_ttc = {}
orte_execq = {}
for workers in num_workers:
    orte_ids[workers] = select_exp_from_repo(
        exp3,
        {
            'number_of_workers': workers
        }
    )
    orte_ttc[workers] = []
    orte_execq[workers] = []
    for sid in orte_ids[workers]:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            df = store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))
            # Launch rate: CUs completed per second of TTC
            orte_ttc[workers].append(exp3[sid]['cu_count'] / (df['ewa_complete'].max() - df['a_to_s'].min()))

orte_df = pd.DataFrame(orte_ttc)
stddev = orte_df.std()
ax = orte_df.mean().plot(kind='bar', yerr=stddev)
#ax.set_title('Launch rates for single core CUs of 0 seconds with varying number of ExecWorkers.')
ax.set_ylabel('Launch rate (Executions/s)')
ax.set_xlabel('# ExecWorkers')
mp.pyplot.savefig("../plots/exp3_var-exec-workers_launchrate.pdf")
In [67]:
#
# Calculate CU time, split up
#
num_workers = range(1, 9)
orte_ids = {}
orte_run = pd.DataFrame()
orte_execq = pd.DataFrame()
for workers in num_workers:
    orte_ids[workers] = select_exp_from_repo(
        exp3,
        {
            'number_of_workers': workers,
        }
    )
    for sid in orte_ids[workers]:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            df = store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))
            dfr = df['ewa_complete'] - df['ewo_launch']
            if workers in orte_run:
                orte_run[workers] = pd.concat([orte_run[workers], dfr], ignore_index=True)
            else:
                orte_run[workers] = dfr
            dfq = df['ewo_get'] - df['s_to_ewo']
            if workers in orte_execq:
                orte_execq[workers] = pd.concat([orte_execq[workers], dfq], ignore_index=True)
            else:
                orte_execq[workers] = dfq

orte_df = pd.DataFrame()
orte_df['ExecWorker Queue'] = orte_execq.mean()
orte_df['Execution'] = orte_run.mean()
ax = orte_df.plot(kind='bar', stacked=True)
#ax.set_title('CU ExecWorker Queuing and Execution time (s),\nwith varying number of ExecWorkers.')
ax.set_ylabel('time (s)')
ax.set_xlabel('# ExecWorkers')
mp.pyplot.savefig("../plots/exp3_var-exec-workers_split.pdf")
In [68]:
#
# Plot ExecWorker Queuing Delay, for varying number of ExecWorkers, with launches shown over time.
#
num_workers = range(1, 9)
orte_ids = {}
orte_execq = pd.DataFrame()
for workers in num_workers:
    orte_ids[workers] = select_exp_from_repo(
        exp3,
        {
            'number_of_workers': workers,
            'iteration': 2  # Only one as these values are difficult to combine
        }
    )
    for sid in orte_ids[workers]:
        norm_sid = normalize_id(sid)
        # Get multiple pilots from session
        for pid in store.get('%s/pilots' % norm_sid)['pid']:
            df = store.get('/%s/prof/%s' % (norm_sid, normalize_id(pid)))
            df['execq'] = df['ewo_get'] - df['s_to_ewo']
            dfq = pd.Series(df.sort('s_to_ewo')['execq'])
            dfq.index = range(exp3[sid]['cu_count'])
            orte_execq[workers] = dfq

ax = orte_execq.plot(colormap='Paired')
ax.set_title('Per CU ExecWorker Queuing time,\nfor different number of ExecWorkers.')
ax.set_ylabel('Exec Worker Queuing time (s)')
ax.set_xlabel('CU instances (chronological)')
mp.pyplot.savefig("../plots/exp3_var-exec-workers_chrono.pdf")