In [1]:
%matplotlib inline
import numpy as np
import scipy as sp
import pandas as pd
import scipy.stats as sps
import statsmodels.api as sm
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib.ticker as ticker
import matplotlib.gridspec as gridspec
import radical.utils as ru
import radical.pilot as rp
import radical.analytics as ra
from IPython.display import display
from scipy.stats import linregress
from IPython.core.display import display, HTML
from __future__ import unicode_literals
pd.set_option('expand_frame_repr', False)
pd.set_option('display.max_rows', 50)
In [2]:
# Global configurations
# ---------------------
# Let the notebook use the whole width of the browser window.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Base matplotlib style sheet; the settings below override parts of it.
plt.style.use('seaborn-ticks')

# Typeset the diagrams' text with LaTeX, using a serif font.
mpl.rcParams.update({
    'text.usetex'        : True,
    'text.latex.unicode' : True,
    'font.family'        : 'serif',
    'font.serif'         : ['Nimbus Roman Becker No9L'],
})

# A single font size for every text element; the legend is 2pt smaller.
SIZE = 24
mpl.rcParams.update({
    'font.size'        : SIZE,      # default text size
    'axes.titlesize'   : SIZE,      # axes title
    'axes.labelsize'   : SIZE,      # x and y labels
    'xtick.labelsize'  : SIZE,      # x tick labels
    'ytick.labelsize'  : SIZE,      # y tick labels
    'legend.fontsize'  : SIZE - 2,  # legend
    'figure.titlesize' : SIZE,      # figure title
})

# Thin axes and ticks to avoid distractions; data lines stay thick.
mpl.rcParams.update({
    'axes.linewidth'    : 0.75,
    'xtick.major.width' : 0.75,
    'xtick.minor.width' : 0.75,
    'ytick.major.width' : 0.75,
    'ytick.minor.width' : 0.75,
    'lines.linewidth'   : 2,
})

# No box around the legend, to avoid distractions.
mpl.rcParams['legend.frameon'] = False

# Restore part of the matplotlib 1.5 look.
mpl.rcParams.update({
    'patch.force_edgecolor' : True,
    'errorbar.capsize'      : 3,
})
# Helpers
# -------
# Coordinated palette: the "Tableau 20" colors as 0-255 RGB triples,
# listed as strong/light pairs and followed by a few extra grays.
_tableau20_rgb = [
    (31 , 119, 180), (174, 199, 232),  # blue   [ 0,1 ]
    (255, 127, 14 ), (255, 187, 120),  # orange [ 2,3 ]
    (44 , 160, 44 ), (152, 223, 138),  # green  [ 4,5 ]
    (214, 39 , 40 ), (255, 152, 150),  # red    [ 6,7 ]
    (148, 103, 189), (197, 176, 213),  # purple [ 8,9 ]
    (140, 86 , 75 ), (196, 156, 148),  # brown  [10,11]
    (227, 119, 194), (247, 182, 210),  # pink   [12,13]
    (188, 189, 34 ), (219, 219, 141),  # yellow [14,15]
    (23 , 190, 207), (158, 218, 229),  # cyan   [16,17]
    (65 , 68 , 81 ), (96 , 99 , 106),  # gray   [18,19]
    (127, 127, 127), (143, 135, 130),  # gray   [20,21]
    (165, 172, 175), (199, 199, 199),  # gray   [22,23]
    (207, 207, 207)]                   # gray   [24]

# matplotlib expects each RGB channel in the [0, 1] range.
tableau20 = [tuple(channel / 255. for channel in rgb)
             for rgb in _tableau20_rgb]
def fig_setup():
    """Create a 13x7 figure with one axes that has no top and right spines.

    Ticks are drawn only on the bottom and left axes.

    Returns:
        The (figure, axes) pair.
    """
    fig = plt.figure(figsize=(13,7))
    ax = fig.add_subplot(111)
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.xaxis.tick_bottom()
    ax.yaxis.tick_left()
    return fig, ax
In [3]:
# Load the wrangled data saved in .csv files: sessions, pilots and units.
sosg = pd.read_csv('data/sessions.csv', index_col=0)
posg = pd.read_csv('data/pilots.csv', index_col=0)
uosg = pd.read_csv('data/units.csv', index_col=0)

# Experiments excluded from the analysis (original note: experiments with
# at least 8 tasks but less than 8 pilots).
dropped_experiments = ['exp1', 'exp2', 'exp3']
sosg = sosg[~sosg.experiment.isin(dropped_experiments)]
posg = posg[~posg.experiment.isin(dropped_experiments)]
uosg = uosg[~uosg.experiment.isin(dropped_experiments)]

# Sessions with spurious data due to bugs. Session 017131.0005 (outlier Tq)
# is deliberately kept in all three tables.
spurious_sessions = [
    'rp.session.radical.mturilli.017128.0000',
    'rp.session.radical.mturilli.017139.0003',
    'rp.session.radical.mturilli.017230.0003',  # outlier TTC (Tr)
    'rp.session.radical.mturilli.017212.0011',  # inconsistency in concurrency
    'rp.session.radical.mturilli.017223.0000',  # inconsistency in concurrency
    'rp.session.radical.mturilli.017228.0001']  # inconsistency in concurrency
sosg = sosg.drop(spurious_sessions)

# NOTE(review): session 017229.0003 (inconsistency in concurrency) is removed
# from pilots and units but NOT from the sessions table above. Confirm this
# asymmetry is intended.
spurious_sids = spurious_sessions + ['rp.session.radical.mturilli.017229.0003']
posg = posg[~posg.sid.isin(spurious_sids)]
uosg = uosg[~uosg.sid.isin(spurious_sids)]

# Derive the number of units requested and the number of units rescheduled
# due to pilots halted/migrated by condor. The requested number is the
# power of two (8 ... 2048) whose bucket [p, 2p) contains the number of
# units of the session; sessions outside [8, 4096) get no value (NaN).
for requested in [2**exponent for exponent in range(3, 12)]:
    in_bucket = (sosg['nunit'] >= requested) & (sosg['nunit'] < 2 * requested)
    sosg.loc[in_bucket, 'nunit_requested'] = requested
sosg['nunit_rescheduled'] = sosg['nunit'] - sosg['nunit_requested']

# Eliminate spurious runs with number of pilots > number of units.
sosg = sosg[sosg.npilot <= sosg.nunit_requested]

# Pilots and units belonging to the sessions that survived the cleaning.
posg_completed = posg[posg.sid.isin(sosg.sid)]
uosg_completed = uosg[uosg.sid.isin(sosg.sid)]
In [4]:
# Size of the cleaned data set. The parenthesized single-argument form of
# print behaves the same as a Python 2 print statement and is also valid
# Python 3.
print('Total number of successful runs: %s' % sosg.shape[0])
print('Total number of pilots: %s' % posg.shape[0])
print('Total number of units: %s' % uosg.shape[0])
Experiment | #Pilots | |
---|---|---|
1 | 8,16,32,64,128,256,512.................. | 8 |
2 | 16,32,64,128,256,512,1024......... | 16 |
3 | 32,64,128,256,512,1024,2048 | 32 |
4 | 64,128,256,512,1024,2048 | 64 |
5 | 128,256,512,1024,2048 | 128 |
6 | 256,512,1024,2048 | 256 |
7 | 512,1024,2048 | 512 |
8 | 1024,2048 | 1024 |
9 | 2048 | 2048 |
Template to define dynamism:
{E,R,M}
The distribution of M measures the dynamism of E on R
ID | Entity | Resource | Measure | Plot | Description |
---|---|---|---|---|---|
1 | Workload | XOVC | $TTC$ | Histogram: x = $TTC$ (s); y = Frequency | For every experiment, the same instance of each run of each experiment is sequentially submitted to XOVC multiple times. The distribution of $TTC$ measures the dynamism of that instance of a run of a workload on XOVC. |
2 | Workload | XOVC | $TTQ$ | Histogram: x = $TTQ$ (s); y = Frequency | As 1 but with $TTQ$. |
3 | Workload | XOVC | $TTX$ | Histogram: x = $TTX$ (s); y = Frequency | As 1 but with $TTX$. |
4 | Pilot | XOVC | $T_q$ | Histogram: x = $T_q$ (s); y = Frequency | For every experiment, the same instance of pilot (<1; unspecified; XOVC>) is sequentially submitted to XOVC. The distribution of $T_q$ measures the dynamism of that instance of a pilot on XOVC. (We are not sure what we are measuring: the time the pilot has been waiting on the broker (the broker does late binding of jobs to hosts) or the time the pilot has been waiting on a host (the broker does early binding of jobs to hosts). I think the former and I have started accumulating data/evidence for it.) |
5 | Pilot | XOVC | $T_r$ | Histogram: x = $T_r$ (s); y = Frequency | For every experiment, the same instance of pilot (<1; unspecified; XOVC>) is sequentially submitted to XOVC. The distribution of $T_r$ measures the dynamism of that instance of a pilot on XOVC. |
6 | Task | XOVC | $T_x$ | Histogram: x = $T_x$ (s); y = Frequency | For every experiment, the same instance of task (1715750072310 operations) is sequentially executed on the same host multiple times. The distribution of $T_x$ measures the dynamism of that instance of task on XOVC. |
10 | Pilot | Host | $T_r$ | Histogram: x = $T_r$ (s); y = Frequency | For every experiment, the same instance of pilot (<1; unspecified; XOVC>) becomes sequentially active on the same host. The distribution of $T_r$ describes the dynamism of that instance of a pilot on that host as measured by $T_r$. |
11 | Task | Host | $T_x$ | Histogram: x = $T_x$ (s); y = Frequency | For every experiment, the same instance of task (1715750072310 operations) is sequentially executed on the same host multiple times. The distribution of $T_x$ measures the dynamism of that instance of task on that host. |
To be considered after dynamism:
ID | Entity | Resource | Measure | Plot | Description |
---|---|---|---|---|---|
11 | Workload | XOVC | $C_w$ | Histogram: x = Number of concurrent active workloads; y = Frequency | For every run of every experiment, for every experiment run, multiple instances of the same run are concurrently submitted to XOVC. The histogram describes the dynamism of that group of runs' workload on XOVC as measured by the number of concurrent active (i.e., tasks of that workload are being executed) workloads ($C_w$) active at runtime |
12 | Workload | XOVC | $C_p$ | Histogram: x = Number of concurrent active pilots; y = Frequency | For every run of every experiment, for every experiment run, multiple instances of the same run are sequentially submitted to XOVC. The histogram describes the dynamism of that run's workload on XOVC as measured by the number of concurrent active (i.e., pilots are alive and ready to execute/executing tasks) pilots active at runtime ($C_p$) |
13 | Workload | XOVC | $C_h$ | Histogram: x = Number of concurrent active hosts; y = Frequency | For every run of every experiment, for every experiment run, multiple instances of the same run are sequentially submitted to XOVC. The histogram describes the dynamism of that run's workload on XOVC as measured by the number of concurrent active hosts (i.e., hosts where tasks are being executed) at runtime ($C_h$) |
14 | Workload | XOVC | $C_t$ | Histogram: x = Number of concurrent active tasks; y = Frequency | For every run of every experiment, for every experiment run, multiple instances of the same run are sequentially submitted to XOVC. The histogram describes the dynamism of that run's workload on XOVC as measured by the number of concurrent active (i.e., being executed) tasks at runtime ($C_t$) |
Notes:
Template to define homogeneity/heterogeneity:
{E,R,M}
The distribution of M measures the heterogeneity of E on R
ID | Entity | Resource | Measure | Plot | Description |
---|---|---|---|---|---|
4 | Pilot | XOVC | $T_q$ | Histogram: x = $T_q$ (s); y = Frequency | For every experiment, multiple instances of the same pilot (<1; unspecified; XOVC>) are concurrently submitted to XOVC. The distribution of $T_q$ measures the heterogeneity of that pilot on XOVC. |
5 | Pilot | XOVC | $T_r$ | Histogram: x = $T_r$ (s); y = Frequency | For every experiment, multiple instances of the same pilot (<1; unspecified; XOVC>) run concurrently on XOVC. The distribution of $T_r$ measures the heterogeneity of that pilot on XOVC. |
6 | Task | XOVC | $T_x$ | Histogram: x = $T_x$ (s); y = Frequency | For every experiment, multiple instances of the same task (1715750072310 operations) are concurrently executed on XOVC. The distribution of the $T_x$ of these instances measures the heterogeneity of that task on XOVC. |
~~7 ~~ | ~~Histogram: x = $TTC$ (s); y = Frequency | For every run of every experiment, multiple instances of the same run are concurrently executed on a host. The histogram describes the heterogeneity of that run's workload on that host as measured by the distribution of $TTC$. (very difficult to have enough data for this plot because we do not control the number of tasks that are executed on the same host and the degree of concurrency (i.e., the number of pilots/cores) with which those tasks are executed on that host.)~~ | |||
~~8 ~~ | |||||
~~9 ~~ | |||||
10 | Pilot | Host | $T_r$ | Histogram: x = $T_r$ (s); y = Frequency | For every experiment, multiple instances of the same pilot (<1; unspecified; XOVC>) run concurrently on a host. The distribution of $T_r$ measures the heterogeneity of that pilot on that host. |
11 | Task | Host | $T_x$ | Histogram: x = $T_x$ (s); y = Frequency | For every experiment, multiple instances of the same task (1715750072310 operations) are concurrently executed on a host. The distribution of the $T_x$ of these instances measures the heterogeneity of that task on that host. |
In [5]:
# Drop runs that did not manage to execute all the requested units.
# We will want to plot these data: given a number of pilots and a
# number of units, how likely is that all the units will be executed
# before all the pilots fail/disappear?
sosg_completed = sosg.loc[sosg.nunit_requested <= sosg.nunit_done]

# Eliminate the units and pilots of unsuccessful runs. The filter must use
# the sessions of sosg_completed: filtering against sosg keeps the pilots
# and units of the runs that were just dropped.
posg_completed = posg[posg.sid.isin(sosg_completed.sid)]
uosg_completed = uosg[uosg.sid.isin(sosg_completed.sid)]
In [6]:
def _mean_std(sessions, column, prefix):
    """Mean and standard deviation of a column per (npilot, nunit_requested).

    Args:
        sessions: DataFrame with 'npilot', 'nunit_requested' and `column`.
        column:   name of the column to aggregate.
        prefix:   prefix of the two output columns.

    Returns:
        DataFrame with columns 'npilot', 'nunit_requested',
        '<prefix>_mean' and '<prefix>_std', one row per group.
    """
    # A list of aggregations followed by a rename replaces the nested
    # dictionary passed to aggregate(), which is deprecated and rejected by
    # pandas >= 1.0.
    stats = sessions.groupby(['npilot', 'nunit_requested'])[column].agg(
        ['mean', 'std'])
    stats = stats.rename(columns={'mean': prefix + '_mean',
                                  'std' : prefix + '_std'})
    return stats.reset_index()


ttc = _mean_std(sosg_completed, 'TTC', 'TTC')
ttx = _mean_std(sosg_completed, 'U_AGENT_EXECUTING', 'TTX')
ttq = _mean_std(sosg_completed, 'P_LRMS_QUEUING', 'TTQ')
In [7]:
# Parenthesized print: same output on Python 2, valid syntax on Python 3.
print("Number of runs for each combination of #pilots and #units\nTable: rows = #units; columns = #pilots")

# Calculate how many runs we have for each #tasks/#pilots combination.
# The numbers of units requested are used both as candidate numbers of
# pilots (columns) and as numbers of units (rows).
ns = sorted(sosg_completed.nunit_requested.unique().tolist())
idx = [int(n) for n in ns]
vs = {}
for npilot in idx:
    runs = []
    for nunit in idx:
        nruns = len(sosg_completed[(sosg_completed.npilot == npilot) &
                                   (sosg_completed.nunit_requested == nunit)])
        # Combinations without runs are shown as NaN instead of 0.
        runs.append(nruns if nruns else np.nan)
    vs[npilot] = runs
display(pd.DataFrame(vs, index=idx))
In [8]:
fig, ax = fig_setup()

# Strong colors first (even indexes 0..20), then the light ones (odd 1..19).
colors = [tableau20[index]
          for index in list(range(0, 21, 2)) + list(range(1, 20, 2))]

title_head = 'XSEDE OSG Virtual Cluster'
title_desc = 'Time to completion ($TTC$) of workloads with 8, 16, 32, 64, 128, 256, 512, 1024, 2048 tasks and pilots'
title_measure = '$TTC$ measures the dynamism of each workload on XOVC'
#title = '%s\n%s\n%s' % (title_head, title_desc, title_measure)
title = ''
fig.suptitle(title, y=1.05)

# One line with error bars (mean and std of TTC) per number of units.
nus = np.sort(ttx['nunit_requested'].unique())
for position, nu in enumerate(nus):
    color = colors[position]
    ttc.loc[ttc.nunit_requested == nu].plot(
        x='npilot',
        y='TTC_mean',
        yerr='TTC_std',
        label=str(int(nu))+' units',
        color=color,
        ax=ax)

# Horizontal reference line at 12 hours (43200s).
ax.axhline(43200, color='r', linestyle='dashed', linewidth=1, label='12 hours')

ax.legend(loc=1, ncol=2)
ax.set_xscale('log', basex=2)
ax.set_xlim(2**2, 2**12)
ax.set_ylim(-5000, 80000)
ax.set_xlabel('Number of Pilots Requested')
ax.set_ylabel('TTC (s)')
plt.savefig('figures/dynamism_1_ttc_workload_xovc.pdf', dpi=600, bbox_inches='tight')
In [9]:
# Keep the sessions with TTC up to 12 hours (43200s) and rename the columns
# whose name contains '_' because LaTeX complains about it.
df = sosg_completed.loc[sosg_completed.TTC <= 43200]
df = df.rename(index=str,
               columns={'nunit_requested'  : 'nunitrequested',
                        'U_AGENT_EXECUTING': 'TTX',
                        'P_LRMS_QUEUING'   : 'TTQ'})

# Integer type so that the number of units requested displays without
# decimals.
df['nunitrequested'] = df['nunitrequested'].astype(int)

# One boxplot (TTC grouped by number of pilots) for each number of units,
# side by side in a single row.
# NOTE(review): the export lost the indentation; the axis labels are assumed
# to be set for every subplot, i.e. inside the loop.
nunits = [64, 128, 256]
fwidth = 26 # 13*len(nunits)
fhight = 4
fig = plt.figure(figsize=(fwidth,fhight))
for i, nunit in enumerate(nunits, 1):
    ax = fig.add_subplot(1, len(nunits), i)
    for side in ("top", "right"):
        ax.spines[side].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    ax.set_ylim(0,21600)

    # The column name is what pandas prints as the title of the subplot.
    column = 'TTC %s Tasks' % nunit
    dfu = df.rename(index=str, columns={'TTC' : column})
    dfu.loc[dfu.nunitrequested == nunit].boxplot(column=column,
                                                 by='npilot',
                                                 ax=ax,
                                                 grid=False,
                                                 showfliers=False)
    ax.set_xlabel('Number of Pilots')
    ax.set_ylabel('Time (s)')

# Remove the figure title that pandas imposes when grouping a boxplot.
fig.suptitle('')
plt.savefig('figures/dynamism_1a_ttc_workload_xovc_boxplot.pdf', dpi=600, bbox_inches='tight')
In [10]:
fig, ax = fig_setup()

# Strong colors first (even indexes 0..20), then the light ones (odd 1..19).
colors = [tableau20[index]
          for index in list(range(0, 21, 2)) + list(range(1, 20, 2))]

title_head = 'XSEDE OSG Virtual Cluster'
title_desc = 'Time to completion ($TTX$) of workloads with 8, 16, 32, 64, 128, 256, 512, 1024, 2048 tasks and pilots'
title_measure = '$TTX$ measures the dynamism of each workload on XOVC'
# The figure is saved without a title.
title = ''

# One line with error bars (mean and std of TTX) per number of units.
nus = np.sort(ttc['nunit_requested'].unique())
for position, nu in enumerate(nus):
    same_nunit = ttx.loc[ttx['nunit_requested'] == nu]
    same_nunit.plot(x='npilot', y='TTX_mean', yerr='TTX_std',
                    label=str(int(nu))+' units',
                    color=colors[position],
                    ax=ax)

# Horizontal reference line at 12 hours (43200s).
ax.axhline(43200, color='r', linestyle='dashed', linewidth=1, label='12 hours')

ax.legend(loc=1, ncol=2)
ax.set_xscale('log', basex=2)
ax.set_xlim(2**2, 2**12)
ax.set_ylim(-5000, 80000)
ax.set_xlabel('Number of Pilots Requested')
ax.set_ylabel('TTX (s)')
plt.savefig('figures/dynamism_2_ttx_workload_xovc.pdf', dpi=600, bbox_inches='tight')
In [11]:
fig, ax = fig_setup()

# Strong colors first (even indexes 0..20), then the light ones (odd 1..19).
colors = [tableau20[index]
          for index in list(range(0, 21, 2)) + list(range(1, 20, 2))]

title_head = 'XSEDE OSG Virtual Cluster'
title_desc = 'Time to completion ($TTQ$) of workloads with 8, 16, 32, 64, 128, 256, 512, 1024, 2048 tasks and pilots'
title_measure = '$TTQ$ measures the dynamism of each workload on XOVC'
# The figure is saved without a title. Assign it explicitly: the original
# bare `title` expression only worked if an earlier cell had defined it.
title = ''

# One line with error bars (mean and std of TTQ) per number of units.
nus = ttc['nunit_requested'].unique()
nus.sort()
for position, nu in enumerate(nus):
    ttq.loc[ttq['nunit_requested']==nu].plot(x='npilot', y='TTQ_mean', yerr='TTQ_std',
                                             label=str(int(nu))+' units',
                                             color=colors[position],
                                             ax=ax)

# Horizontal reference line at 12 hours (43200s).
plt.axhline(43200, color='r', linestyle='dashed', linewidth=1, label='12 hours')

ax.legend(loc=1, ncol=2)
ax.set_xscale('log', basex=2)
ax.set_xlim(2**2, 2**12)
ax.set_ylim(-5000, 80000)
ax.set_xlabel('Number of Pilots Requested')
# This figure plots TTQ_mean: the label was 'TTX (s)', copied from the
# TTX cell.
ax.set_ylabel('TTQ (s)')
plt.savefig('figures/dynamism_3_ttq_workload_xovc.pdf', dpi=600, bbox_inches='tight')
In [12]:
# Take one pilot per session (pilots are submitted in bulk), so that the
# sampled pilots come from sequential submissions. Only pilots with a
# running time and a number of units both greater than 1 are candidates.
# NOTE(review): the original comment says "at least one unit" but the
# filter is nunit > 1; confirm which one is intended.
psample = posg.loc[(posg.P_LRMS_RUNNING > 1) & (posg.nunit > 1)]

tr, tq = [], []
for sid in sosg.sid:
    session_pilots = psample[psample.sid == sid]
    if session_pilots.empty:
        continue
    # First candidate pilot of the session.
    first_pid = session_pilots.pid.tolist()[0]
    first_pilot = session_pilots[session_pilots.pid == first_pid]
    tr.append(first_pilot.P_LRMS_RUNNING.tolist()[0])
    tq.append(first_pilot.P_LRMS_QUEUING.tolist()[0])

pdynxovc = pd.DataFrame({'$T_r$': tr, '$T_q$': tq})
In [13]:
fig, ax = fig_setup()

tq_median = pdynxovc['$T_q$'].median()

title_head = 'XSEDE OSG Virtual Cluster'
title_desc = 'Queue time ($T_q$) of %s single-core pilots sequentially submitted to XOVC' % len(tq)
title_measure = '$T_q$ measures the dynamism of this pilot on XOVC'
title = '\n'.join([title_head, title_desc, title_measure])
fig.suptitle(title, y=1.05)

# Histogram of the sampled queue times; the dashed red line is the median.
pdynxovc[['$T_q$']].plot.hist(ax=ax, color='gray', bins=30, alpha=0.25)
ax.axvline(tq_median, color='r', linestyle='dashed', linewidth=1)

ax.set_xlabel('$T_q$ (s)')
ax.set_ylabel('Number of Pilots')
ax.legend(labels=['Median $%is$' % tq_median, 'Pilot Queue Time ($T_q$)'])
plt.savefig('figures/dynamism_4_tq_pilot_xovc.pdf', dpi=600, bbox_inches='tight')
In [14]:
fig, ax = fig_setup()

tr_median = pdynxovc['$T_r$'].median()

title_head = 'XSEDE OSG Virtual Cluster'
title_desc = 'Running time ($T_r$) of %s single-core pilots sequentially submitted to XOVC' % len(tq)
title_measure = '$T_r$ measures the dynamism of this pilot on XOVC'
title = '\n'.join([title_head, title_desc, title_measure])
fig.suptitle(title, y=1.05)

# Histogram of the sampled running times; the dashed red line is the median.
pdynxovc[['$T_r$']].plot.hist(ax=ax, color='gray', bins=30, alpha=0.25)
ax.axvline(tr_median, color='r', linestyle='dashed', linewidth=1)

ax.set_xlabel('$T_r$ (s)')
ax.set_ylabel('Number of Pilots')
ax.legend(labels=['Median $%is$' % tr_median, 'Pilot Running Time ($T_r$)'])
plt.savefig('figures/dynamism_5_tr_pilot_xovc.pdf', dpi=600, bbox_inches='tight')
In [15]:
# Take one task per session, so that the sampled tasks come from sequential
# submissions. Only units with both U_AGENT_EXECUTING and DONE greater
# than 1 are candidates.
tsample = uosg.loc[(uosg.U_AGENT_EXECUTING > 1) & (uosg.DONE > 1)]

tx = []
for sid in sosg.sid:
    session_units = tsample[tsample.sid == sid]
    if session_units.empty:
        continue
    # First candidate unit of the session.
    first_uid = session_units.uid.tolist()[0]
    first_unit = session_units[session_units.uid == first_uid]
    tx.append(first_unit.U_AGENT_EXECUTING.tolist()[0])

tdynxovc = pd.DataFrame({'$T_x$': tx})
In [16]:
fig, ax = fig_setup()

tx_median = tdynxovc['$T_x$'].median()

title_head = 'XSEDE OSG Virtual Cluster'
title_desc = 'Execution time ($T_x$) of %s single-core tasks sequentially executed on XOVC' % len(tx)
title_measure = '$T_x$ measures the dynamism of this task on XOVC'
title = '\n'.join([title_head, title_desc, title_measure])
fig.suptitle(title, y=1.05)

# Histogram of the sampled execution times; the dashed red line is the
# median.
tdynxovc[['$T_x$']].plot.hist(ax=ax, color='gray', bins=30, alpha=0.25)
ax.axvline(tx_median, color='r', linestyle='dashed', linewidth=1)

ax.set_xlabel('$T_x$ (s)')
ax.set_ylabel('Number of Tasks')
ax.legend(labels=['Median $%is$' % tx_median, 'Task Execution Time ($T_x$)'])
plt.savefig('figures/dynamism_6_tx_task_xovc.pdf', dpi=600, bbox_inches='tight')
In [17]:
# For every session, get a single sampled pilot for every host used in that
# session. Hosts named 'local' are skipped.
# The selections are collected in a list and concatenated once: growing a
# DataFrame with DataFrame.append inside a loop copies the accumulated data
# at every iteration, and the method is not available in pandas >= 2.0.
host_pilots = []
for sid in sosg.sid:
    session_pilots = psample[psample.sid == sid]
    for host in session_pilots.hid.unique().tolist():
        if host == 'local':
            continue
        on_host = session_pilots[session_pilots.hid == host]
        pilots = on_host.pid.tolist()
        if pilots:
            # Rows of the first pilot listed for this session and host.
            host_pilots.append(on_host[on_host.pid == pilots[0]])

# ignore_index gives the same 0..n-1 index as the original reset_index.
if host_pilots:
    htr = pd.concat(host_pilots, ignore_index=True)
else:
    htr = pd.DataFrame()
In [18]:
fig = plt.figure(figsize=(21,7))
ax = fig.add_subplot(111)
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()

# One column of running times (P_LRMS_RUNNING) per host.
grouped = htr.groupby('hid')
df = pd.DataFrame({col:vals['P_LRMS_RUNNING'] for col,vals in grouped})

# Order the hosts by number of sampled pilots, largest first.
counts = df.count()
counts = counts.sort_values(ascending=False)
# Parenthesized print: same output on Python 2, valid syntax on Python 3.
print('Number of pilots executed per host:')
print(counts[:15])

# Boxplot of the 15 hosts with most pilots.
df = df[counts.index]
df.iloc[:,:15].boxplot(ax=ax, rot=75, grid=False)

title_head = 'XSEDE OSG Virtual Cluster'
title_desc = 'Running time ($T_r$) of %s single-core pilots sequentially submitted to %s hosts' % (len(htr), len(htr.hid.unique().tolist()))
title_measure = '$T_r$ measures the dynamism of this pilot on each host'
title = '%s\n%s\n%s' % (title_head, title_desc, title_measure)
fig.suptitle(title, y=1.05, backgroundcolor='white', color='black')
ax.set_ylabel('Time (s)')
ax.set_xlabel('Host Name')
plt.savefig('figures/dynamism_10_tr_pilot_host.pdf', dpi=600, bbox_inches='tight')
In [19]:
# For every session, get a single sampled unit for every host used in that
# session. Hosts named 'local' are skipped.
# The selections are collected in a list and concatenated once: growing a
# DataFrame with DataFrame.append inside a loop copies the accumulated data
# at every iteration, and the method is not available in pandas >= 2.0.
host_units = []
for sid in sosg.sid:
    session_units = tsample[tsample.sid == sid]
    for host in session_units.hid.unique().tolist():
        if host == 'local':
            continue
        on_host = session_units[session_units.hid == host]
        units = on_host.uid.tolist()
        if units:
            # Rows of the first unit listed for this session and host.
            host_units.append(on_host[on_host.uid == units[0]])

# ignore_index gives the same 0..n-1 index as the original reset_index.
if host_units:
    htx = pd.concat(host_units, ignore_index=True)
else:
    htx = pd.DataFrame()
In [20]:
fig = plt.figure(figsize=(21,7))
ax = fig.add_subplot(111)
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()

# One column of execution times (U_AGENT_EXECUTING) per host.
grouped = htx.groupby('hid')
df = pd.DataFrame({col:vals['U_AGENT_EXECUTING'] for col,vals in grouped})

# Order the hosts by number of sampled tasks, largest first.
counts = df.count()
counts = counts.sort_values(ascending=False)
# Parenthesized print: same output on Python 2, valid syntax on Python 3.
print('Number of tasks executed per host:')
print(counts[:15])

# Boxplot of the 15 hosts with most tasks.
df = df[counts.index]
df.iloc[:,:15].boxplot(ax=ax, rot=75, grid=False)

title_head = 'XSEDE OSG Virtual Cluster'
title_desc = 'Execution time ($T_x$) of %s single-core tasks sequentially submitted to %s hosts' % (len(htx), len(htx.hid.unique().tolist()))
title_measure = '$T_x$ measures the dynamism of this task on each host'
title = '%s\n%s\n%s' % (title_head, title_desc, title_measure)
# y=1.05 places the three-line title above the axes, as in the pilot-per-host
# figure.
fig.suptitle(title, y=1.05, backgroundcolor='white', color='black')
ax.set_ylabel('Time (s)')
ax.set_xlabel('Host Name')
plt.savefig('figures/dynamism_11_tx_task_host.pdf', dpi=600, bbox_inches='tight')
In [21]:
# Find pilots that:
# - have successfully run
# - have been pending concurrently on XOVC
concurrent_parts = []
npartition = 0
for s in sosg.sid.tolist():
    # Select the pilots of the session that have both a PMGR_ACTIVE_PENDING
    # and a PMGR_ACTIVE timestamp, sorted by PMGR_ACTIVE_PENDING.
    # The frame is named session_pilots (not `sps`) so that the module alias
    # `sps` of scipy.stats is not overwritten.
    session_pilots = posg[(posg.sid == s) &
                          (posg.PMGR_ACTIVE_PENDING > 1) &
                          (posg.PMGR_ACTIVE > 1)].sort_values('PMGR_ACTIVE_PENDING')
    session_pilots = session_pilots.reset_index(drop=True)

    # Special case of concurrency: all the pilots that became pending before
    # the earliest PMGR_ACTIVE timestamp of the session.
    base = 0
    base_end = session_pilots[['PMGR_ACTIVE']].min().tolist()[0]
    for i in session_pilots.index.tolist():
        if session_pilots.loc[i].PMGR_ACTIVE_PENDING > base_end:
            npartition += 1
            session_pilots = session_pilots.assign(npartition=npartition)
            # NOTE(review): pilots 0..i-1 precede the first non-concurrent
            # pilot i, so the count i-1 and the slice [base:i-1] look off by
            # one. Kept as in the original; confirm.
            cpilots = i-1
            session_pilots = session_pilots.assign(cpilots=cpilots)
            concurrent_parts.append(session_pilots[base:i-1])
            break

# Concatenate once instead of growing a DataFrame with DataFrame.append in
# the loop (quadratic copying; method not available in pandas >= 2.0).
if concurrent_parts:
    psconc = pd.concat(concurrent_parts)
else:
    psconc = pd.DataFrame()

# Find the session with most concurrent pilots
psc = psconc.sort_values('cpilots', ascending=False)[0:1].sid.tolist()[0]
In [22]:
fig, ax = fig_setup()

# Pilots of the session with most concurrent pilots.
psconcurrent = psconc[psconc.sid == psc]
tq_median = psconcurrent['P_LRMS_QUEUING'].median()

title_head = 'XSEDE OSG Virtual Cluster'
title_desc = 'Queuing time ($T_q$) of %s instances of a single-core pilot concurrently submitted to XOVC' % len(psconcurrent.pid.tolist())
title_measure = '$T_q$ measures the heterogeneity of this pilot on XOVC'
title = '\n'.join([title_head, title_desc, title_measure])
fig.suptitle(title, y=1.05)

# Histogram of the queuing times; the dashed red line is the median.
psconcurrent[['P_LRMS_QUEUING']].plot.hist(ax=ax, color='gray', bins=30, alpha=0.25)
ax.axvline(tq_median, color='r', linestyle='dashed', linewidth=1)

ax.set_xlabel('$T_q$ (s)')
ax.set_ylabel('Number of Pilots')
ax.legend(labels=['Median $%is$' % tq_median, 'Pilot Queuing Time ($T_q$)'])
plt.savefig('figures/heterogeneity_4_tq_pilot_xovc.pdf', dpi=600, bbox_inches='tight')
In [23]:
# Find pilots that:
# - have successfully run
# - have run concurrently on XOVC
concurrent_parts = []
npartition = 0
for s in sosg.sid.tolist():
    # Select the pilots of the session that have a PMGR_ACTIVE timestamp,
    # sorted by the time they became active.
    # The frame is named session_pilots (not `sps`) so that the module alias
    # `sps` of scipy.stats is not overwritten.
    session_pilots = posg[(posg.sid == s) &
                          (posg.PMGR_ACTIVE > 1)].sort_values('PMGR_ACTIVE')
    session_pilots = session_pilots.reset_index(drop=True)

    # Special case of concurrency: all the pilots that became active before
    # the first pilot stopped. The cut-off is the earliest FAILED timestamp
    # when the session has one, otherwise the earliest DONE timestamp.
    base = 0
    base_done = session_pilots[['DONE']].min().tolist()[0]
    base_fail = session_pilots[['FAILED']].min().tolist()[0]
    # A missing value must be tested with pd.isnull: `base_fail is not
    # np.nan` compares object identity and is true for the float returned
    # by tolist(), so a NaN cut-off was used for sessions without failures.
    if pd.isnull(base_fail):
        base_end = base_done
    else:
        base_end = base_fail

    for i in session_pilots.index.tolist():
        if session_pilots.loc[i].PMGR_ACTIVE > base_end:
            npartition += 1
            session_pilots = session_pilots.assign(npartition=npartition)
            # NOTE(review): pilots 0..i-1 precede the first non-concurrent
            # pilot i, so the count i-1 and the slice [base:i-1] look off by
            # one. Kept as in the original; confirm.
            cpilots = i-1
            session_pilots = session_pilots.assign(cpilots=cpilots)
            concurrent_parts.append(session_pilots[base:i-1])
            break

# Concatenate once instead of growing a DataFrame with DataFrame.append in
# the loop (quadratic copying; method not available in pandas >= 2.0).
if concurrent_parts:
    psconc = pd.concat(concurrent_parts)
else:
    psconc = pd.DataFrame()

# Find the session with most concurrent pilots
psc = psconc.sort_values('cpilots', ascending=False)[0:1].sid.tolist()[0]
In [ ]:
fig, ax = fig_setup()

# Pilots of the session with most concurrent pilots.
psconcurrent = psconc[psconc.sid == psc]
tr_median = psconcurrent['P_LRMS_RUNNING'].median()

title_head = 'XSEDE OSG Virtual Cluster'
title_desc = 'Running time ($T_r$) of %s instances of a single-core pilot concurrently submitted to XOVC' % len(psconcurrent.pid.tolist())
title_measure = '$T_r$ measures the heterogeneity of this pilot on XOVC'
title = '\n'.join([title_head, title_desc, title_measure])
fig.suptitle(title, y=1.05)

# Histogram of the running times; the dashed red line is the median.
psconcurrent[['P_LRMS_RUNNING']].plot.hist(ax=ax, color='gray', bins=30, alpha=0.25)
ax.axvline(tr_median, color='r', linestyle='dashed', linewidth=1)

ax.set_xlabel('$T_r$ (s)')
ax.set_ylabel('Number of Pilots')
ax.legend(labels=['Median $%is$' % tr_median, 'Pilot Running Time ($T_r$)'])
plt.savefig('figures/heterogeneity_5_tr_pilot_xovc.pdf', dpi=600, bbox_inches='tight')
In [ ]:
# Find units that:
# - have successfully executed
# - have executed concurrently on XOVC
concurrent_parts = []
npartition = 0
for s in sosg.sid.tolist():
    # Select the units of the session that have both an AGENT_EXECUTING and
    # an AGENT_STAGING_OUTPUT_PENDING timestamp, sorted by the time they
    # started to execute.
    sus = uosg[(uosg.sid == s) &
               (uosg.AGENT_EXECUTING > 1) &
               (uosg.AGENT_STAGING_OUTPUT_PENDING > 1)].sort_values('AGENT_EXECUTING')
    sus = sus.reset_index(drop=True)

    # Special case of concurrency: all the units that started executing
    # before the earliest AGENT_STAGING_OUTPUT_PENDING timestamp of the
    # session. Due to the large variation of Tx, multiple units finish in
    # the time a single unit takes to execute; strictly speaking, those
    # units are not executing concurrently.
    # NOTE(review): the original comment mentions saving a partition only
    # with more than 285 concurrent units; no such threshold is applied.
    base = 0
    base_end = sus[['AGENT_STAGING_OUTPUT_PENDING']].min().tolist()[0]
    for i in sus.index.tolist():
        if sus.loc[i].AGENT_EXECUTING > base_end:
            npartition += 1
            sus = sus.assign(npartition=npartition)
            # NOTE(review): units 0..i-1 precede the first non-concurrent
            # unit i, so the count i-1 and the slice [base:i-1] look off by
            # one. Kept as in the original; confirm.
            cunits = i-1
            sus = sus.assign(cunits=cunits)
            concurrent_parts.append(sus[base:i-1])
            break

# Concatenate once instead of growing a DataFrame with DataFrame.append in
# the loop (quadratic copying; method not available in pandas >= 2.0).
if concurrent_parts:
    usconc = pd.concat(concurrent_parts)
else:
    usconc = pd.DataFrame()

# Find the session with most concurrent units
sc = usconc.sort_values('cunits', ascending=False)[0:1].sid.tolist()[0]
In [ ]:
fig, ax = fig_setup()

# Units of the session with most concurrent units.
usconcurrent = usconc[usconc.sid == sc]
tx_median = usconcurrent['U_AGENT_EXECUTING'].median()

title_head = 'XSEDE OSG Virtual Cluster'
title_desc = 'Execution time ($T_x$) of %s instances of a single-core task concurrently executed on XOVC' % len(usconcurrent.uid.tolist())
title_measure = '$T_x$ measures the heterogeneity of this task on XOVC'
title = '\n'.join([title_head, title_desc, title_measure])
fig.suptitle(title, y=1.05)

# Histogram of the execution times; the dashed red line is the median.
usconcurrent[['U_AGENT_EXECUTING']].plot.hist(ax=ax, color='gray', bins=30, alpha=0.25)
ax.axvline(tx_median, color='r', linestyle='dashed', linewidth=1)

ax.set_xlabel('$T_x$ (s)')
ax.set_ylabel('Number of Tasks')
ax.legend(labels=['Median $%is$' % tx_median, 'Task Execution Time ($T_x$)'])
plt.savefig('figures/heterogeneity_6_tx_task_xovc.pdf', dpi=600, bbox_inches='tight')
In [ ]:
# Find pilots that:
# - have successfully run
# - have run concurrently on each host
#
# Every group kept gets two extra columns: `npartition` (id of the group
# within its session) and `cpilots` (number of pilots in the group).
concurrent_pilots = []

# For every session, get a group of concurrent pilots for every host
for sid in sosg.sid.tolist():

    # Select the pilots in a session that have run and
    # sort them by the time they started to run.
    # The frame is deliberately not named `sps`: that name is the alias of
    # scipy.stats imported at the top of the notebook.
    session_pilots = posg[(posg.sid == sid) &
                          (posg.PMGR_ACTIVE > 1)].sort_values('PMGR_ACTIVE')
    session_pilots = session_pilots.reset_index(drop=True)

    # Partition the running pilots by host. Each partition contains
    # pilots that run concurrently on a host.
    npartition = 0
    for host in session_pilots.hid.unique().tolist():

        # Skip host local, i.e., all hosts we were not able to know about.
        if host == 'local':
            continue

        pilots = session_pilots[session_pilots.hid == host]
        pilots = pilots.reset_index(drop=True)

        # Search for all the pilots in the selected pilots that run
        # concurrently on the selected host.
        # Use a special case: all the pilots that started to run before the
        # shortest pilot stopped. The earliest failure is used when at least
        # one pilot failed, the earliest completion otherwise.
        base_done = pilots['DONE'].min()
        base_fail = pilots['FAILED'].min()
        # NaN has to be detected with isnull(): an identity test against
        # np.nan does not recognise NaN values coming out of pandas.
        base_end = base_done if pd.isnull(base_fail) else base_fail

        # `pilots` is sorted by PMGR_ACTIVE, so the position of the first
        # pilot that became active after `base_end` is also the number of
        # pilots that became active before it. When no pilot started late
        # (or `base_end` is unknown) all the pilots of the host are kept.
        started_late = (pilots.PMGR_ACTIVE > base_end).values
        if started_late.any():
            cpilots = int(started_late.argmax())
        else:
            cpilots = len(pilots)
        if cpilots == 0:
            continue

        npartition += 1
        pilots = pilots.assign(npartition=npartition).assign(cpilots=cpilots)
        concurrent_pilots.append(pilots.iloc[:cpilots])

# Concatenate once instead of growing the DataFrame inside the loops.
assert concurrent_pilots, 'No session with concurrently running pilots.'
psconc = pd.concat(concurrent_pilots).reset_index(drop=True)

# Find the session with most concurrent pilots for each host
largest_groups = []
for host in psconc.hid.dropna().unique().tolist():
    on_host = psconc.hid == host
    largest = psconc[on_host].sort_values('cpilots', ascending=False).iloc[0]
    slargest = largest.sid
    cpilots = largest.cpilots
    # Restrict the selection to `host`: the same session can hold a group of
    # the same size on another host.
    largest_groups.append(psconc[on_host &
                                 (psconc.sid == slargest) &
                                 (psconc.cpilots == cpilots)])
pchosts = pd.concat(largest_groups) if largest_groups else pd.DataFrame()
In [ ]:
# Box plot of the running time (Tr) of the concurrent pilots, one box per
# host, for the 15 hosts with the largest number of pilots.
fig = plt.figure(figsize=(21,7))
ax = fig.add_subplot(111)

# Keep only the left and bottom axes.
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
ax.xaxis.tick_bottom()
ax.yaxis.tick_left()

# One column of running times per host, ordered by number of pilots.
grouped = pchosts.groupby('hid')
df = pd.DataFrame({hid: rows['P_LRMS_RUNNING'] for hid, rows in grouped})
counts = df.count().sort_values(ascending=False)
print('Number of pilots executed per host:')
print(counts[:15])

df = df[counts.index]
df.iloc[:, :15].boxplot(ax=ax, rot=75, grid=False)

# Three-line figure title: where, what, why.
title_head = 'XSEDE OSG Virtual Cluster'
title_desc = (
    'Running time ($T_r$) of %s single-core pilots concurrently submitted to %s hosts'
    % (len(pchosts), len(pchosts.hid.unique())))
title_measure = '$T_r$ measures the heterogeneity of this pilot on each host'
title = '%s\n%s\n%s' % (title_head, title_desc, title_measure)
fig.suptitle(title, y=1.05, backgroundcolor='white', color='black')

ax.set_ylabel('Time (s)')
ax.set_xlabel('Host Name')

plt.savefig('figures/heterogeneity_10_tr_pilot_host.pdf',
            dpi=600, bbox_inches='tight')
In [ ]:
# Find units that:
# - have successfully executed
# - have executed concurrently on each host
#
# Every group kept gets two extra columns: `npartition` (id of the group
# within its session) and `cunits` (number of units in the group).
# NOTE: this cell rebinds `usconc`, which the XOVC cell above also defines.
concurrent_units = []

# For every session, get a group of concurrent units for every host
for sid in sosg.sid.tolist():

    # Select the units in a session that have completed and
    # sort them by the time they started to execute.
    sus = uosg[(uosg.sid == sid) &
               (uosg.AGENT_EXECUTING > 1) &
               (uosg.AGENT_STAGING_OUTPUT_PENDING > 1)].sort_values('AGENT_EXECUTING')
    sus = sus.reset_index(drop=True)

    # Partition the executed units by host. Each partition contains
    # units that executed concurrently on a host.
    npartition = 0
    for host in sus.hid.unique().tolist():

        # Skip host local, i.e., all hosts we were not able to know about.
        if host == 'local':
            continue

        units = sus[sus.hid == host]
        units = units.reset_index(drop=True)

        # Search for all the units in the selected units that executed
        # concurrently on the selected host.
        # Use a special case: all the units that started to execute before
        # the shortest unit finished.
        base_end = units['AGENT_STAGING_OUTPUT_PENDING'].min()

        # `units` is sorted by AGENT_EXECUTING, so the position of the first
        # unit that started after `base_end` is also the number of units that
        # started before it. When no unit started late, all the units of the
        # host are kept.
        started_late = (units.AGENT_EXECUTING > base_end).values
        if started_late.any():
            cunits = int(started_late.argmax())
        else:
            cunits = len(units)
        if cunits == 0:
            continue

        npartition += 1
        units = units.assign(npartition=npartition).assign(cunits=cunits)
        concurrent_units.append(units.iloc[:cunits])

# Concatenate once instead of growing the DataFrame inside the loops.
assert concurrent_units, 'No session with concurrently executed units.'
usconc = pd.concat(concurrent_units).reset_index(drop=True)

# Find the session with most concurrent units for each host
largest_groups = []
for host in usconc.hid.dropna().unique().tolist():
    on_host = usconc.hid == host
    largest = usconc[on_host].sort_values('cunits', ascending=False).iloc[0]
    slargest = largest.sid
    cunits = largest.cunits
    # Restrict the selection to `host`: the same session can hold a group of
    # the same size on another host.
    largest_groups.append(usconc[on_host &
                                 (usconc.sid == slargest) &
                                 (usconc.cunits == cunits)])
uchosts = pd.concat(largest_groups) if largest_groups else pd.DataFrame()
In [ ]:
# Box plot of the execution time (Tx) of the concurrent units, one box per
# host, for the 15 hosts with the largest number of units.
fig = plt.figure(figsize=(21,7))
ax = fig.add_subplot(111)

# Keep only the left and bottom axes.
for side in ("top", "right"):
    ax.spines[side].set_visible(False)
ax.xaxis.tick_bottom()
ax.yaxis.tick_left()

# One column of execution times per host, ordered by number of units.
grouped = uchosts.groupby('hid')
df = pd.DataFrame({hid: rows['U_AGENT_EXECUTING'] for hid, rows in grouped})
counts = df.count().sort_values(ascending=False)
print('Number of units executed per host:')
print(counts[:15])

df = df[counts.index]
df.iloc[:, :15].boxplot(ax=ax, rot=75, grid=False)

# Three-line figure title: where, what, why.
title_head = 'XSEDE OSG Virtual Cluster'
title_desc = (
    'Executing time ($T_x$) of %s single-core units concurrently executed on %s hosts'
    % (len(uchosts), len(uchosts.hid.unique())))
title_measure = '$T_x$ measures the heterogeneity of this task on each host'
title = '%s\n%s\n%s' % (title_head, title_desc, title_measure)
fig.suptitle(title, y=1.05, backgroundcolor='white', color='black')

ax.set_ylabel('Time (s)')
ax.set_xlabel('Host Name')

plt.savefig('figures/heterogeneity_11_tx_task_host.pdf',
            dpi=600, bbox_inches='tight')
In [ ]: