In [51]:
%matplotlib inline

import os
import sys
import glob
import pprint

import numpy as np
import scipy as sp
import pandas as pd
import scipy.stats as sps
import statsmodels.api as sm

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib.ticker as ticker
import matplotlib.gridspec as gridspec

import radical.utils as ru
import radical.pilot as rp
import radical.analytics as ra

from IPython.display import display

pd.set_option('expand_frame_repr', False)

In [52]:
# Global configurations
# ---------------------

# Typeset all diagram text with LaTeX, using a serif body font.
mpl.rcParams.update({
    'text.usetex': True,
    'font.family': 'serif',
    'font.serif' : ['Nimbus Roman Becker No9L'],
})

# Thin axis and tick lines keep the focus on the data.
for _param in ('axes.linewidth',
               'xtick.major.width', 'xtick.minor.width',
               'ytick.major.width', 'ytick.minor.width'):
    mpl.rcParams[_param] = 0.75

# Do not use a box for the legend to avoid distractions.
#mpl.rcParams['legend.frameon'] = False

# Helpers
# -------

# Coordinated palette: the "Tableau 20" colors as RGB triplets.
# Entries come in strong/light pairs.
_TABLEAU20_RGB = [
    (31 , 119, 180), (174, 199, 232),  # blue   [ 0,1 ]
    (255, 127, 14 ), (255, 187, 120),  # orange [ 2,3 ]
    (44 , 160, 44 ), (152, 223, 138),  # green  [ 4,5 ]
    (214, 39 , 40 ), (255, 152, 150),  # red    [ 6,7 ]
    (148, 103, 189), (197, 176, 213),  # purple [ 8,9 ]
    (140, 86 , 75 ), (196, 156, 148),  # brown  [10,11]
    (227, 119, 194), (247, 182, 210),  # pink   [12,13]
    (127, 127, 127), (199, 199, 199),  # gray   [14,15]
    (188, 189, 34 ), (219, 219, 141),  # yellow [16,17]
    (23 , 190, 207), (158, 218, 229),  # cyan   [18,19]
]

# matplotlib expects RGB channels scaled to the [0, 1] range.
tableau20 = [(r / 255., g / 255., b / 255.) for r, g, b in _TABLEAU20_RGB]

# Return a single plot without right and top axes
def fig_setup():
    """Create a 13x7 figure with a single axes, top/right spines hidden.

    Returns:
        (fig, ax): the matplotlib Figure and its single Axes.
    """
    fig = plt.figure(figsize=(13, 7))
    ax  = fig.add_subplot(111)
    # Hide the two distracting spines; keep ticks on the remaining sides.
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    return fig, ax

In [3]:
def load_data(rdir):
    """Walk `rdir` and load every RADICAL-Pilot session found two levels deep.

    Expects the layout `<rdir>/<experiment>/<session_dir>/<sid>.json`.

    Args:
        rdir (str): root directory of the experiment data.

    Returns:
        (sessions, experiments): dicts keyed by session ID, mapping to the
        ra.Session object and to the experiment (top-level folder) name.
    """
    sessions    = {}
    experiments = {}
    start = rdir.rfind(os.sep) + 1
    for path, dirs, files in os.walk(rdir):
        # Session directories sit exactly two levels below rdir:
        # <experiment>/<session_dir>.
        folders = path[start:].split(os.sep)
        if len(folders) != 2:
            continue
        jsons = glob.glob('%s/*.json' % path)
        if not jsons:
            # No session description here -- skip instead of crashing
            # on an unguarded [0] index.
            continue
        sid = os.path.basename(jsons[0])[:-5]   # strip the '.json' suffix
        sessions[sid]    = ra.Session(sid, 'radical.pilot', src=path)
        experiments[sid] = folders[0]
    return sessions, experiments

# Load experiments' dataset into ra.session objects
# stored in a DataFrame.
# NOTE(review): hardcoded absolute local path -- consider making the data
# directory configurable.
rdir = '/Users/mturilli/Projects/RADICAL/github/experiments/AIMES-Swift/viveks_workflow/analysis/sessions/data/'
sessions, experiments = load_data(rdir)
# One row per session ID: 'session' holds the ra.Session object,
# 'experiment' the name of the experiment the session belongs to.
sessions = pd.DataFrame({'session': sessions,
                         'experiment': experiments})

In [4]:
# Add each session's total time to completion (TTC) to the DataFrame.
# Use .loc instead of the deprecated (and later removed) DataFrame.ix.
for sid in sessions.index:
    sessions.loc[sid, 'TTC'] = sessions.loc[sid, 'session'].ttc

In [5]:
# Add each session's unit count to the DataFrame.
# Use .loc instead of the deprecated (and later removed) DataFrame.ix.
for sid in sessions.index:
    sessions.loc[sid, 'nunit'] = len(sessions.loc[sid, 'session'].filter(etype='unit', inplace=False).get())

In [ ]:
# Display the sessions DataFrame for inspection.
sessions

In [6]:
# Model of TOTAL pilot durations. The full state model is kept for reference:
# ttpdm = {'TT_PILOT_PMGR_SCHEDULING': ['NEW'                   ,  'PMGR_LAUNCHING_PENDING'],
#          'TT_PILOT_PMGR_QUEUING'   : ['PMGR_LAUNCHING_PENDING',  'PMGR_LAUNCHING'],
#          'TT_PILOT_LRMS_SUBMITTING': ['PMGR_LAUNCHING'        ,  'PMGR_ACTIVE_PENDING'],
#          'TT_PILOT_LRMS_QUEUING'   : ['PMGR_ACTIVE_PENDING'   ,  'PMGR_ACTIVE'],
#          'TT_PILOT_LRMS_RUNNING'   : ['PMGR_ACTIVE'           , ['DONE',
#                                                                  'CANCELED',
#                                                                  'FAILED']]}
ttpdm = {'TT_PILOT_LRMS_QUEUING': [rp.PMGR_ACTIVE_PENDING, rp.PMGR_ACTIVE]}

# Add total pilot durations to sessions' DF.
# Use .loc instead of the deprecated (and later removed) DataFrame.ix.
for sid in sessions.index:
    s = sessions.loc[sid, 'session'].filter(etype='pilot', inplace=False)
    for name, states in ttpdm.items():
        sessions.loc[sid, name] = s.duration(states)

In [7]:
# Model of pilot durations. Only the queuing phase is measured here; the
# full state model is kept (commented) for reference.
pdm = {#'PMGR_SCHEDULING': [rp.NEW                   ,  rp.PMGR_LAUNCHING_PENDING],
       #'PMGR_QUEUING'   : [rp.PMGR_LAUNCHING_PENDING,  rp.PMGR_LAUNCHING],
       #'LRMS_SUBMITTING': [rp.PMGR_LAUNCHING        ,  rp.PMGR_ACTIVE_PENDING],
       'LRMS_QUEUING'   : [rp.PMGR_ACTIVE_PENDING   ,  rp.PMGR_ACTIVE]}#,
       #'LRMS_RUNNING'   : [rp.PMGR_ACTIVE           , [rp.DONE,
                                                       #rp.CANCELED,
                                                       #rp.FAILED]]}

# DataFrame structure (column -> list of values) for pilot durations.
pds = {'pid': [],
       'sid': [],
       'experiment': [],
       'LRMS_QUEUING': []}

# Calculate the duration for each state of each pilot of each run and
# populate the DataFrame structure.
# Use .loc instead of the deprecated (and later removed) DataFrame.ix.
for sid in sessions.index:
    s = sessions.loc[sid, 'session'].filter(etype='pilot', inplace=False)
    for p in s.list('uid'):
        sf = s.filter(uid=p, inplace=False)
        pds['pid'].append(p)
        pds['sid'].append(sid)
        pds['experiment'].append(sessions.loc[sid, 'experiment'])
        for d in pdm.keys():
            # A pilot that never reached one of the two states has no
            # timestamp for it; record a missing value instead of failing.
            if (not sf.timestamps(state=pdm[d][0]) or
                not sf.timestamps(state=pdm[d][1])):
                pds[d].append(None)
                continue
            pds[d].append(sf.duration(pdm[d]))

# Populate the DataFrame.
pilots = pd.DataFrame(pds)

In [8]:
# Model of TOTAL unit durations. Only the execution phase is measured here;
# the full state model is kept (commented) for reference.
# udm = {'TT_UNIT_UMGR_SCHEDULING'   : ['NEW'                         , 'UMGR_SCHEDULING_PENDING'],
#        'TT_UNIT_UMGR_BINDING'      : ['UMGR_SCHEDULING_PENDING'     , 'UMGR_SCHEDULING'],
#        'TT_IF_UMGR_SCHEDULING'     : ['UMGR_SCHEDULING'             , 'UMGR_STAGING_INPUT_PENDING'],
#        'TT_IF_UMGR_QUEING'         : ['UMGR_STAGING_INPUT_PENDING'  , 'UMGR_STAGING_INPUT'],
#        'TT_IF_AGENT_SCHEDULING'    : ['UMGR_STAGING_INPUT'          , 'AGENT_STAGING_INPUT_PENDING'],
#        'TT_IF_AGENT_QUEUING'       : ['AGENT_STAGING_INPUT_PENDING' , 'AGENT_STAGING_INPUT'],
#        'TT_IF_AGENT_TRANSFERRING'  : ['AGENT_STAGING_INPUT'         , 'AGENT_SCHEDULING_PENDING'],
#        'TT_UNIT_AGENT_QUEUING'     : ['AGENT_SCHEDULING_PENDING'    , 'AGENT_SCHEDULING'],
#        'TT_UNIT_AGENT_SCHEDULING'  : ['AGENT_SCHEDULING'            , 'AGENT_EXECUTING_PENDING'],
#        'TT_UNIT_AGENT_QUEUING_EXEC': ['AGENT_EXECUTING_PENDING'     , 'AGENT_EXECUTING'],
#        'TT_UNIT_AGENT_EXECUTING'   : ['AGENT_EXECUTING'             , 'AGENT_STAGING_OUTPUT_PENDING'],
#        'TT_OF_AGENT_QUEUING'       : ['AGENT_STAGING_OUTPUT_PENDING', 'AGENT_STAGING_OUTPUT'],
#        'TT_OF_UMGR_SCHEDULING'     : ['AGENT_STAGING_OUTPUT'        , 'UMGR_STAGING_OUTPUT_PENDING'],
#        'TT_OF_UMGR_QUEUING'        : ['UMGR_STAGING_OUTPUT_PENDING' , 'UMGR_STAGING_OUTPUT'],
#        'TT_OF_UMGR_TRANSFERRING'   : ['UMGR_STAGING_OUTPUT'         , 'DONE']}

udm = {'TT_UNIT_AGENT_EXECUTING': [rp.AGENT_EXECUTING, rp.AGENT_STAGING_OUTPUT_PENDING]}

# Calculate total unit durations for each session.
# Use .loc instead of the deprecated (and later removed) DataFrame.ix.
for sid in sessions.index:
    s = sessions.loc[sid, 'session'].filter(etype='unit', inplace=False)
    for name, states in udm.items():
        sessions.loc[sid, name] = s.duration(states)

In [ ]:
# Display the sessions DataFrame with the new duration columns.
sessions

In [9]:
# KDE of pilot queuing time (Tq) across all XSEDE sessions, with the min,
# mean and max marked by dashed vertical reference lines.
fig, ax = fig_setup()
title = 'XSEDE HPC Clusters: Comet, Gordon, Stampede, SuperMic\nKernel Density Estimation of Pilot Queuing Time (Tq)'

# Drop pilots without a queuing-time measurement before estimating density.
tq_all = pilots['LRMS_QUEUING'].dropna().reset_index(drop=True)
ptqs   = pd.DataFrame({'all': tq_all})

ptqs.plot.density(ax=ax, color=tableau20[0], title=title)

# The legend labels below are positional, so the order min/mean/max of
# these lines must be preserved.
for value, colour in ((pilots['LRMS_QUEUING'].min(),  'black'),
                      (pilots['LRMS_QUEUING'].mean(), 'red'),
                      (pilots['LRMS_QUEUING'].max(),  'green')):
    plt.axvline(value, ymax=0.9, color=colour, linestyle='dashed', linewidth=0.4)

ax.set_xlabel('Time (s)')
ax.legend(labels=['Pilot Queuing Time (Tq)', 'Min', 'Mean', 'Max'])

plt.savefig('xsede_tq_all_density.pdf', dpi=600, bbox_inches='tight')
#display(pilots)



In [10]:
# Model of unit durations.
# udm = {'UNIT_UMGR_SCHEDULING'   : ['NEW'                         , 'UMGR_SCHEDULING_PENDING'],
#        'UNIT_UMGR_BINDING'      : ['UMGR_SCHEDULING_PENDING'     , 'UMGR_SCHEDULING'],
#        'IF_UMGR_SCHEDULING'     : ['UMGR_SCHEDULING'             , 'UMGR_STAGING_INPUT_PENDING'], 
#        'IF_UMGR_QUEING'         : ['UMGR_STAGING_INPUT_PENDING'  , 'UMGR_STAGING_INPUT'],
#        'IF_AGENT_SCHEDULING'    : ['UMGR_STAGING_INPUT'          , 'AGENT_STAGING_INPUT_PENDING'],  
#        'IF_AGENT_QUEUING'       : ['AGENT_STAGING_INPUT_PENDING' , 'AGENT_STAGING_INPUT'], 
#        'IF_AGENT_TRANSFERRING'  : ['AGENT_STAGING_INPUT'         , 'AGENT_SCHEDULING_PENDING'],
#        'UNIT_AGENT_QUEUING'     : ['AGENT_SCHEDULING_PENDING'    , 'AGENT_SCHEDULING'],
#        'UNIT_AGENT_SCHEDULING'  : ['AGENT_SCHEDULING'            , 'AGENT_EXECUTING_PENDING'], 
#        'UNIT_AGENT_QUEUING_EXEC': ['AGENT_EXECUTING_PENDING'     , 'AGENT_EXECUTING'], 
#        'UNIT_AGENT_EXECUTING'   : ['AGENT_EXECUTING'             , 'AGENT_STAGING_OUTPUT_PENDING'],
#        'OF_AGENT_QUEUING'       : ['AGENT_STAGING_OUTPUT_PENDING', 'AGENT_STAGING_OUTPUT'], 
#        'OF_UMGR_SCHEDULING'     : ['AGENT_STAGING_OUTPUT'        , 'UMGR_STAGING_OUTPUT_PENDING'],
#        'OF_UMGR_QUEUING'        : ['UMGR_STAGING_OUTPUT_PENDING' , 'UMGR_STAGING_OUTPUT'],
#        'OF_UMGR_TRANSFERRING'   : ['UMGR_STAGING_OUTPUT'         , 'DONE']}

udm = {'UNIT_AGENT_EXECUTING'   : [rp.AGENT_EXECUTING, rp.AGENT_STAGING_OUTPUT_PENDING]}
        
# DataFrame structure for pilot durations. 
# uds = { 'pid': [],
#         'sid': [],
#         'experiment' : [],
#         'UNIT_UMGR_SCHEDULING'   : [],
#         'UNIT_UMGR_BINDING'      : [],
#         'IF_UMGR_SCHEDULING'     : [], 
#         'IF_UMGR_QUEING'         : [],
#         'IF_AGENT_SCHEDULING'    : [],
#         'IF_AGENT_QUEUING'       : [], 
#         'IF_AGENT_TRANSFERRING'  : [],
#         'UNIT_AGENT_QUEUING'     : [],
#         'UNIT_AGENT_SCHEDULING'  : [], 
#         'UNIT_AGENT_QUEUING_EXEC': [], 
#         'UNIT_AGENT_EXECUTING'   : [], 
#         'OF_AGENT_QUEUING'       : [], 
#         'OF_UMGR_SCHEDULING'     : [],
#         'OF_UMGR_QUEUING'        : [],
#         'OF_UMGR_TRANSFERRING'   : []}

uds = { 'uid': [],
        'sid': [],
        'experiment': [],
        'UNIT_AGENT_EXECUTING': []}

# Calculate the duration for each state of each 
# pilot of each run and Populate the DataFrame 
# structure.
for sid in sessions[['session', 'experiment']].index:
    print sid
#     if sessions.ix[sid, 'experiment'] in ['exp1','exp2']:
#         continue
#     print '%s - %s' % (sessions.ix[sid, 'experiment'], sid)
    s = sessions.ix[sid, 'session'].filter(etype='unit', inplace=False)
    for u in s.list('uid'):
#         print "\t%s" % u
        sf = s.filter(uid=u, inplace=False)
        uds['uid'].append(u)
        uds['sid'].append(sid)
        uds['experiment'].append(sessions.ix[sid, 'experiment'])
        for d in udm.keys():
            # print '\t\t%s' % udm[d]
            # print '\t\t[%s, %s]' % (sf.timestamps(state=udm[d][0]), sf.timestamps(state=udm[d][1])) 
            if (not sf.timestamps(state=udm[d][0]) or 
                not sf.timestamps(state=udm[d][1])):
                pds[d].append(None)
#                 print '\t\t%s: %s' % (d, 'None')
                continue
#             print sf.timestamps(state=udm[d][0])
#             print sf.timestamps(state=udm[d][1])
#             print '\t\t%s: %s' % (d, sf.duration(udm[d]))
            uds[d].append(sf.duration(udm[d]))

# Populate the DataFrame. We have empty lists 
units = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in uds.iteritems()]))


rp.session.radical.merzky.017082.0014
rp.session.radical.merzky.017083.0001
rp.session.radical.merzky.017083.0005
rp.session.radical.merzky.017083.0007
rp.session.radical.merzky.017083.0009
rp.session.radical.merzky.017083.0011
rp.session.radical.merzky.017083.0013
rp.session.radical.merzky.017083.0015
rp.session.radical.merzky.017083.0017
rp.session.radical.merzky.017083.0025
rp.session.radical.merzky.017083.0027
rp.session.radical.merzky.017083.0029
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-10-0963966fe86c> in <module>()
     54     for u in s.list('uid'):
     55 #         print "\t%s" % u
---> 56         sf = s.filter(uid=u, inplace=False)
     57         uds['uid'].append(u)
     58         uds['sid'].append(sid)

/Users/mturilli/Virtualenvs/RADICAL-ANALYTICS-SWIFT/lib/python2.7/site-packages/radical/analytics/session.pyc in filter(self, etype, uid, state, event, time, inplace)
    374             # create a new session with the resulting entity list
    375             ret = Session(sid=self._sid, stype=self._stype, src=self._src,
--> 376                           _init=False)
    377             ret._reinit(entities = {uid:self._entities[uid] for uid in uids})
    378             ret._initialize_properties()

/Users/mturilli/Virtualenvs/RADICAL-ANALYTICS-SWIFT/lib/python2.7/site-packages/radical/analytics/session.pyc in __init__(self, sid, stype, src, _entities, _init)
     37         if stype == 'radical.pilot':
     38             import radical.pilot as rp
---> 39             self._profile, self._t_min = rp.utils.get_session_profile    (sid=sid, src=self._src)
     40             self._description, hm      = rp.utils.get_session_description(sid=sid, src=self._src)
     41 

/Users/mturilli/Virtualenvs/RADICAL-ANALYTICS-SWIFT/lib/python2.7/site-packages/radical/pilot/utils/prof_utils.pyc in get_session_profile(sid, src)
    856 
    857     profs       = read_profiles(profiles)
--> 858     prof, t_min = combine_profiles(profs)
    859     prof        = clean_profile(prof, sid)
    860 

/Users/mturilli/Virtualenvs/RADICAL-ANALYTICS-SWIFT/lib/python2.7/site-packages/radical/pilot/utils/prof_utils.pyc in combine_profiles(profiles)
    394     # make times relative to t_min again
    395     for row in p_glob:
--> 396         row['time'] -= t_min
    397 
    398     # sort by time and return

KeyboardInterrupt: 

In [16]:
# get all session IDs and profile paths
# NOTE(review): hardcoded absolute local path -- verify before re-running.
dirs = glob.glob('/Users/mturilli/Projects/RADICAL/github/experiments/AIMES-Swift/viveks_workflow/analysis/sessions/data/exp1/rp.session.*/')
sids = dict()
for d in dirs:
    d = d.rstrip('/')                  # drop trailing slash(es) from glob
    sids[os.path.basename(d)] = d

# tx distribution is what we are interested in
tx        = list()
tx_states = [rp.AGENT_EXECUTING, rp.AGENT_STAGING_OUTPUT_PENDING]

# fill tx, and count the last state reached by each unit
last_states = dict()
for sid in sids:

    session = ra.Session(sid, 'radical.pilot', src=sids[sid])
    units   = session.filter(etype='unit', inplace=True)

    for unit in units.get():

        # we only want units from stage 2
        if 'stage_2' not in unit.description['executable']:
            continue

        # get tx, and add it to the set of data points to plot; ignore
        # units with mismatching/incomplete profiles.
        # BUG FIX: `ls` was assigned only inside the try block, so after
        # an exception the previous unit's last state was counted again
        # (or a NameError raised for the very first unit). Reset it per
        # unit and only count it when it was actually determined.
        ls = None
        try:
            t = unit.duration(tx_states)
            if t:
                tx.append({'t': t})
            # last state of the unit, sorted by timestamp
            ls = sorted(unit.states.keys(),
                        key=lambda x: unit.states[x]['time'])[-1]
        except Exception:
            # best-effort: skip units with inconsistent data
            pass

        # update counter for the last states
        if ls is not None:
            last_states[ls] = last_states.get(ls, 0) + 1

df = pd.DataFrame(tx)


Out[16]:
t
0 379.6412
1 422.8411
2 400.1186
3 402.9694
4 412.1639
5 434.5441
6 436.4565
7 386.8066
8 315.2917
9 409.6533
10 405.7284
11 404.4564
12 422.0356
13 390.4736
14 353.5025
15 412.1519
16 439.2774
17 358.5467
18 393.0839
19 423.3387
20 411.9393
21 415.9363
22 398.5623
23 416.5731
24 415.1062
25 384.6939
26 332.4721
27 469.8040
28 429.4663
29 425.6222
... ...
9698 506.6165
9699 461.8073
9700 469.5661
9701 254.6999
9702 468.2319
9703 153.7805
9704 463.9842
9705 216.5042
9706 440.7907
9707 458.3301
9708 470.1746
9709 465.4409
9710 459.3660
9711 464.7102
9712 466.2441
9713 452.6092
9714 459.3578
9715 456.8528
9716 454.5153
9717 465.8423
9718 155.1178
9719 473.8200
9720 254.8008
9721 251.4041
9722 467.2536
9723 153.8558
9724 467.0639
9725 158.3996
9726 465.8297
9727 464.4771

9728 rows × 1 columns


In [43]:
# Load the OSG per-unit execution times exported by the rp_on_osg use case.
# NOTE(review): hardcoded absolute local path; also rebinds `units`, which
# earlier cells used for a different DataFrame -- confirm this is intended.
units = pd.read_csv('/Users/mturilli/Projects/RADICAL/github/radical.analytics/use_cases/rp_on_osg/units_tx.csv')
# display(df['t'].tolist())
# display(units['UNIT_AGENT_EXECUTING'].tolist())
# Side-by-side task execution times (Tx): XSEDE from the in-memory `df`,
# OSG from the CSV; the shorter column is padded with NaN (see output).
tx_xsede_osg = pd.DataFrame({'XSEDE': df['t'], 'OSG': units['UNIT_AGENT_EXECUTING']})
display(tx_xsede_osg)


OSG XSEDE
0 210.2204 379.6412
1 239.5106 422.8411
2 249.2585 400.1186
3 294.9099 402.9694
4 214.2163 412.1639
5 197.2057 434.5441
6 205.2098 436.4565
7 342.1608 386.8066
8 193.1973 315.2917
9 283.2868 409.6533
10 218.7231 405.7284
11 341.3434 404.4564
12 202.2077 422.0356
13 197.1962 390.4736
14 391.2101 353.5025
15 310.4892 412.1519
16 220.8741 439.2774
17 201.2163 358.5467
18 470.5228 393.0839
19 470.7448 423.3387
20 351.3825 411.9393
21 424.4588 415.9363
22 348.3644 398.5623
23 345.1255 416.5731
24 418.0983 415.1062
25 201.2167 384.6939
26 202.1977 332.4721
27 200.2193 469.8040
28 199.2121 429.4663
29 331.3480 425.6222
... ... ...
9698 NaN 506.6165
9699 NaN 461.8073
9700 NaN 469.5661
9701 NaN 254.6999
9702 NaN 468.2319
9703 NaN 153.7805
9704 NaN 463.9842
9705 NaN 216.5042
9706 NaN 440.7907
9707 NaN 458.3301
9708 NaN 470.1746
9709 NaN 465.4409
9710 NaN 459.3660
9711 NaN 464.7102
9712 NaN 466.2441
9713 NaN 452.6092
9714 NaN 459.3578
9715 NaN 456.8528
9716 NaN 454.5153
9717 NaN 465.8423
9718 NaN 155.1178
9719 NaN 473.8200
9720 NaN 254.8008
9721 NaN 251.4041
9722 NaN 467.2536
9723 NaN 153.8558
9724 NaN 467.0639
9725 NaN 158.3996
9726 NaN 465.8297
9727 NaN 464.4771

9728 rows × 2 columns


In [83]:
mpl.rcParams['text.usetex'] = True 
mpl.rcParams['font.family'] = 'sans-serif'
mpl.rcParams['font.serif']  = ['Helvetica']

mpl.rcParams['legend.frameon'] = True
mpl.rcParams['patch.linewidth'] = 0.75

SIZE = 20
plt.rc('font', size=SIZE)  # controls default text sizes
plt.rc('axes', titlesize=SIZE)  # fontsize of the axes title
plt.rc('axes', labelsize=SIZE)  # fontsize of the x any y labels
plt.rc('xtick', labelsize=SIZE)  # fontsize of the tick labels
plt.rc('ytick', labelsize=SIZE)  # fontsize of the tick labels
plt.rc('legend', fontsize=13)  # legend fontsize
plt.rc('figure', titlesize=SIZE)  # # size of the figure title



fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(111)  
mtitle ='OSG XSEDE Virtual Cluster and XSEDE HPC Clusters'
stitle ='Comparison of Kernel Density Estimation of Task Execution Time ($T_x$)'
title  = '%s\n%s' % (mtitle, stitle) 
fig.suptitle(title, fontsize=19)

# tx_all  = units['UNIT_AGENT_EXECUTING'].dropna().reset_index(drop=True)

# utxs = pd.DataFrame({'all': tx_all}) 

print tx_xsede_osg.describe()

tx_xsede_osg.plot.density(ax=ax, color=[tableau20[2],tableau20[0]], linewidth=2)#, title=title)
# plt.axvline(df['t'].min(),  ymax=0.9, color='black', linestyle='dashed', linewidth=0.4)
# plt.axvline(df['t'].mean(), ymax=0.9, color='red',   linestyle='dashed', linewidth=0.4)
# plt.axvline(df['t'].max(),  ymax=0.9, color='green', linestyle='dashed', linewidth=0.4)

ax.set_xlim((0, 1000))
ax.set_xlabel('$T_x$ (s)')
ax.legend()


plt.savefig('xsede_osg_tx_all_frequency.pdf', dpi=600, bbox_inches='tight')
tx_xsede_osg.to_csv('osg_xsede_tx.csv')


               OSG        XSEDE
count  6472.000000  9728.000000
mean    372.842403   318.983749
std     185.018101   105.522465
min     133.432300     0.962600
25%     240.953975   252.821650
50%     339.363100   255.261200
75%     457.453525   408.415675
max    2216.327500   950.575400

In [ ]:


In [ ]: