In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
%matplotlib inline
#plt.style.use('seaborn')

from pandas import *
from pandas.io.parsers import read_csv

def objsize(x, pos):
    """Render a tick value as text with exactly one decimal place.

    The two-argument signature is the callback shape expected by
    matplotlib's ``FuncFormatter``: ``x`` is the tick value and ``pos``
    is the tick position, which is not used here.
    """
    one_decimal = '0.1f'
    return format(x, one_decimal)

In [2]:
# Load the per-site transfer logs.
#
# Every CSV is parsed exactly once.  The "first 10k" / "last 10k" frames
# used by the comparison plots are row slices of that single parse, rather
# than a fresh read_csv of the same file for each variable.
CSV_OPTS = dict(index_col=False, header=0)

# AGLT2: full sample plus first/last 10k rows of the sorted file.
pod1 = read_csv('out-aglt2857.csv', nrows=30000, **CSV_OPTS)
_aglt2_sorted = read_csv('sort-aglt2857.csv', nrows=30000, **CSV_OPTS)
aga = _aglt2_sorted[:10000]
agb = _aglt2_sorted[20000:30000]

# CERN
pod2 = read_csv('out-cern857-bnlproj.csv', nrows=30000, **CSV_OPTS)

# LANCS
# NOTE(review): unlike the other sites, the first/last slices here come from
# the 'out-' file rather than a 'sort-' file -- confirm this is intended.
pod3 = read_csv('out-lancs857.csv', nrows=30000, **CSV_OPTS)
lana = pod3[:10000]
lanb = pod3[20000:30000]

# MWT2
pod4 = read_csv('out-mwt2857.csv', nrows=30000, **CSV_OPTS)
_mwt2_sorted = read_csv('sort-mwt2857.csv', nrows=30000, **CSV_OPTS)
mwa = _mwt2_sorted[:10000]
mwb = _mwt2_sorted[20000:30000]

# RAL (stfc): the sorted file is read to 50k rows, so "last 10k" is rows
# 40000-49999.  ralc is a separate file read in full.
pod5 = read_csv('out-stfc857c.csv', nrows=30000, **CSV_OPTS)
_stfc_sorted = read_csv('sort-stfc857c.csv', nrows=50000, **CSV_OPTS)
rala = _stfc_sorted[:10000]
ralb = _stfc_sorted[40000:50000]
ralc = read_csv('stfc512.csv', **CSV_OPTS)
# Alternative source for pod5 (disabled):
#pod5 = read_csv('out-aglt2857-nolimit.csv', index_col=False, header=0)

In [128]:
# One histogram panel per destination site, stacked vertically and drawn
# over the same duration range so the distributions can be compared.
bins = 200
myrange = (0, 4)
site_frames = [
    (pod1, 'AGLT2'),
    (pod2, 'CERN'),
    (pod3, 'LANCS'),
    (pod4, 'MWT2'),
    (pod5, 'RAL'),
]

fig, axes = plt.subplots(figsize=(12, 12), nrows=len(site_frames), ncols=1)
for panel, (frame, label) in zip(axes, site_frames):
    frame['Duration'].hist(ax=panel, bins=bins, label=label, alpha=0.8, range=myrange)
    panel.set_ylabel('Count')
    panel.legend()
# The x axis is the same quantity in every panel; label only the bottom one
# so the text does not collide with the panel underneath.
axes[-1].set_xlabel('Duration (s)')
fig.suptitle('Distribution of transfer duration (Source is GCE us-central1)', fontsize=16)

# Run the layout pass only after all artists and labels exist, so it can
# account for them; then reserve the top strip for the figure title.
fig.tight_layout()
fig.subplots_adjust(top=0.95)



In [129]:
# All five destination sites overlaid on a single axis.
bins = 200
myrange = (0, 4)
site_frames = [
    (pod1, 'AGLT2'),
    (pod2, 'CERN'),
    (pod3, 'LANCS'),
    (pod4, 'MWT2'),
    (pod5, 'RAL'),
]

fig, ax = plt.subplots(figsize=(12, 9))
for frame, label in site_frames:
    frame['Duration'].hist(ax=ax, bins=bins, label=label, alpha=0.8, range=myrange)
# Same axis labels as the per-site comparison plots in this notebook.
ax.set(xlabel='Duration (s)', ylabel='Count')
fig.suptitle('Distribution of transfer duration (Source is GCE us-central1)', fontsize=16)
ax.legend()

# Run the layout pass only after the labels exist, then reserve the top
# strip for the figure title.
fig.tight_layout()
fig.subplots_adjust(top=0.95)



In [114]:
# 99th percentile of the 'Duration' column of pod5 (the frame plotted as RAL).
pod5['Duration'].quantile(q=0.99)


Out[114]:
6.791339339999988

In [122]:
# AGLT2: compare the first and last 10k rows of the sorted transfer log.
bins = 200
myrange = (0, 4)

fig, ax = plt.subplots(figsize=(12, 9))
for frame, label in ((aga, 'AGLT2-first-10k'), (agb, 'AGLT2-last-10k')):
    frame['Duration'].hist(ax=ax, bins=bins, label=label, alpha=0.5, range=myrange)
ax.set(title='Distribution of transfer duration (Source is GCE us-central1)', xlabel='Duration (s)', ylabel='Count')
ax.set_xlim(myrange)
# Show x ticks with one decimal place.
formatter = FuncFormatter(objsize)
ax.xaxis.set_major_formatter(formatter)
ax.legend()
# Layout pass last, so the title and axis labels are taken into account.
fig.tight_layout()



In [127]:
# MWT2: compare the first and last 10k rows of the sorted transfer log.
bins = 200
myrange = (0, 4)

fig, ax = plt.subplots(figsize=(12, 9))
for frame, label in ((mwa, 'MWT2-first-10k'), (mwb, 'MWT2-last-10k')):
    frame['Duration'].hist(ax=ax, bins=bins, label=label, alpha=0.5, range=myrange)
ax.set(title='Distribution of transfer duration (Source is GCE us-central1)', xlabel='Duration (s)', ylabel='Count')
ax.set_xlim(myrange)
# Show x ticks with one decimal place.
formatter = FuncFormatter(objsize)
ax.xaxis.set_major_formatter(formatter)
ax.legend()
# Layout pass last, so the title and axis labels are taken into account.
fig.tight_layout()



In [4]:
# RAL: compare the first and last 10k rows of the sorted transfer log with
# the separate 'stfc512.csv' sample, over a wider duration range.
bins = 200
myrange = (0, 20)
ral_frames = (
    (rala, 'RAL-first-10k'),
    (ralb, 'RAL-last-10k'),
    (ralc, 'RAL-512'),
)

fig, ax = plt.subplots(figsize=(12, 9))
for frame, label in ral_frames:
    frame['Duration'].hist(ax=ax, bins=bins, label=label, alpha=0.5, range=myrange)
ax.set(title='Distribution of transfer duration (Source is GCE us-central1)', xlabel='Duration (s)', ylabel='Count')
ax.set_xlim(myrange)
# Show x ticks with one decimal place.
formatter = FuncFormatter(objsize)
ax.xaxis.set_major_formatter(formatter)
ax.legend()
# Layout pass last, so the title and axis labels are taken into account.
fig.tight_layout()



In [119]:
# LANCS: compare the first and last 10k rows of the loaded transfer log.
bins = 200
myrange = (0, 10)

fig, ax = plt.subplots(figsize=(12, 9))
for frame, label in ((lana, 'LANCS-first-10k'), (lanb, 'LANCS-last-10k')):
    frame['Duration'].hist(ax=ax, bins=bins, label=label, alpha=0.5, range=myrange)
ax.set(title='Distribution of transfer duration (Source is GCE us-central1)', xlabel='Duration (s)', ylabel='Count')
ax.set_xlim(myrange)
# Show x ticks with one decimal place.
formatter = FuncFormatter(objsize)
ax.xaxis.set_major_formatter(formatter)
ax.legend()
# Layout pass last, so the title and axis labels are taken into account.
fig.tight_layout()



In [ ]: