In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
%matplotlib inline
#plt.style.use('seaborn')
from pandas import *
from pandas.io.parsers import read_csv
In [18]:
# Load the per-site transfer logs. `header=0` takes column names from the
# first row; `index_col=False` tells pandas not to use the first column as
# the index, so each frame gets a default integer index.
lancs = read_csv('lancs-mar6.csv', index_col=False, header=0)
mwt2 = read_csv('mwt2-mar6.csv', index_col=False, header=0)
cern = read_csv('cern-mar6.csv', index_col=False, header=0)

# Optional per-site summaries (disabled):
#lancs.describe().loc[['count','sum','mean','std']]
#mwt2.describe().loc[['count','sum','mean','std']]
#cern.describe().loc[['count','sum','mean','std']]

# A notebook cell only displays its *last* expression, so bare `len(...)`
# lines in the middle of the cell are evaluated and silently discarded.
# Print the row counts explicitly so they actually show up.
for site_name, site_frame in (('lancs', lancs), ('mwt2', mwt2), ('cern', cern)):
    print('{}: {} rows'.format(site_name, len(site_frame)))

# Last expression of the cell: rendered as the cell output.
lancs['Duration']
Out[18]:
In [20]:
# Disabled scratch: mask the CERN frame to entries below 5e6, drop rows
# that end up with missing values, then sum and scale by 1e9.
#   cs = cern[(cern < 5e6)].dropna()
#   cs.sum() / 1e9

# First look at the MWT2 durations with pandas' default binning; the
# returned Axes object is what the cell displays.
mwt2['Duration'].hist(alpha=0.5, label=['mwt2'])
Out[20]:
In [34]:
# Scratch cell: the half-open integer range 0..14 (start defaults to 0).
range(15)
Out[34]:
In [42]:
def objsize(x, pos=None):
    """Format a tick value as a number with no decimal places.

    Written with the ``(value, position)`` signature that
    ``matplotlib.ticker.FuncFormatter`` calls its wrapped function with.

    Parameters
    ----------
    x : float
        The tick value to render.
    pos : int, optional
        The tick position. It is accepted for signature compatibility but
        not used; it defaults to ``None`` so the function can also be
        called directly with just a value.

    Returns
    -------
    str
        ``x`` formatted with the ``'1.0f'`` format spec (rounded to zero
        decimal places).
    """
    return '{:1.0f}'.format(x)
# Overlay the duration histograms of the three sites on one set of axes.
bins = 100
myrange = (0, 5)  # x-window shared by the histogram binning and the axis limits
fig, ax = plt.subplots(figsize=(9, 6))

# Same bins, range and transparency for every site so the overlaid shapes
# are directly comparable. Draw order: mwt2, then cern, then lancs.
for site_label, site_frame in (('mwt2', mwt2), ('cern', cern), ('lancs', lancs)):
    site_frame['Duration'].hist(
        ax=ax,
        bins=bins,
        range=myrange,
        alpha=0.5,
        label=[site_label],
    )

ax.set_title('Distribution of transfer duration')
ax.set_xlabel('Duration (s)')
ax.set_ylabel('Count')
ax.set_xlim(myrange)
# Uncomment to use a logarithmic count axis:
#ax.set_yscale('log')

# Render x tick labels through objsize.
formatter = FuncFormatter(objsize)
ax.xaxis.set_major_formatter(formatter)

# Build the legend from the per-histogram labels and make sure it is shown.
ax.legend().set_visible(True)
In [ ]: