In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
%matplotlib inline
#plt.style.use('seaborn')

from pandas import *
from pandas.io.parsers import read_csv

In [18]:
# Read the three measurement CSVs into one DataFrame each (lancs, mwt2, cern).
# header=0 takes the column names from the first row; index_col=False keeps
# pandas from promoting the first column to the index.
lancs = pd.read_csv('lancs-mar6.csv', index_col=False, header=0)
mwt2 = pd.read_csv('mwt2-mar6.csv', index_col=False, header=0)
cern = pd.read_csv('cern-mar6.csv', index_col=False, header=0)

# Only the final expression of a cell is echoed, so this cell shows a single
# value: the 'Duration' column of the lancs frame. Row counts (len(lancs),
# len(mwt2), len(cern)) need their own cell or an explicit display() call.
lancs['Duration']


Out[18]:
0      0.316320
1      0.241416
2      0.298700
3      0.270291
4      0.258126
5      0.290675
6      0.273076
7      0.259662
8      0.267440
9      0.252434
10     0.266827
11     0.340353
12     0.253039
13     0.270602
14     0.279930
15     0.245711
16     0.279081
17     0.253243
18     0.262261
19     0.274074
20     0.273719
21     0.245640
22     0.268391
23     0.239929
24     0.304220
25     0.275171
26     0.267007
27     0.291379
28     0.313308
29     0.239300
         ...   
970    0.259843
971    0.226745
972    0.458301
973    0.248372
974    0.180495
975    0.230690
976    0.197432
977    0.225759
978    0.236757
979    0.263461
980    0.241500
981    0.257842
982    0.263334
983    0.332928
984    0.297329
985    0.273926
986    0.288765
987    0.238403
988    0.260196
989    0.237109
990    0.239216
991    0.196457
992    0.240534
993    0.226508
994    0.227612
995    0.229715
996    0.267567
997    0.245277
998    0.274496
999    0.183536
Name: Duration, dtype: float64

In [20]:
# Disabled experiment, kept for reference:
#   cs = cern[(cern < 5e6)].dropna()
#   cs.sum() / 1e9

# Quick-look histogram of the mwt2 'Duration' column; no bins or range are
# passed, and the returned Axes is the cell's echoed value.
mwt2.loc[:, 'Duration'].hist(alpha=0.5, label=['mwt2'])


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdde8e6df60>

In [34]:
# Scratch cell: a range over 0..14, echoed by the notebook as range(0, 15).
range(15)


Out[34]:
range(0, 15)

In [42]:
def objsize(x, pos):
    """Render a tick value as a fixed-point number with no decimal places.

    Takes the two arguments a tick-formatter callback receives: the tick
    value ``x`` and the tick position ``pos``. ``pos`` is accepted but not
    used.

    Returns:
        str: ``x`` formatted with the ``1.0f`` format spec.
    """
    return format(x, '1.0f')
# Overlay the 'Duration' histograms of the three frames on one shared axis.
fig, ax = plt.subplots(figsize=(9, 6))
bins = 100
myrange = (0, 5)  # used both as the histogram range and as the x-axis limits

# Every series gets the same bin count and range; the drawing order
# (mwt2, cern, lancs) is the order the legend entries are created in.
for _site_label, _site_frame in (('mwt2', mwt2), ('cern', cern), ('lancs', lancs)):
    _site_frame['Duration'].hist(
        ax=ax, bins=bins, range=myrange, alpha=0.5, label=[_site_label]
    )

ax.set_title('Distribution of transfer duration')
ax.set_xlabel('Duration (s)')
ax.set_ylabel('Count')
ax.set_xlim(myrange)
# ax.set_yscale('log')  # optional: log-scale the counts

# Tick labels on the x axis go through objsize (no decimal places).
formatter = FuncFormatter(objsize)
ax.xaxis.set_major_formatter(formatter)
ax.legend().set_visible(True)



In [ ]: