In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
%matplotlib inline
#plt.style.use('seaborn')

from pandas import *
from pandas.io.parsers import read_csv

In [4]:
# Load one CSV per pod. header=0 takes the first row as column names, and
# index_col=False stops pandas from using the first column as the index.
# Each file must provide a 'Duration' column (read below and by the plot cell).
pod1 = pd.read_csv('out-d6mb2.csv', index_col=False, header=0)
pod2 = pd.read_csv('out-j4jwv.csv', index_col=False, header=0)
pod3 = pd.read_csv('out-vllr8.csv', index_col=False, header=0)
# Only the last expression of a cell is rendered, so the earlier bare len(...)
# calls displayed nothing; row counts are shown by the dedicated cell further down.
pod1['Duration']


Out[4]:
0      0.994961
1      0.427366
2      1.052059
3      0.378053
4      0.410858
5      0.401752
6      0.363257
7      0.375299
8      0.535919
9      0.445106
10     0.462141
11     0.592257
12     1.043263
13     0.775022
14     1.238216
15     0.488930
16     0.518059
17     0.600923
18     0.579301
19     0.782252
20     0.854812
21     0.802187
22     0.535856
23     0.602070
24     1.016569
25     0.837249
26     0.954549
27     0.765651
28     1.070147
29     0.473768
         ...   
613    0.513736
614    0.493029
615    0.494767
616    0.567695
617    0.529760
618    0.483421
619    0.473931
620    0.575866
621    0.711156
622    0.813189
623    0.568627
624    0.948127
625    0.422519
626    0.391298
627    0.757861
628    0.371222
629    0.726035
630    0.642168
631    0.601310
632    0.923712
633    0.608841
634    0.575233
635    0.469971
636    0.703370
637    0.593929
638    0.623937
639    0.606545
640    0.534327
641    0.601769
642    0.847129
Name: Duration, Length: 643, dtype: float64

In [8]:
def objsize(x, pos):
    """Render a tick value as fixed-point text with one decimal place.

    Follows the two-argument callback shape handed to ``FuncFormatter``:
    ``x`` is the tick value and ``pos`` is the tick position. ``pos`` is
    accepted but not used.
    """
    return format(x, '0.1f')
# Overlay the three pods' 'Duration' histograms on a single set of axes.
fig, ax = plt.subplots(figsize=(9, 6))
bins = 100
myrange = (0, 2)
# Same bin count and range for every pod so the bars line up; alpha=0.5 keeps
# overlapping bars visible.
for pod_label, pod_frame in (('pod1', pod1), ('pod2', pod2), ('pod3', pod3)):
    pod_frame['Duration'].hist(ax=ax, bins=bins, range=myrange, alpha=0.5, label=[pod_label])
ax.set_title('Distribution of transfer duration')
ax.set_xlabel('Duration (s)')
ax.set_ylabel('Count')
ax.set_xlim(myrange)
#ax.set_yscale('log')
# Format x tick labels through objsize (one decimal place).
formatter = FuncFormatter(objsize)
ax.xaxis.set_major_formatter(formatter)
ax.legend().set_visible(True)



In [13]:
# Number of rows loaded for each pod, in pod1, pod2, pod3 order.
tuple(len(frame) for frame in (pod1, pod2, pod3))


Out[13]:
(643, 630, 1000)

In [ ]: