notebook.community

Edit and run



In [1]:

    
%matplotlib inline
import pandas as pd
import random
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns



In [11]:

    
# Load the two benchmark runs and tag every row with whether limits were
# applied, then stack them into a single frame with a fresh 0..n-1 index.
nolimits = pd.read_csv('baseline.csv').assign(limits=False)
withlimits = pd.read_csv('with_limits.csv').assign(limits=True)
df = pd.concat([nolimits, withlimits], ignore_index=True)



In [13]:

    
# Preview the first five rows of the combined frame.
df.head(n=5)









    Out[13]:






  
    
      
      repetition
      machine
      benchmark
      job
      read_bw
      write_bw
      read_iops
      write_iops
      elapsed
      limits
    
  
  
    
      0
      5
      issdm-47
      fio
      rw-sdb-4k-seq
      2990
      2996
      747.59
      749.16
      36
      False
    
    
      1
      5
      issdm-14
      fio
      rw-sdb-4k-seq
      2955
      2954
      738.78
      738.51
      36
      False
    
    
      2
      5
      issdm-38
      fio
      rw-sdb-4k-seq
      2929
      2930
      732.30
      732.64
      36
      False
    
    
      3
      5
      issdm-20
      fio
      rw-sdb-4k-seq
      3135
      3130
      783.75
      782.58
      36
      False
    
    
      4
      5
      issdm-27
      fio
      rw-sdb-4k-seq
      2934
      2934
      733.70
      733.64
      36
      False



In [21]:

    
# Number of distinct values in the `machine` column (a missing value, if any,
# is counted as one distinct value, matching len(unique())).
df['machine'].nunique(dropna=False)









    Out[21]:





15



In [19]:

    
# Sample standard deviation (ddof=1, the pandas default) of read bandwidth,
# computed separately for runs without and with limits.
df.groupby('limits')['read_bw'].std(ddof=1)









    Out[19]:





limits
False    98.847955
True     32.235759
Name: read_bw, dtype: float64



In [22]:

    
# Sample standard deviation (ddof=1, the pandas default) of write bandwidth,
# computed separately for runs without and with limits.
df.groupby('limits')['write_bw'].std(ddof=1)









    Out[22]:





limits
False    97.443585
True     29.620136
Name: write_bw, dtype: float64



In [15]:

    
# Distribution of read bandwidth per job, split by whether limits were applied.
ax = sns.boxplot(data=df, x='job', y='read_bw', hue='limits')
# Title the figure so it stands alone when the notebook is skimmed; the
# trailing semicolon suppresses the Text(...) repr of set_title.
ax.set_title('Read bandwidth per job, with and without limits');



In [16]:

    
# Distribution of write bandwidth per job, split by whether limits were applied.
ax = sns.boxplot(data=df, x='job', y='write_bw', hue='limits')
# Title the figure so it stands alone when the notebook is skimmed; the
# trailing semicolon suppresses the Text(...) repr of set_title.
ax.set_title('Write bandwidth per job, with and without limits');

Our results show that applying limits reduces the variability (standard deviation) of read and write bandwidth by at least 3x. Let's codify this assertion:



In [27]:

    
# Codify the claim: for each bandwidth metric, the standard deviation without
# limits must be at least MIN_STD_RATIO times the standard deviation with limits.
MIN_STD_RATIO = 3.0

has_limits = df['limits'].astype(bool)
std_ratios = {}
for metric in ('read_bw', 'write_bw'):
    std_without_limits = df.loc[~has_limits, metric].std()
    std_with_limits = df.loc[has_limits, metric].std()
    std_ratios[metric] = std_without_limits / std_with_limits
    # Keep the True/False printout, one line per metric.
    print(std_ratios[metric] >= MIN_STD_RATIO)

# Printing alone lets Restart & Run All succeed even when the claim is false,
# so fail loudly instead. `not (r >= ...)` also catches a NaN ratio.
failing = {m: r for m, r in std_ratios.items() if not (r >= MIN_STD_RATIO)}
assert not failing, (
    'variability reduction below {}x for: {}'.format(MIN_STD_RATIO, failing)
)









    



True
True

	repetition	machine	benchmark	job	read_bw	write_bw	read_iops	write_iops	elapsed	limits
0	5	issdm-47	fio	rw-sdb-4k-seq	2990	2996	747.59	749.16	36	False
1	5	issdm-14	fio	rw-sdb-4k-seq	2955	2954	738.78	738.51	36	False
2	5	issdm-38	fio	rw-sdb-4k-seq	2929	2930	732.30	732.64	36	False
3	5	issdm-20	fio	rw-sdb-4k-seq	3135	3130	783.75	782.58	36	False
4	5	issdm-27	fio	rw-sdb-4k-seq	2934	2934	733.70	733.64	36	False