In [1]:
    
%matplotlib inline
import pandas as pd
import random
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
    
    
In [33]:
    
# Load all_results.csv (path relative to the notebook's working directory) into `df`.
df = pd.read_csv('all_results.csv')
    
In [3]:
    
# Preview the first rows to check which columns were loaded.
df.head()
    
    Out[3]:
In [4]:
    
# List the distinct values of the 'machine' column.
df['machine'].unique()
    
    Out[4]:
In [5]:
    
# List the distinct values of the 'benchmark' column.
df['benchmark'].unique()
    
    Out[5]:
In [18]:
    
# List the distinct values of the 'limits' column (including any missing values).
df['limits'].unique()
    
    Out[18]:
In [34]:
    
# Replace every missing value in the frame with True, rebinding `df` to the
# filled result rather than mutating the loaded frame in place.
# NOTE(review): this fills NaNs in *all* columns, not only 'limits' — confirm
# that is intended.
df = df.fillna(True)
    
In [35]:
    
# Execute normalize.py and load the names it defines into the notebook namespace.
# NOTE(review): obtain_speedup, called in the next cell, is presumably defined
# there — confirm.
%run normalize.py
    
In [36]:
    
# Rebind `df` to the result of obtain_speedup (not defined in this notebook;
# presumably provided by normalize.py). Later cells read a 'speedup' column from it.
# NOTE(review): 'issdm-12' looks like the baseline machine the speedup is
# computed against — confirm against normalize.py.
# Caution: this cell overwrites its own input, so re-running it without
# re-running the cells above feeds the already-processed frame back in.
df = obtain_speedup(df, 'issdm-12')
    
In [49]:
    
# Preview the frame returned by obtain_speedup.
df.head()
    
    Out[49]:
In [71]:
    
# Restrict the analysis to the rows of a single machine.
machine = '192.168.140.81'
# .copy() gives subdf its own data: a later cell adds a column to subdf, and
# doing that on a plain boolean-mask slice of `df` raises SettingWithCopyWarning
# and leaves it ambiguous whether `df` is affected.
subdf = df[df["machine"] == machine].copy()
    
In [72]:
    
# Box plot of speedup for the selected machine, one box per 'limits' value.
# The positional-values + `groupby=` call form is not part of the current
# seaborn API; columns are named explicitly and the frame is passed via `data`.
sns.boxplot(x='limits', y='speedup', data=subdf)
    
    
    Out[72]:
    
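The plot above might suggest that runs with limits have lower (or higher) variability than runs without. However, before drawing that conclusion we need to normalize the speedups with respect to the max/min values of each 'limits' group; otherwise the apparent difference may just be an optical illusion caused by the groups having different scales.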
In [73]:
    
# Normalize each speedup within its 'limits' group: subtract the group mean and
# divide by the group range (max - min), so that groups with different spreads
# can be compared on a common scale.
#
# Changes from the previous version:
#   * the numerator now uses 'speedup' (it used the raw 'result' column, which
#     mixed raw results with speedup statistics);
#   * group statistics are computed once per group via groupby/transform
#     instead of re-filtering the whole frame three times for every row.
limits_groups = subdf.groupby('limits')['speedup']
speedup_range = limits_groups.transform('max') - limits_groups.transform('min')

# assign() returns a new frame, so no column is written onto a slice of `df`.
# A group whose max equals its min has zero range and yields NaN/inf here.
subdf = subdf.assign(
    speedup_normalized=(subdf['speedup'] - limits_groups.transform('mean')) / speedup_range
)
    
    
In [77]:
    
# Preview subdf, which now carries the speedup_normalized column.
subdf.head()
    
    Out[77]:
In [75]:
    
# Box plot of the per-group normalized speedup, one box per 'limits' value.
# The positional-values + `groupby=` call form is not part of the current
# seaborn API; columns are named explicitly and the frame is passed via `data`.
sns.boxplot(x='limits', y='speedup_normalized', data=subdf)
    
    
    Out[75]:
    
In [76]:
    
# Show the rows of the selected machine whose speedup is greater than 25.
subdf.loc[subdf['speedup'].gt(25)]
    
    Out[76]:
In [ ]: