In [1]:
%matplotlib inline
import pandas as pd
import random
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
pd.set_option("display.max_rows", 8)
First, we load all test data.
In [2]:
# Combined stress-ng results; the relative path is resolved against the
# notebook's current working directory.
alltests_path = 'stress-ng/third/torpor-results/alltests.csv'
df = pd.read_csv(alltests_path)
Let's have a look at the pattern of data.
In [3]:
# Preview the first five rows to see which columns the results file provides.
df.head(n=5)
Out[3]:
Show all the test machines.
In [4]:
# Distinct values found in the 'machine' column.
df.machine.unique()
Out[4]:
Define some predicates for machines and limits
In [5]:
# Boolean row masks over `df`; later cells combine them with `&` to slice the
# results by machine and by limits setting.
machine_column = df['machine']
limits_column = df['limits']

machine_is_issdm_6 = machine_column.eq('issdm-6')
machine_is_t2_micro = machine_column.eq('t2.micro')
machine_is_kv3 = machine_column.eq('kv3')

limits_is_with = limits_column.eq('with')
limits_is_without = limits_column.eq('without')
Show the number of stress tests on different machines
In [6]:
# Named subsets that later cells reuse.
df_issdm_6_with_limit = df.loc[machine_is_issdm_6 & limits_is_with]
df_t2_micro_with_limit = df.loc[machine_is_t2_micro & limits_is_with]
df_kv3_without_limit = df.loc[machine_is_kv3 & limits_is_without]

# The "without limit" subsets of the two target machines are only counted here.
issdm_6_without_limit_rows = len(df.loc[machine_is_issdm_6 & limits_is_without])
t2_micro_without_limit_rows = len(df.loc[machine_is_t2_micro & limits_is_without])

# Row counts, in order: issdm-6 with / without limit, t2.micro with / without
# limit, kv3 without limit.
print(
    len(df_issdm_6_with_limit),
    issdm_6_without_limit_rows,
    len(df_t2_micro_with_limit),
    t2_micro_without_limit_rows,
    len(df_kv3_without_limit)
)
Failed benchmarks do not appear in the result report, so we want to know how many stress tests completed successfully on both the target machine and kv3.
In [7]:
# Inner-join each target machine's limited runs with kv3's unlimited runs on
# the benchmark name; only benchmarks present on both sides survive the join.
issdm_6_with_limit_merge_kv3 = df_issdm_6_with_limit.merge(
    df_kv3_without_limit, how='inner', on='benchmark'
)
t2_micro_with_limit_merge_kv3 = df_t2_micro_with_limit.merge(
    df_kv3_without_limit, how='inner', on='benchmark'
)

# Joined row counts: issdm-6 & kv3 first, then t2.micro & kv3.
print(
    len(issdm_6_with_limit_merge_kv3),
    len(t2_micro_with_limit_merge_kv3)
)
Read the normalized results.
In [8]:
# Results file that additionally carries the 'normalized' column used below.
normalized_results_path = 'stress-ng/third/torpor-results/alltests_with_normalized_results_1.1.csv'
df_normalized = pd.read_csv(normalized_results_path)
Show some of the data rows. The normalized value is the speedup relative to kv3. It is negative when the benchmark runs slower on the target machine than on kv3 (slowdown).
In [9]:
# Preview the first five rows of the normalized results.
df_normalized.head(n=5)
Out[9]:
Show the benchmarks that did not complete successfully on both issdm-6 and kv3.
In [10]:
# Rows of the issdm-6 (with limit) subset whose benchmark is absent from the
# issdm-6 / kv3 join.
issdm_6_benchmark_in_merge = df_issdm_6_with_limit['benchmark'].isin(
    issdm_6_with_limit_merge_kv3['benchmark']
)
df_issdm_6_with_limit[~issdm_6_benchmark_in_merge]
Out[10]:
Show the benchmarks that did not complete successfully on both t2.micro and kv3.
In [11]:
# Rows of the t2.micro (with limit) subset whose benchmark is absent from the
# t2.micro / kv3 join.
t2_micro_benchmark_in_merge = df_t2_micro_with_limit['benchmark'].isin(
    t2_micro_with_limit_merge_kv3['benchmark']
)
df_t2_micro_with_limit[~t2_micro_benchmark_in_merge]
Out[11]:
We can count how many benchmarks show a speed-up and how many show a slowdown.
In [12]:
# Boolean row masks over `df_normalized`, reused by the cells below.
normalized_limits_is_with = df_normalized['limits'].eq('with')
normalized_limits_is_without = df_normalized['limits'].eq('without')
normalized_machine_is_issdm_6 = df_normalized['machine'].eq('issdm-6')
normalized_machine_is_t2_micro = df_normalized['machine'].eq('t2.micro')
normalized_is_speed_up = df_normalized['normalized'].gt(0)
normalized_is_slow_down = df_normalized['normalized'].lt(0)

# Machine / restriction combinations; each is counted once per direction.
issdm_6_unrestricted = normalized_limits_is_without & normalized_machine_is_issdm_6
issdm_6_restricted = normalized_limits_is_with & normalized_machine_is_issdm_6
t2_micro_unrestricted = normalized_limits_is_without & normalized_machine_is_t2_micro
t2_micro_restricted = normalized_limits_is_with & normalized_machine_is_t2_micro


def _count_normalized_rows(mask):
    """Return how many rows of `df_normalized` the boolean mask selects."""
    return len(df_normalized[mask])


# Printed in pairs (speed-up, slowdown) for: issdm-6 without restriction,
# issdm-6 with restriction, t2.micro without restriction, t2.micro with
# restriction. Rows whose normalized value is exactly 0 fall in neither bucket.
print(
    _count_normalized_rows(issdm_6_unrestricted & normalized_is_speed_up),    # 1. speed-up
    _count_normalized_rows(issdm_6_unrestricted & normalized_is_slow_down),   # 2. slowdown
    _count_normalized_rows(issdm_6_restricted & normalized_is_speed_up),      # 3. speed-up
    _count_normalized_rows(issdm_6_restricted & normalized_is_slow_down),     # 4. slowdown
    _count_normalized_rows(t2_micro_unrestricted & normalized_is_speed_up),   # 5. speed-up
    _count_normalized_rows(t2_micro_unrestricted & normalized_is_slow_down),  # 6. slowdown
    _count_normalized_rows(t2_micro_restricted & normalized_is_speed_up),     # 7. speed-up
    _count_normalized_rows(t2_micro_restricted & normalized_is_slow_down)     # 8. slowdown
)
The average of normalized value for results under CPU restriction
In [13]:
# Mean normalized value of the CPU-restricted runs: issdm-6 first, then t2.micro.
print(
    df_normalized.loc[normalized_machine_is_issdm_6 & normalized_limits_is_with, 'normalized'].mean(),
    df_normalized.loc[normalized_machine_is_t2_micro & normalized_limits_is_with, 'normalized'].mean()
)
Let's have a look at the histogram of frequency of normalized value based on stress tests without CPU restriction running on issdm-6.
In [14]:
# issdm-6 runs without CPU restriction; kept as a named frame because the
# ranking cell below reuses it.
df_normalized_issdm_6_without_limit = df_normalized.loc[
    normalized_machine_is_issdm_6 & normalized_limits_is_without
]

# Distribution of the normalized value across those runs.
df_normalized_issdm_6_without_limit['normalized'].hist(
    bins=150,
    figsize=(25, 12),
    xlabelsize=20,
    ylabelsize=20,
)
plt.title('stress tests run on issdm-6 without CPU restriction', fontsize=30)
plt.xlabel('Normalized Value (re-execution / original)', fontsize=25)
plt.ylabel('Frequency (# of benchmarks)', fontsize=25)
Out[14]:
Here is the rank of normalized value from stress tests without CPU restriction
In [15]:
# Rank the issdm-6 runs without CPU restriction by normalized value, highest first.
df_normalized_issdm_6_without_limit_sorted = df_normalized_issdm_6_without_limit.sort_values(
    by='normalized', ascending=False
)
df_normalized_issdm_6_without_limit_sorted_head = df_normalized_issdm_6_without_limit_sorted.head()
df_normalized_issdm_6_without_limit_sorted_tail = df_normalized_issdm_6_without_limit_sorted.tail()

# Show the five highest and five lowest rows in one table. DataFrame.append was
# removed in pandas 2.0; pd.concat stacks the two frames the same way.
pd.concat([
    df_normalized_issdm_6_without_limit_sorted_head,
    df_normalized_issdm_6_without_limit_sorted_tail,
])
Out[15]:
Now let's have a look at the histogram of frequency of normalized value based on stress tests with CPU restriction running on issdm-6.
In [16]:
# issdm-6 runs with CPU restriction; kept as a named frame because the ranking
# and outlier cells below reuse it.
df_normalized_issdm_6_with_limit = df_normalized.loc[
    normalized_machine_is_issdm_6 & normalized_limits_is_with
]

# Distribution of the normalized value across those runs.
df_normalized_issdm_6_with_limit['normalized'].hist(
    color='Orange',
    bins=150,
    figsize=(25, 12),
    xlabelsize=20,
    ylabelsize=20,
)
plt.title('stress tests run on issdm-6 with CPU restriction', fontsize=30)
plt.xlabel('Normalized Value (re-execution / original)', fontsize=25)
plt.ylabel('Frequency (# of benchmarks)', fontsize=25)
Out[16]:
Here is the rank of normalized value from stress tests with CPU restriction
In [17]:
# Rank the issdm-6 runs with CPU restriction by normalized value, highest first.
df_normalized_issdm_6_with_limit_sorted = df_normalized_issdm_6_with_limit.sort_values(
    by='normalized', ascending=False
)
df_normalized_issdm_6_with_limit_sorted_head = df_normalized_issdm_6_with_limit_sorted.head()
df_normalized_issdm_6_with_limit_sorted_tail = df_normalized_issdm_6_with_limit_sorted.tail()

# Show the five highest and five lowest rows in one table. DataFrame.append was
# removed in pandas 2.0; pd.concat stacks the two frames the same way.
pd.concat([
    df_normalized_issdm_6_with_limit_sorted_head,
    df_normalized_issdm_6_with_limit_sorted_tail,
])
Out[17]:
We notice that the stressng-cpu-jenkin looks like an outlier. Let's redraw the histogram without this one.
In [18]:
# Despite the df_ prefix this is a boolean row mask: False only for the
# stressng-cpu-jenkin rows.
df_normalized_issdm_6_no_outlier = df_normalized_issdm_6_with_limit['benchmark'].ne('stressng-cpu-jenkin')

# Same histogram as before, with the outlier benchmark filtered out.
df_normalized_issdm_6_with_limit.loc[df_normalized_issdm_6_no_outlier, 'normalized'].hist(
    color='Green',
    bins=150,
    figsize=(25, 12),
    xlabelsize=20,
    ylabelsize=20,
)
plt.title('stress tests run on issdm-6 with CPU restriction (no outlier)', fontsize=30)
plt.xlabel('Normalized Value (re-execution / original)', fontsize=25)
plt.ylabel('Frequency (# of benchmarks)', fontsize=25)
Out[18]:
Let's have a look at the histogram of frequency of normalized value based on stress tests without CPU restriction running on t2.micro.
In [19]:
# t2.micro runs without CPU restriction; kept as a named frame because the
# ranking cell below reuses it.
df_normalized_t2_micro_without_limit = df_normalized.loc[
    normalized_machine_is_t2_micro & normalized_limits_is_without
]

# Distribution of the normalized value across those runs.
df_normalized_t2_micro_without_limit['normalized'].hist(
    bins=150,
    figsize=(30, 12),
    xlabelsize=20,
    ylabelsize=20,
)
plt.title('stress tests run on t2.micro without CPU restriction', fontsize=30)
plt.xlabel('Normalized Value (re-execution / original)', fontsize=25)
plt.ylabel('Frequency (# of benchmarks)', fontsize=25)
Out[19]:
Here is the rank of normalized value from stress tests without CPU restriction
In [20]:
# Rank the t2.micro runs without CPU restriction by normalized value, highest first.
df_normalized_t2_micro_without_limit_sorted = df_normalized_t2_micro_without_limit.sort_values(
    by='normalized', ascending=False
)
df_normalized_t2_micro_without_limit_sorted_head = df_normalized_t2_micro_without_limit_sorted.head()
df_normalized_t2_micro_without_limit_sorted_tail = df_normalized_t2_micro_without_limit_sorted.tail()

# Show the five highest and five lowest rows in one table. DataFrame.append was
# removed in pandas 2.0; pd.concat stacks the two frames the same way.
pd.concat([
    df_normalized_t2_micro_without_limit_sorted_head,
    df_normalized_t2_micro_without_limit_sorted_tail,
])
Out[20]:
Let's have a look at the histogram of frequency of normalized value based on stress tests with CPU restriction running on t2.micro.
In [21]:
# t2.micro runs with CPU restriction; kept as a named frame because the ranking
# and outlier cells below reuse it.
df_normalized_t2_micro_with_limit = df_normalized.loc[
    normalized_machine_is_t2_micro & normalized_limits_is_with
]

# Distribution of the normalized value across those runs.
df_normalized_t2_micro_with_limit['normalized'].hist(
    color='Orange',
    bins=150,
    figsize=(30, 12),
    xlabelsize=20,
    ylabelsize=20,
)
plt.title('stress tests run on t2.micro with CPU restriction', fontsize=30)
plt.xlabel('Normalized Value (re-execution / original)', fontsize=25)
plt.ylabel('Frequency (# of benchmarks)', fontsize=25)
Out[21]:
Here is the rank of normalized value from stress tests with CPU restriction
In [22]:
# Rank the t2.micro runs with CPU restriction by normalized value, highest first.
df_normalized_t2_micro_with_limit_sorted = df_normalized_t2_micro_with_limit.sort_values(
    by='normalized', ascending=False
)
df_normalized_t2_micro_with_limit_sorted_head = df_normalized_t2_micro_with_limit_sorted.head()
df_normalized_t2_micro_with_limit_sorted_tail = df_normalized_t2_micro_with_limit_sorted.tail()

# Show the five highest and five lowest rows in one table. DataFrame.append was
# removed in pandas 2.0; pd.concat stacks the two frames the same way.
pd.concat([
    df_normalized_t2_micro_with_limit_sorted_head,
    df_normalized_t2_micro_with_limit_sorted_tail,
])
Out[22]:
We notice that the stressng-memory-stack looks like an outlier. Let's redraw the histogram without this one.
In [23]:
# Despite the df_ prefix this is a boolean row mask: False only for the
# stressng-memory-stack rows.
df_normalized_t2_micro_no_outlier = df_normalized_t2_micro_with_limit['benchmark'].ne('stressng-memory-stack')

# Same histogram as before, with the outlier benchmark filtered out.
df_normalized_t2_micro_with_limit.loc[df_normalized_t2_micro_no_outlier, 'normalized'].hist(
    color='Green',
    bins=150,
    figsize=(30, 12),
    xlabelsize=20,
    ylabelsize=20,
)
plt.title('stress tests run on t2.micro with CPU restriction (no outlier)', fontsize=30)
plt.xlabel('Normalized Value (re-execution / original)', fontsize=25)
plt.ylabel('Frequency (# of benchmarks)', fontsize=25)
Out[23]:
The stressng-cpu-jenkin benchmark is a collection of (non-cryptographic) hash functions for multi-byte keys. See Jenkins hash function from Wikipedia for more details.
Now we use 9 other benchmark programs to verify this result. These programs are,
Read verification tests data.
In [24]:
# Normalized results of the verification benchmarks; the relative path is
# resolved against the notebook's current working directory.
verification_results_path = 'verification/results/2/alltests_with_normalized_results_1.1.csv'
df_verification = pd.read_csv(verification_results_path)
Show number of test benchmarks.
In [25]:
# Half the row count. NOTE(review): presumably each benchmark appears once per
# target machine (two machines) - confirm against the CSV.
df_verification.shape[0] / 2
Out[25]:
Order the test results by the absolute value of the normalized value.
In [26]:
# Sort |normalized| from largest to smallest, then reorder the full frame to
# follow that index order.
abs_normalized_descending = df_verification['normalized'].abs().sort_values(ascending=False)
df_verification_rank = df_verification.reindex(abs_normalized_descending.index)

# The eight rows with the largest absolute normalized value.
df_verification_rank.head(n=8)
Out[26]:
Histogram of frequency of normalized value.
In [27]:
# Verification rows measured on issdm-6; reused by the cells below.
verification_machine_is_issdm_6 = df_verification['machine'].eq('issdm-6')
df_verification_issdm_6 = df_verification[verification_machine_is_issdm_6]

# Distribution of the normalized value across those rows.
df_verification_issdm_6['normalized'].hist(
    color='y',
    bins=150,
    figsize=(20, 10),
    xlabelsize=20,
    ylabelsize=20,
)
plt.title('verification tests run on issdm-6', fontsize=30)
plt.xlabel('Normalized Value (re-execution / original)', fontsize=25)
plt.ylabel('Frequency (# of benchmarks)', fontsize=25)
Out[27]:
Print the max and the min normalized value,
In [28]:
# Largest, then smallest, normalized value among the issdm-6 verification rows.
verification_issdm_6_normalized = df_verification_issdm_6['normalized']
print(
    verification_issdm_6_normalized.max(),
    verification_issdm_6_normalized.min()
)
The average of the normalized value is,
In [29]:
# Mean normalized value over the issdm-6 verification rows.
df_verification_issdm_6.normalized.mean()
Out[29]:
If we remove all nbench tests, the frequency histogram changes to
In [30]:
# Drop every row whose benchmark name begins with 'nbench'.
benchmark_is_nbench = df_verification_issdm_6['benchmark'].str.startswith('nbench')
df_verification_issdm_6_no_nbench = df_verification_issdm_6[~benchmark_is_nbench]

# Distribution of the normalized value across the remaining rows.
df_verification_issdm_6_no_nbench['normalized'].hist(
    color='greenyellow',
    bins=150,
    figsize=(20, 10),
    xlabelsize=20,
    ylabelsize=20,
)
plt.title('verification tests run on issdm-6 (no nbench)', fontsize=30)
plt.xlabel('Normalized Value (re-execution / original)', fontsize=25)
plt.ylabel('Frequency (# of benchmarks)', fontsize=25)
Out[30]:
The max and the min normalized value change to,
In [31]:
# Largest, then smallest, normalized value once the nbench rows are excluded.
no_nbench_normalized = df_verification_issdm_6_no_nbench['normalized']
print(
    no_nbench_normalized.max(),
    no_nbench_normalized.min()
)
The average of the normalized value changes to,
In [32]:
# Mean normalized value once the nbench rows are excluded.
df_verification_issdm_6_no_nbench.normalized.mean()
Out[32]:
Histogram of frequency of normalized value.
In [33]:
# Verification rows measured on t2.micro; reused by the cells below.
verification_machine_is_t2_micro = df_verification['machine'].eq('t2.micro')
df_verification_t2_micro = df_verification[verification_machine_is_t2_micro]

# Distribution of the normalized value across those rows.
df_verification_t2_micro['normalized'].hist(
    color='y',
    bins=150,
    figsize=(20, 10),
    xlabelsize=20,
    ylabelsize=20,
)
plt.title('verification tests run on t2.micro', fontsize=30)
plt.xlabel('Normalized Value (re-execution / original)', fontsize=25)
plt.ylabel('Frequency (# of benchmarks)', fontsize=25)
Out[33]:
The average of the normalized value of the verification benchmarks is,
In [34]:
# Mean normalized value over the t2.micro verification rows.
df_verification_t2_micro.normalized.mean()
Out[34]:
Let's see the frequency histogram after removing right-most four outliers.
In [35]:
# Benchmark names of the four t2.micro rows that come first in the ranking.
# NOTE(review): the ranking is by absolute normalized value, so these are the
# four largest magnitudes, which need not all be the right-most bars - confirm.
verification_rank_t2_micro = df_verification_rank[df_verification_rank['machine'] == 't2.micro']
df_verification_top_benchmakrs = verification_rank_t2_micro.head(4)['benchmark']

# Exclude those benchmarks from the t2.micro verification rows.
benchmark_is_top_ranked = df_verification_t2_micro['benchmark'].isin(df_verification_top_benchmakrs)
df_verification_t2_micro_no_outliers = df_verification_t2_micro[~benchmark_is_top_ranked]

# Distribution of the normalized value across the remaining rows.
df_verification_t2_micro_no_outliers['normalized'].hist(
    color='greenyellow',
    bins=150,
    figsize=(20, 10),
    xlabelsize=20,
    ylabelsize=20,
)
plt.title('verification tests on t2.micro (no outliers)', fontsize=30)
plt.xlabel('Normalized Value (re-execution / original)', fontsize=25)
plt.ylabel('Frequency (# of benchmarks)', fontsize=25)
Out[35]:
Print the max and the min normalized value,
In [36]:
# Largest, then smallest, normalized value once the four top-ranked benchmarks
# are excluded.
no_outliers_normalized = df_verification_t2_micro_no_outliers['normalized']
print(
    no_outliers_normalized.max(),
    no_outliers_normalized.min()
)
The average of the normalized value without the four outliers is,
In [37]:
# Mean normalized value once the four top-ranked benchmarks are excluded.
df_verification_t2_micro_no_outliers.normalized.mean()
Out[37]: