Copyright 2018 Google LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
In [0]:
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import seaborn
import json
from IPython.display import set_matplotlib_formats
seaborn.set_style("whitegrid")
set_matplotlib_formats('png')
In [0]:
def ScaleTime(num, unit):
'''Scales time to nanos.'''
if (unit == 'ns'):
return num
raise Exception('Unknown time unit: ' + unit)
def ParseJsonBenchmark(benchmarks):
'''Parses json benchmark data into a list of dictionaries.
"benchmarks": [
{
"name": "BM<FindHit_Hot, __gnu_cxx::hash_set, 4, Density::kMin>/16/iterations:20000000",
"iterations": 33572864,
"real_time": 1.2866112288867752e+01,
"cpu_time": 1.2866659097061246e+01,
"time_unit": "ns",
"label": "lf=0.50 cmp=1.248 size=194 num_sets=338"
},
...
]
'''
# we drop the /16/iterations:200000 portion because the size is more accurately kept in the label under 'size'
name_rx = re.compile(r'BM<(?P<benchmark>[^_]+)_+(?P<cache>Hot|Cold), (?P<table>[^,]+), +(?P<entry_size>\d+), Density::k(?P<density>Min|Max)>')
rows_to_parse = ['cmp', 'size', 'lf', 'num_sets']
res = []
for b in benchmarks:
try:
# Detect tests that we just skipped
if b['iterations'] == 0: continue
m = name_rx.search(b['name'])
row = m.groupdict()
row['time_ns'] = ScaleTime(b['real_time'], b['time_unit'])
row['cpu_ns'] = ScaleTime(b['cpu_time'], b['time_unit'])
for l in b['label'].split():
k, v = l.split('=')
row[k] = float(v)
res.append(row)
except:
print(b)
raise
return res
def FrameParsedBenchmark(data):
'''Returns a pivot table of the benchmark data from a list of dictionaries.'''
framed = pd.DataFrame(data)
desired_values = ['time_ns', 'lf', 'cpu_ns', 'cmp', 'allocs']
pivot_args = {
'columns': ['benchmark', 'table', 'entry_size', 'cache', 'density'],
'index' : ['size'],
}
results = {}
for v in desired_values:
if v in framed:
results[v] = pd.pivot_table(framed, values=v, **pivot_args)
return pd.concat(results)
In [4]:
# TODO: figure out if this works in Jupyter
from google.colab import files
uploaded = files.upload()
In [96]:
benchmarks_json = json.loads(next(iter(uploaded.values())))
data = FrameParsedBenchmark(ParseJsonBenchmark(benchmarks_json['benchmarks']))
for l in data.axes[1].levels:
print("%10s: %s" % (l.name, " ".join(l)))
In [290]:
t = ['absl::flat_hash_set', 'folly::F14ValueSet', 'folly::F14VectorSet']
fig, axes = plt.subplots(nrows=8, ncols=5, figsize=(25,30))
ax_it = iter(axes.flatten())
w = 'Cold'
for b in ['FindHit', 'FindMiss', 'InsertManyUnordered', 'Iterate']:
for o in ['Min', 'Max']:
for s in ['4', '8', '16', '32', '64']:
p = data[b].loc['time_ns'].loc(axis='columns')[t, s, w, o].interpolate().plot(
ax=next(ax_it), logx=True, legend=True, ylim=0.0)
# If t is not in the same order as the underlying data frame, the legend
# will be incorrectly labelled. Double check by removing this line
# and you will get the longer full name for the axis.
p.legend(t)
p.set_title("%s %s %s %s" % (b, s, w, o))
p.set_xlabel("Size in elements")
p.set_ylabel("Time in nanos")
plt.subplots_adjust(hspace=0.6)
#plt.savefig('compare_cold.pdf')
In [292]:
t = ['absl::flat_hash_set', 'folly::F14ValueSet', 'folly::F14VectorSet']
fig, axes = plt.subplots(nrows=8, ncols=5, figsize=(25,30))
ax_it = iter(axes.flatten())
w = 'Hot'
for b in ['FindHit', 'FindMiss', 'InsertManyUnordered', 'Iterate']:
for o in ['Min', 'Max']:
for s in ['4', '8', '16', '32', '64']:
p = data[b].loc['time_ns'].loc(axis='columns')[t, s, w, o].interpolate().plot(
ax=next(ax_it), logx=True, legend=True, ylim=0.0)
# If t is not in the same order as the underlying data frame, the legend
# will be incorrectly labelled. Double check by removing this line
# and you will get the longer full name for the axis.
p.legend(t)
p.set_title("%s %s %s %s" % (b, s, w, o))
p.set_xlabel("Size in elements")
p.set_ylabel("Time in nanos")
plt.subplots_adjust(hspace=0.6)
#plt.savefig('compare_hot.pdf')
In [ ]: