This Jupyter Notebook analyses the performance that CLBlast achieves across a range of matrix sizes and adaptive configurations.
NB: Please ignore this section if you are not interested in re-running or modifying this notebook.
In [ ]:
import os
import sys
import json
import re
If some of the scientific packages are missing, please install them using:
# pip install jupyter pandas numpy matplotlib seaborn
In [ ]:
import IPython as ip
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mp
In [ ]:
# Report the version of each third-party package imported above.
for _template, _module in (
    ('IPython version: %s', ip),
    ('Pandas version: %s', pd),
    ('NumPy version: %s', np),
    ('Seaborn version: %s', sns),  # apt install python-tk
    ('Matplotlib version: %s', mp),
):
    print(_template % _module.__version__)
In [ ]:
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline
In [ ]:
# Defaults shared by the plotting code in this notebook.
default_title = 'Firefly RK3399'  # default plot title
default_ylabel = 'GFLOPS'
default_colormap = cm.autumn
default_figsize = [16, 8]  # assigned to matplotlib's 'figure.figsize' setting
default_dpi = 200  # assigned to matplotlib's 'figure.dpi' setting
default_fontsize = 16  # assigned to matplotlib's 'font.size' setting and used for plot labels
In [ ]:
# Matplotlib 2.0 changed the default look; restore the classic style on 2.x and later.
# The major version is compared numerically: comparing only the first character of the
# version string would skip 3.x and misread multi-digit major versions.
if int(mp.__version__.split('.')[0]) >= 2: mp.style.use('classic')
# Apply the notebook-wide defaults after the style so that they take precedence.
mp.rcParams['figure.max_open_warning'] = 50
mp.rcParams['figure.figsize'] = default_figsize
mp.rcParams['figure.dpi'] = default_dpi
mp.rcParams['font.size'] = default_fontsize
mp.rcParams['legend.fontsize'] = 'medium'
In [ ]:
from IPython.display import Image, display
# Display a DataFrame without truncating any of its rows or columns.
# The pandas display limits are raised only for the duration of this call and restored
# afterwards, so that other outputs in the notebook keep their usual limits.
def display_in_full(df):
    with pd.option_context('display.max_columns', len(df.columns),
                           'display.max_rows', len(df.index)):
        display(df)
If CK is not installed, please install it using:
# pip install ck
In [ ]:
import ck.kernel as ck
# Report the version of the CK kernel module imported above.
print ('CK version: %s' % ck.__version__)
In [ ]:
# Return an integer (np.int64) from a value or from the first element of a list/tuple of values.
# Deal with CLBlast abbreviations (e.g. 1K = 1024).
# Accepts strings such as '256' or '1K' as well as values that are already integers.
# Raises ValueError if the text is not an integer (optionally followed by 'K'),
# and IndexError if an empty list/tuple is given.
def convert2int(s):
    if isinstance(s, (list, tuple)):
        s = s[0]
    # Go through str() so that integers and strings are handled by the same code path.
    text = str(s).strip()
    if text.endswith('K'):
        return np.int64(text[:-1]) * 1024
    return np.int64(text)
In [ ]:
# Count the floating-point operations performed by C = alpha * A * B + beta * C,
# where A is a MxK matrix and B is a KxN matrix.
# The A * B product contributes 2*M*N*K operations unless alpha is zero;
# the beta * C update contributes 2*M*N operations unless beta is zero.
def xgemm_flops(alpha, beta, M, K, N):
    total = 0
    if alpha != 0:
        total += 2 * M * N * K
    if beta != 0:
        total += 2 * M * N
    return total
In [ ]:
# Return GFLOPS (Giga floating-point operations per second) for xGEMM given run characteristics (rc).
# rc is a dict providing 'alpha', 'beta', 'm', 'n', 'k' and 'execution_time';
# 'execution_time' is treated as a duration in seconds.
def GFLOPS(rc):
    alpha = np.float64(rc['alpha'])
    beta = np.float64(rc['beta'])
    m = convert2int(rc['m'])
    n = convert2int(rc['n'])
    k = convert2int(rc['k'])
    # xgemm_flops takes its dimensions in (M, K, N) order. Passing (m, n, k) would count
    # the beta * C term as 2*m*k instead of 2*m*n.
    Gflops = 1e-9 * xgemm_flops(alpha, beta, m, k, n)
    seconds = np.float64(rc['execution_time'])
    return (Gflops / seconds)
In [ ]:
# Return the string '(m, k, n)' built from the matrix dimensions in the run characteristics (rc).
def mkn(rc):
    m, n, k = (convert2int(rc[key]) for key in ('m', 'n', 'k'))
    return '(%d, %d, %d)' % (m, k, n)
In [ ]:
# Load the results of all CK experiments tagged with repo_tag from the repository repo_uoa.
# Returns a DataFrame with one row per repetition of each experimental point, indexed by
# ('lib', 'model', '(m, k, n)', 'repetition_id'), sorted by that index, and holding the
# columns 'm', 'n', 'k', 'time_ms' and 'GFLOPS'.
# Prints the CK error and exits with status 1 if a CK query fails.
# Raises IndexError if an experiment carries no known model tag or no other tag
# besides the model and repo_tag.
def get_experimental_results(repo_uoa='local', repo_tag='explore-clblast-matrix-size-client'):
    module_uoa = 'experiment'
    r = ck.access({'action':'search', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'tags':repo_tag})
    if r['return']>0:
        print ("Error: %s" % r['error'])
        # Use sys.exit() rather than the interactive exit() helper, which is not
        # guaranteed to stop execution (e.g. when running under IPython/Jupyter).
        sys.exit(1)
    experiments = r['lst']
    dfs = []
    for experiment in experiments:
        data_uoa = experiment['data_uoa']
        r = ck.access({'action':'list_points', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'data_uoa':data_uoa})
        if r['return']>0:
            print ("Error: %s" % r['error'])
            sys.exit(1)
        # The experiment tags identify the model and the library variant.
        all_tags = r['dict']['tags']
        model_tags = [ tag for tag in all_tags if tag in ['AlexNet', 'GoogleNet', 'SqueezeNet1.1'] ]
        model = model_tags[0]
        relevant_tags = [ tag for tag in all_tags if tag not in [ model, repo_tag ] ]
        # Drop the leading len('clblast-tune-') characters of the first remaining tag
        # (presumably of the form 'clblast-tune-<lib>' - TODO confirm).
        lib = relevant_tags[0][len('clblast-tune-'):]
        for point in r['points']:
            with open(os.path.join(r['path'], 'ckp-%s.0001.json' % point)) as point_file:
                point_data_raw = json.load(point_file)
            characteristics_list = point_data_raw['characteristics_list']
            # Obtain column data: one record per repetition.
            data = [
                {
                    'lib': lib,
                    'model': model,
                    '(m, k, n)': mkn(characteristics['run']),
                    'm': convert2int(characteristics['run']['m']),
                    'n': convert2int(characteristics['run']['n']),
                    'k': convert2int(characteristics['run']['k']),
                    'time_ms': np.float32(characteristics['run']['execution_time'])*1e3,
                    'GFLOPS' : GFLOPS(characteristics['run']),
                    'repetition_id': repetition_id
                }
                for (repetition_id, characteristics) in enumerate(characteristics_list)
            ]
            # Construct a DataFrame.
            df = pd.DataFrame(data)
            # Set columns and index names.
            df.columns.name = 'characteristics'
            df.index.name = 'index'
            df = df.set_index(['lib', 'model', '(m, k, n)', 'repetition_id'])
            # Append to the list of similarly constructed DataFrames.
            dfs.append(df)
    # Concatenate all constructed DataFrames (i.e. stack on top of each other).
    result = pd.concat(dfs)
    return result.sort_index(level=result.index.names)
In [ ]:
# Load all results tagged 'explore-clblast-matrix-size-client' from the
# 'explore-matrix-size-clblast-height-firefly-rk3399' CK repository.
df = get_experimental_results(repo_uoa='explore-matrix-size-clblast-height-firefly-rk3399',
                              repo_tag='explore-clblast-matrix-size-client')
In [ ]:
# display_in_full(df)
In [ ]:
# Aggregate GFLOPS over the repetitions (the last index level) and spread the
# libraries across columns: one table of means, one of standard deviations.
gflops_per_configuration = df['GFLOPS'].groupby(level=df.index.names[:-1])
df_mean = gflops_per_configuration.mean().unstack('lib')
df_std = gflops_per_configuration.std().unstack('lib')
In [ ]:
# Draw a grouped bar chart of the values in df_mean, with df_std as error bars.
#   df_mean, df_std: DataFrames with matching index and columns.
#   title, ylabel: chart title and y-axis label.
#   legend_loc: legend placement, passed to the legend() call.
#   rot: rotation of the x-axis tick labels, passed to the pandas plot() call.
def plot(df_mean, df_std, title=default_title, ylabel='GFLOPS', legend_loc='upper left', rot=0):
    # Leave 10% headroom above the tallest bar. Take the maximum over all rows and columns
    # rather than over the last row only, so that no bar is clipped regardless of ordering.
    ymax = df_mean.max().max()*1.1
    ax = df_mean.plot(yerr=df_std,
                      title=title, kind='bar', rot=rot, ylim=[0,ymax], figsize=default_figsize, width=0.9,
                      grid=True, legend=True, colormap=default_colormap, fontsize=default_fontsize)
    ax.set_title(title, fontsize=default_fontsize)
    ax.set_xlabel(df_mean.index.name, fontsize=default_fontsize)
    ax.set_ylabel(ylabel, fontsize=default_fontsize)
    ax.legend(loc=legend_loc)
In [ ]:
# For each model, order the matrix sizes by the mean GFLOPS of the 'master' library
# and plot them in consecutive pages of at most `step` rows.
step = 5
for model in ('AlexNet', 'GoogleNet', 'SqueezeNet1.1'):
    df_mean_model_sorted_by_master = df_mean.loc[model].sort_values(by='master', axis=0)
    # Reorder the standard deviations to match the sorted means.
    df_std_model_sorted_by_master = df_std.loc[model].loc[df_mean_model_sorted_by_master.index]
    num_rows = len(df_mean_model_sorted_by_master.index)
    for lower in range(0, num_rows, step):
        upper = min(lower + step, num_rows)
        title = '%s: (rows %d to %d out of %d)' % (model, lower+1, upper, num_rows)
        plot(
            df_mean_model_sorted_by_master.iloc[lower:upper],
            df_std_model_sorted_by_master.iloc[lower:upper],
            title=title,
        )