In [204]:
repo_uoa = 'explore-matrix-size-gemm-libs-dvdt-prof-firefly-rk3399-001'
NB: Please ignore this section if you are not interested in re-running or modifying this notebook.
The experimental data was collected on the target platform and archived as follows:
$ cd `ck find ck-math:script:<...>`
$ python <...>.py
$ ck zip local:experiment:* --archive_name=<...>.zip
It can be downloaded and extracted as follows:
$ wget <...>.zip
$ ck add repo:<repo_uoa> --zip=<...>.zip --quiet
In [205]:
import os
import sys
import json
import re
from datetime import datetime
If some of the scientific packages are missing, please install them using:
# pip install jupyter pandas numpy matplotlib
In [206]:
import IPython as ip
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mp
In [207]:
print ('IPython version: %s' % ip.__version__)
print ('Pandas version: %s' % pd.__version__)
print ('NumPy version: %s' % np.__version__)
print ('Seaborn version: %s' % sns.__version__) # apt install python-tk
print ('Matplotlib version: %s' % mp.__version__)
In [208]:
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline
In [209]:
from IPython.display import Image, display
def display_in_full(df):
    pd.options.display.max_columns = len(df.columns)
    pd.options.display.max_rows = len(df.index)
    display(df)
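A quick usage sketch on hypothetical data:
In [ ]:
# Hypothetical usage: show an entire DataFrame, however large.
display_in_full(pd.DataFrame(np.arange(12).reshape(3, 4)))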
If CK is not installed, please install it using:
# pip install ck
In [210]:
import ck.kernel as ck
print ('CK version: %s' % ck.__version__)
In [211]:
# client: 'acl-sgemm-opencl-example' or 'clblast-tune'
def get_mnk(characteristics, client):
    # dim: 'm', 'n', 'k'
    def get_dim_int(characteristics, client, dim):
        if client == 'clblast-tune':
            # CLBlast stores each dimension as a single-element list.
            dim_str = characteristics['run'][dim][0]
        else:
            dim_str = characteristics['run'][dim]
        return np.int64(dim_str)
    m = get_dim_int(characteristics, client, 'm')
    n = get_dim_int(characteristics, client, 'n')
    k = get_dim_int(characteristics, client, 'k')
    return ('(%d, %d, %d)' % (m, n, k))
In [212]:
def get_GFLOPS(characteristics, client):
    if client == 'acl-sgemm-opencl-example':
        GFLOPS_str = characteristics['run']['GFLOPS_1']
    else:
        GFLOPS_str = characteristics['run']['GFLOPS_1'][0]
    return np.float64(GFLOPS_str)
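The two clients serialise their run characteristics differently: 'clblast-tune' wraps each value in a single-element list, while 'acl-sgemm-opencl-example' stores plain scalars. A sketch with hypothetical characteristics dictionaries:
In [ ]:
# Hypothetical characteristics, mimicking the two serialisation formats.
acl_chars = {'run': {'m': '1024', 'n': '1024', 'k': '1024', 'GFLOPS_1': '7.5'}}
clblast_chars = {'run': {'m': ['1024'], 'n': ['1024'], 'k': ['1024'], 'GFLOPS_1': ['7.5']}}
print (get_mnk(acl_chars, 'acl-sgemm-opencl-example'))  # (1024, 1024, 1024)
print (get_GFLOPS(clblast_chars, 'clblast-tune'))       # 7.5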
In [213]:
def get_TimeMS(characteristics, client):
    return characteristics['run'].get('ms_1')

# Elapsed wall-clock time (in ms) between the dvdt-prof start/end timestamps.
def get_elapsed_ms(profiling):
    start = datetime.strptime(profiling['timestamp']['start'], '%Y-%m-%dT%H:%M:%S.%f')
    end = datetime.strptime(profiling['timestamp']['end'], '%Y-%m-%dT%H:%M:%S.%f')
    return (end.timestamp() - start.timestamp()) * 1000
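A quick check of the timestamp arithmetic on a hypothetical profiling record:
In [ ]:
# Hypothetical dvdt-prof timestamps illustrating the elapsed-time calculation.
profiling_demo = {'timestamp': {'start': '2017-05-01T12:00:00.000000',
                                'end':   '2017-05-01T12:00:01.500000'}}
print (get_elapsed_ms(profiling_demo))  # 1500.0 ms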
In [214]:
default_colormap = cm.autumn
default_figsize = [20, 12]
default_dpi = 200
default_fontsize = 20
default_legend_fontsize = 'medium'
if mp.__version__[0]=='2': mp.style.use('classic')
mp.rcParams['figure.figsize'] = default_figsize
mp.rcParams['figure.dpi'] = default_dpi
mp.rcParams['font.size'] = default_fontsize
mp.rcParams['legend.fontsize'] = default_legend_fontsize
In [215]:
def plot(df_mean, df_std, rot=90, patch_fontsize=default_fontsize):
    ax = df_mean.plot(yerr=df_std,
        kind='bar', ylim=[0, 20], rot=rot, width=0.9, grid=True, legend=True,
        figsize=default_figsize, colormap=default_colormap, fontsize=default_fontsize)
    ax.set_title('ARM Compute Library vs CLBlast (dv/dt)', fontsize=default_fontsize)
    ax.set_ylabel('SGEMM GFLOPS', fontsize=default_fontsize)
    ax.legend(loc='upper right')
    # Annotate each bar with its height (GFLOPS).
    for patch in ax.patches:
        text = '{0:2.1f}'.format(patch.get_height())
        ax.annotate(text, (patch.get_x(), patch.get_height()*1.01), fontsize=patch_fontsize)
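A minimal sketch of calling plot() on hypothetical means and standard deviations:
In [ ]:
# Hypothetical data: two clients by two matrix sizes.
index = ['(64, 64, 64)', '(1024, 1024, 1024)']
df_mean_demo = pd.DataFrame({'acl-sgemm-opencl-example': [3.0, 7.0],
                             'clblast-tune': [2.5, 6.5]}, index=index)
df_std_demo = pd.DataFrame({'acl-sgemm-opencl-example': [0.1, 0.2],
                            'clblast-tune': [0.1, 0.3]}, index=index)
plot(df_mean_demo, df_std_demo, rot=0)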
In [233]:
def get_experimental_results(repo_uoa='explore-matrix-size-gemm-libs-dvdt-prof-firefly-rk3399',
                             tags='explore-matrix-size-libs-sgemm, acl-sgemm-opencl-example'):
    module_uoa = 'experiment'
    r = ck.access({'action':'search', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'tags':tags})
    if r['return']>0:
        print ('Error: %s' % r['error'])
        exit(1)
    experiments = r['lst']
    dfs = []
    for experiment in experiments:
        data_uoa = experiment['data_uoa']
        r = ck.access({'action':'list_points', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'data_uoa':data_uoa})
        if r['return']>0:
            print ('Error: %s' % r['error'])
            exit(1)
        for point in r['points']:
            with open(os.path.join(r['path'], 'ckp-%s.0001.json' % point)) as point_file:
                point_data_raw = json.load(point_file)
            characteristics_list = point_data_raw['characteristics_list']
            num_repetitions = len(characteristics_list)
            client = data_uoa[len('explore-matrix-size-gemm-libs-'):]
            # Obtain column data.
            data = [
                {
                    'client': client,
                    '(m, n, k)': get_mnk(characteristics, client),
                    'GFLOPS': get_GFLOPS(characteristics, client),
                    'dvdt_prof_info': characteristics['run'].get('dvdt_prof', []),
                    'time (ms)': get_TimeMS(characteristics, client),
                    'repetition_id': repetition_id
                }
                for (characteristics, repetition_id) in zip(characteristics_list, range(num_repetitions))
            ]
            # Construct a DataFrame.
            df = pd.DataFrame(data)
            # Set column and index names.
            df.columns.name = 'characteristics'
            df.index.name = 'index'
            df = df.set_index(['client', '(m, n, k)', 'repetition_id', 'GFLOPS', 'time (ms)'])
            # Append to the list of similarly constructed DataFrames.
            dfs.append(df)
    # Concatenate all constructed DataFrames (i.e. stack them on top of each other).
    result = pd.concat(dfs).unstack('client').swaplevel(axis=1)
    return result.sort_index(level=result.index.names)
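The loop above expects each point file ('ckp-<point>.0001.json') to contain a 'characteristics_list' with one entry per repetition. A hypothetical skeleton of such a file, for reference only:
In [ ]:
# Hypothetical skeleton of a point file (keys as read by the loop above).
point_data_demo = {
    'characteristics_list': [
        {'run': {'m': '64', 'n': '64', 'k': '64',
                 'GFLOPS_1': '1.2', 'ms_1': 0.44, 'dvdt_prof': []}},
        {'run': {'m': '64', 'n': '64', 'k': '64',
                 'GFLOPS_1': '1.3', 'ms_1': 0.41, 'dvdt_prof': []}}
    ]
}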
In [234]:
df = get_experimental_results(repo_uoa=repo_uoa)
display_in_full(df)
In [232]:
df_min = df \
    .loc[df.groupby(level=df.index.names[:-1])['time (ms)'].idxmin()] \
    .reset_index('repetition_id', drop=True)
df_min
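The selection above keeps, within each group, the row with the minimal 'time (ms)'. The same idiom on hypothetical data:
In [ ]:
# Hypothetical illustration: pick the fastest repetition per client.
demo = pd.DataFrame({'time (ms)': [1.2, 0.9, 2.1, 2.5]},
                    index=pd.MultiIndex.from_tuples(
                        [('a', 0), ('a', 1), ('b', 0), ('b', 1)],
                        names=['client', 'repetition_id']))
demo.loc[demo.groupby(level='client')['time (ms)'].idxmin()]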
In [ ]:
batch_size = 1
# NB: the level names below ('platform', 'batch_size', 'model', 'lib') come from
# a model/library exploration; for this notebook's index they may need adjusting.
df_model_lib = df_min[['dvdt_prof_info']] \
    .reset_index('platform', drop=True) \
    .reorder_levels(['batch_size', 'model', 'lib']) \
    .loc[batch_size] \
    .sort_index()
df_model_lib
In [ ]:
models = df_model_lib.index.levels[0]
libs = df_model_lib.index.levels[1]
In [ ]:
def concat(model, lib):
    return '%s:%s' % (model, lib)
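The profiling analysis below uses the helpers pw and pc, which are not imported anywhere in this notebook. They are assumed to be the dvdt-prof post-processing modules; both the module names and the path below are assumptions, so adjust them to your setup:
In [ ]:
# Assumption: 'pw' and 'pc' are the dvdt-prof post-processing helpers.
# Both the module names and the path are hypothetical placeholders.
import sys
sys.path.append('/path/to/dvdt-prof/python')  # hypothetical location
import prof_wrangler as pw
import prof_common as pc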
In [ ]:
def analyse_model_lib(df_model_lib, model, lib, min_pc=1.0):
    trace = pw.index_calls(df_model_lib.loc[model].loc[lib]['dvdt_prof_info'])
    # All kernel enqueues.
    df_kernel_enqueues = pw.df_kernel_enqueues(pw.filter_calls(trace, ['clEnqueueNDRangeKernel']), unit='ms')
    # Kernel enqueues that take at least 'min_pc' % of the execution time.
    df_kernel_enqueues_cum_time_num = pw.df_kernel_enqueues_cumulative_time_num(df_kernel_enqueues, unit='ms')
    df_kernel_enqueues_cum_time_num.columns.name = concat(model, lib)
    return df_kernel_enqueues_cum_time_num[df_kernel_enqueues_cum_time_num['** Execution time (%) **'] > min_pc]
In [ ]:
def analyse_xgemm_kernel(df_model_lib, model, lib, kernel):
    # Get the trace for the lib and model.
    trace = pw.index_calls(df_model_lib.loc[model].loc[lib]['dvdt_prof_info'])
    # All calls to set kernel args.
    set_args = pw.filter_calls(trace, ['clSetKernelArg'])
    # All kernel enqueues.
    nqs = pw.filter_calls(trace, ['clEnqueueNDRangeKernel'])
    # Construct a DataFrame with info about kernel enqueues.
    df = pw.df_kernel_enqueues(nqs, unit='ms').swaplevel().loc[kernel]
    df = df[['p3 - p2 (ms)', 'gws2']]
    # As gws2 is always 1, we can use it to count the number of enqueues.
    df.columns = ['** Execution time (ms) **', '** Number of enqueues **']
    df.columns.name = kernel
    # Augment the DataFrame with placeholder columns for the (M, N, K) triples.
    df['kSizeM'] = 'M'; df['bSizeM'] = 'MM'
    df['kSizeN'] = 'N'; df['bSizeN'] = 'NN'
    df['kSizeK'] = 'K'; df['bSizeK'] = 'KK'
    # Initialise buckets.
    buckets = init_buckets()
    # Augment the DataFrame with the actual (M, N, K) triples.
    mnk_triples = []; mmnnkk_triples = []
    for nq in nqs:
        if nq['name'] == kernel:
            (M, N, K) = ('M', 'N', 'K'); (MM, NN, KK) = ('MM', 'NN', 'KK')
            # Recover (M, N, K) from the clSetKernelArg calls that precede this enqueue.
            for set_arg in set_args:
                if (set_arg['call_index'] > nq['call_index']): break
                if (set_arg['kernel'] != nq['kernel']): continue
                arg_value = pc.hex_str_as_int(set_arg['arg_value'])
                if (set_arg['arg_index'] == 0): M = arg_value; MM = arg_value
                if (set_arg['arg_index'] == 1): N = arg_value; NN = arg_value
                if (set_arg['arg_index'] == 2): K = arg_value; KK = arg_value
            mnk_triples.append((M, N, K))
            mmnnkk_triples.append(get_nearest_bucket(buckets, (M, N, K)))
    df[['kSizeM', 'kSizeN', 'kSizeK']] = mnk_triples
    df[['bSizeM', 'bSizeN', 'bSizeK']] = mmnnkk_triples
    # Calculate Gflops and GFLOPS (Gflops/s).
    df['** Gflops **'] = 2*df['kSizeM']*df['kSizeN']*df['kSizeK']*1e-9
    df['** GFLOPS **'] = df['** Gflops **'] / (df['** Execution time (ms) **']*1e-3)
    return df
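init_buckets() and get_nearest_bucket() are not defined in this notebook. A minimal sketch consistent with how they are used above, assuming a nearest-power-of-two bucketing scheme (the scheme itself is an assumption):
In [ ]:
# Hypothetical bucketing helpers: the real definitions are not shown here,
# so this nearest-power-of-two scheme is only an assumption.
def init_buckets():
    # Candidate bucket values per dimension: powers of two up to 4096.
    return [2**i for i in range(1, 13)]

def get_nearest_bucket(buckets, mnk):
    # Map each of (M, N, K) to the nearest candidate bucket value.
    return tuple(min(buckets, key=lambda b: abs(b - dim)) for dim in mnk)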
In [ ]:
model_lib_kernel_analysis = {}
for model in models:
    for lib in libs:
        title = concat(model, lib)
        print('== %s ==' % title)
        try:
            analysis = model_lib_analysis[title]
        except KeyError:
            print(' ... missing ...'); print(''); continue
        for kernel in analysis.index:
            # Only analyse the xgemm kernels.
            if kernel.lower().find('xgemm') == -1: continue
            analysis_xgemm = analyse_xgemm_kernel(df_model_lib, model, lib, kernel)
            pd.options.display.max_columns = analysis_xgemm.columns.size
            pd.options.display.max_rows = analysis_xgemm.index.size
            display(analysis_xgemm)
            analysis_xgemm_stats = analysis_xgemm.describe()
            pd.options.display.max_columns = analysis_xgemm_stats.columns.size
            pd.options.display.max_rows = analysis_xgemm_stats.index.size
            display(analysis_xgemm_stats)
            model_lib_kernel_analysis[concat(title, kernel)] = analysis_xgemm
            print('')
        print('')
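The loop above reads model_lib_analysis, which is populated in a cell not shown here; a sketch of how it could be built from analyse_model_lib() defined above:
In [ ]:
# Sketch: populate model_lib_analysis with per-(model, lib) enqueue summaries.
model_lib_analysis = {}
for model in models:
    for lib in libs:
        model_lib_analysis[concat(model, lib)] = \
            analyse_model_lib(df_model_lib, model, lib)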
In [ ]:
df = get_experimental_results(repo_uoa=repo_uoa)
display_in_full(df)
In [ ]:
df_mean = df.groupby(level=df.index.names[:-1]).mean()
df_std = df.groupby(level=df.index.names[:-1]).std()
plot(df_mean, df_std)