[PUBLIC] Analysis of CLBlast client multiple sizes

Overview

This Jupyter Notebook analyses the performance that CLBlast (single configuration) achieves across a range of matrix sizes.

Get the experimental data from DropBox

NB: Please ignore this section if you are not interested in re-running or modifying this notebook.

The experimental data was collected on the experimental platform and archived as follows:

$ cd `ck find ck-math:script:<...>`
$ python <...>.py
$ ck zip local:experiment:* --archive_name=<...>.zip

It can be downloaded and extracted as follows:

$ wget <...>.zip
$ ck add repo:<....> --zip=<....>.zip --quiet

Data wrangling code

NB: Please ignore this section if you are not interested in re-running or modifying this notebook.

Includes

Standard



In [ ]:

    
import os
import sys
import json
import re

Scientific

If some of the scientific packages are missing, please install them using:

# pip install jupyter pandas numpy matplotlib seaborn



In [ ]:

    
import IPython as ip
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mp



In [ ]:

    
# Report the version of every scientific package imported above, one per line.
# NB: Seaborn may additionally require Tk: apt install python-tk
for package_label, package in (
        ('IPython', ip),
        ('Pandas', pd),
        ('NumPy', np),
        ('Seaborn', sns),
        ('Matplotlib', mp)):
    print ('%s version: %s' % (package_label, package.__version__))



In [ ]:

    
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline



In [ ]:

    
from IPython.display import Image
from IPython.core.display import HTML

Collective Knowledge

If CK is not installed, please install it using:

# pip install ck



In [ ]:

    
import ck.kernel as ck
# Report the installed Collective Knowledge version.
print ('CK version: {0}'.format(ck.__version__))

Define helper functions



In [ ]:

    
def xgemm_flops(alpha, beta, M, K, N):
    """Count floating-point operations for C = alpha * A * B + beta * C.

    A is an MxK matrix and B is a KxN matrix. The A * B product contributes
    2*M*N*K operations unless alpha is zero, and the beta * C update
    contributes 2*M*N operations unless beta is zero.
    """
    total = 0
    if alpha != 0:
        # Matrix product term.
        total += 2*M*N*K
    if beta != 0:
        # Scaled accumulation into C.
        total += 2*M*N
    return total



In [ ]:

    
def GFLOPS(kernel, run_characteristics, time_ms):
    """Return GFLOPS (Giga floating-point operations per second) for a known kernel.

    Only kernels whose name contains 'xgemm' (case-insensitive) are known;
    for any other kernel -1.0 is returned.

    kernel              -- kernel name.
    run_characteristics -- mapping providing 'arg_alpha', 'arg_beta',
                           'arg_m', 'arg_k' and 'arg_n'.
    time_ms             -- execution time in milliseconds.
    """
    # Guard clause: unknown kernels get the sentinel value.
    if 'xgemm' not in kernel.lower():
        return (-1.0)

    elapsed_ms = np.float64(time_ms)
    alpha = np.float64(run_characteristics['arg_alpha'])
    beta = np.float64(run_characteristics['arg_beta'])
    M = np.int64(run_characteristics['arg_m'])
    K = np.int64(run_characteristics['arg_k'])
    N = np.int64(run_characteristics['arg_n'])

    giga_ops = 1e-9 * xgemm_flops(alpha, beta, M, K, N)
    seconds = 1e-3 * elapsed_ms
    return giga_ops / seconds



In [ ]:



In [ ]:

    
def convert2int(s):
    """Convert a size specification to a NumPy 64-bit integer.

    Accepts either a plain integer ('512' or 512) or an integer followed by
    a 'K' suffix ('4K' or '4k'), which multiplies the value by 1024.
    Surrounding whitespace is ignored.

    Raises ValueError if the specification is empty or not an integer.
    """
    # Normalise via str() so that values already parsed as numbers
    # (e.g. integers loaded from JSON) are handled as well as strings.
    text = str(s).strip()
    if not text:
        raise ValueError('empty size specification: %r' % (s,))
    if text[-1] in ('K', 'k'):
        return np.int64(text[:-1]) * 1024
    return np.int64(text)



In [ ]:

    
def args_str(kernel, run):
    """Format the xgemm run arguments as a human-readable string.

    Returns 'alpha=..., beta=..., M=..., K=..., N=...' built from the
    'arg_*' entries of `run` when the kernel name contains 'xgemm'
    (case-insensitive), and an empty string for any other kernel.
    """
    if 'xgemm' not in kernel.lower():
        return ''
    values = tuple(run[key] for key in ('arg_alpha', 'arg_beta', 'arg_m', 'arg_k', 'arg_n'))
    return 'alpha=%s, beta=%s, M=%s, K=%s, N=%s' % values

Access the experimental data



In [ ]:

    
def get_experimental_results(repo_uoa='explore-matrix-size-acl-sgemm-opencl-odroid-xu3', tags='', verbose=False):
    """Load experimental results from a CK repository into a DataFrame.

    Searches `repo_uoa` for 'experiment' entries matching `tags`, reads the
    'ckp-<point>.0001.json' file of every experiment point and collects one
    row per repetition.

    repo_uoa -- CK repository to search.
    tags     -- tags passed to the CK search.
    verbose  -- if True, print each experiment entry and each raw
                characteristics list while loading (debugging aid).

    Returns a DataFrame with a single 'G' column (the 'GFLOPS_1' run
    characteristic as float32), indexed by ('m', 'n', 'k', 'repetition_id')
    and sorted by that index.

    Raises RuntimeError if a CK call reports an error and ValueError if no
    experimental data was found at all.
    """
    module_uoa = 'experiment'
    r = ck.access({'action':'search', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'tags':tags})
    if r['return']>0:
        # Raise instead of calling exit(): this runs inside a notebook kernel.
        raise RuntimeError("Error: %s" % r['error'])
    experiments = r['lst']

    dfs = []
    for experiment in experiments:
        data_uoa = experiment['data_uoa']
        r = ck.access({'action':'list_points', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'data_uoa':data_uoa})
        if r['return']>0:
            raise RuntimeError("Error: %s" % r['error'])
        if verbose:
            print (experiment)
        for point in r['points']:
            point_path = os.path.join(r['path'], 'ckp-%s.0001.json' % point)
            with open(point_path) as point_file:
                point_data_raw = json.load(point_file)
            characteristics_list = point_data_raw['characteristics_list']
            if verbose:
                print (characteristics_list)
            # Obtain column data: one row per repetition of this point.
            data = [
                {
                    'repetition_id': repetition_id,
                    'm': convert2int(characteristics['run']['m']),
                    'n': convert2int(characteristics['run']['n']),
                    'k': convert2int(characteristics['run']['k']),
                    'G': np.float32(characteristics['run']['GFLOPS_1'])
                }
                for (repetition_id, characteristics) in enumerate(characteristics_list)
            ]
            if not data:
                # A point without repetitions has no index columns to set.
                continue
            # Construct a DataFrame.
            df = pd.DataFrame(data)
            # Set columns and index names.
            df.columns.name = 'characteristics'
            df.index.name = 'index'
            df = df.set_index(['m', 'n', 'k', 'repetition_id'])
            # Append to the list of similarly constructed DataFrames.
            dfs.append(df)
    if not dfs:
        raise ValueError("No experimental data found in repo '%s' with tags '%s'" % (repo_uoa, tags))
    # Concatenate all constructed DataFrames (i.e. stack on top of each other).
    result = pd.concat(dfs)
    # sort_index(level=...) replaces the deprecated DataFrame.sortlevel().
    return result.sort_index(level=list(result.index.names))



In [ ]:

    
# Load every experiment tagged 'acl-sgemm-opencl'.
df = get_experimental_results(tags='acl-sgemm-opencl')
# Raise the pandas display limits to the frame's own size so that the
# whole table is rendered below.
pd.set_option('display.max_columns', len(df.columns))
pd.set_option('display.max_rows', len(df.index))

df



In [ ]:

    
# Re-sort the rows by the fourth index level (then by the remaining levels).
# sort_index(level=...) replaces the deprecated DataFrame.sortlevel().
df = df.sort_index(level=df.index.names[3])



In [ ]:

    
#pd.options.display.max_columns=2
#df = df.reset_index('mnk').sort_values('mnk')



In [ ]:

    
# Aggregate over the last index level: group by every index level except the
# last one and compute the mean and standard deviation of each group.
grouped_by_size = df.groupby(level=df.index.names[:-1])
df_mean = grouped_by_size.mean()
df_std = grouped_by_size.std()



In [ ]:

    
# Bar chart of the mean values with the standard deviation as error bars.
plot_options = dict(
    title='GFLOPS',
    kind='bar',
    rot=0,
    ylim=[0,20],
    figsize=[20, 12],
    grid=True,
    legend=True,
    colormap=cm.autumn,
    fontsize=16,
)
df_mean.T.plot(yerr=df_std.T, **plot_options)



In [ ]:

    
# First index level of the first row.
# NOTE(review): with the ('m', 'n', 'k', 'repetition_id') index built by
# get_experimental_results() this is the 'm' value, not a kernel name -
# confirm whether the variable name is a leftover.
first_row_label = df.index[0]
kernel = first_row_label[0]
kernel