Simple interface to visualize gemm timings


In [76]:
#Import required libraries

import subprocess as sp
import numpy as np
import time
import os
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from collections import defaultdict
%pylab inline

# Make images slightly bigger.
# Use the explicitly imported `mpl` alias instead of the `matplotlib` name that
# only exists because `%pylab inline` injected it into the namespace.
# On newer matplotlib releases 'savefig.dpi' may be the string 'figure'
# (meaning "follow figure.dpi"), which cannot be multiplied; scale
# 'figure.dpi' in that case.
# NOTE: this scaling is applied every time the cell is run.
_save_dpi = mpl.rcParams['savefig.dpi']
if _save_dpi == 'figure':
    mpl.rcParams['figure.dpi'] *= 1.5
else:
    mpl.rcParams['savefig.dpi'] = _save_dpi * 1.5

# Maximum memory used in GB
MAXIMUM_MEMORY = 2

# Make a fair comparison: subprocesses started from this notebook inherit this
# environment, so OpenMP-aware code in them is limited to a single thread.
os.environ['OMP_NUM_THREADS'] = "1"

# To fix:
# Change x axis to log2 space.


Populating the interactive namespace from numpy and matplotlib

In [73]:
# Define helpers that run the gemm executable and collect its timings

def run_gemm(k, iters, kernal):
    """Runs Dr. Valeev's GEMM timing example once and parses its report.

    The executable is invoked as ``./gemm <k> <iters> <kernal>`` with stderr
    merged into stdout; the combined text is split on whitespace.

    Args:
        k: Forwarded verbatim as the first command-line argument.
        iters: Forwarded verbatim as the second command-line argument.
        kernal: Forwarded verbatim as the third command-line argument
            (the notebook passes names such as 'plain', 'block16', 'blas').

    Returns:
        (time (s), GFLOP/s) as floats, read from the 6th-from-last and
        2nd-from-last whitespace-separated tokens of the output, or
        (nan, nan) if the output reports a failed assertion.

    Raises:
        ValueError: if the output is not an assertion failure and the
            expected numeric tokens cannot be parsed from it.
        OSError: if ``./gemm`` cannot be executed.
    """
    # An argument list avoids building a shell command line by hand.
    cmd = ['./gemm'] + [str(x) for x in (k, iters, kernal)]

    # universal_newlines=True yields text (not bytes) on both Python 2 and 3,
    # so the string comparisons below behave the same everywhere.
    proc = sp.Popen(cmd, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.STDOUT,
                    close_fds=True, universal_newlines=True)

    # communicate() closes the child's stdin, drains stdout and waits for the
    # process to exit, so repeated calls do not leak pipes or zombie processes.
    out, _ = proc.communicate()
    data = out.split()

    # If the block algorithm assertion fails. The word is matched anywhere in
    # the output because some C libraries prefix the message with the program
    # name and source location instead of starting with "Assertion".
    if 'Assertion' in data:
        return (np.nan, np.nan)

    try:
        return (float(data[-6]), float(data[-2]))
    except (IndexError, ValueError):
        raise ValueError('Could not parse gemm output for %r: %r' % (cmd, out))


def gemm_timings(target_flops, kernal_list, trial_count, verbose=False):
    """Time every kernel in ``kernal_list`` over a range of problem sizes.

    For each kernel and each k in 2**4, 2**6, ..., 2**14, ``run_gemm`` is
    called ``trial_count`` times with an iteration count of
    ``int(target_flops / k**3)``. A size is skipped when that iteration count
    is zero or when ``k**2 * 16 / 1e9`` exceeds ``MAXIMUM_MEMORY``. Trials for
    which ``run_gemm`` reports NaN seconds are dropped.

    Args:
        target_flops: Budget used to derive the per-size iteration count.
        kernal_list: Iterable of kernel names passed through to ``run_gemm``.
        trial_count: Number of repeated runs per (kernel, k) pair.
        verbose: If True, print progress, per-kernel wall time, skipped
            sizes, and the largest k that produced a result.

    Returns:
        pandas.DataFrame with one row per successful trial and the columns
        ['Kernal', 'k', 'trial', 'Time(s)', 'GFLOP/s'].
    """
    rows = []

    for kernal in kernal_list:
        if verbose:
            start = time.time()
            print('Starting %s kernal...' % kernal)

        for exponent in range(4, 15, 2):
            k = 2 ** exponent
            k_iter = int(target_flops / (k ** 3))

            # If it is too big for the flop budget, just continue
            if k_iter == 0:
                continue

            # Memory estimate in GB using 16 bytes per k*k entry
            # (NOTE(review): presumably two 8-byte values per entry - confirm).
            mem_gb = k ** 2 * 16 / 1E9
            if mem_gb > MAXIMUM_MEMORY:
                if verbose:
                    print('Skipping k=%d: estimated %.3f GB exceeds MAXIMUM_MEMORY'
                          % (k, mem_gb))
                continue

            for trial in range(trial_count):
                seconds, flops = run_gemm(k, k_iter, kernal)
                # NaN marks a run that produced no timing; leave it out.
                if np.isnan(seconds):
                    continue
                rows.append([kernal, k, trial, seconds, flops])

        if verbose:
            print('...finished %s kernal in %5.5f seconds.\n'
                  % (kernal, time.time() - start))

    if verbose:
        # Guard against an empty result set: max() of nothing would raise.
        if rows:
            print('Largest k values was %d' % max(row[1] for row in rows))
        else:
            print('No successful timings were recorded.')

    return pd.DataFrame(rows, columns=['Kernal', 'k', 'trial', 'Time(s)', 'GFLOP/s'])

In [92]:
# Plot plain, block16, and blas dgemm timings
trial_count = 5
target_flops = 5E9

kernal_list = ['plain','block16','blas']

gemm_df = gemm_timings(target_flops, kernal_list, trial_count, verbose=True)

# The sampled k values are powers of two, so plot against log2(k) to space
# them evenly along the x axis. A copy keeps gemm_df itself unchanged.
plot_df = gemm_df.copy()
plot_df['log2(k)'] = np.log2(plot_df['k'])

ax = sns.tsplot(plot_df, time='log2(k)', unit='trial', condition='Kernal', value='GFLOP/s')
# Trailing semicolon suppresses the Axes repr in the cell output.
ax.set_title('GEMM throughput: plain vs. block16 vs. blas');


Starting plain kernal...
...finished plain kernal in 251.38913 seconds.

Starting block16 kernal...
...finished block16 kernal in 73.67537 seconds.

Starting blas kernal...
...finished blas kernal in 11.08876 seconds.

Largest k values was 1024
Out[92]:
<matplotlib.axes.AxesSubplot at 0x11d0fe450>

In [94]:
# Plot gemm results for the block4, block8, block16, and block32 kernels
trial_count = 5
target_flops = 5E9

kernal_list = ['block'+str(x) for x in [4,8,16,32]]

gemm_df = gemm_timings(target_flops, kernal_list, trial_count, verbose=True)

# The sampled k values are powers of two, so plot against log2(k) to space
# them evenly along the x axis. A copy keeps gemm_df itself unchanged.
plot_df = gemm_df.copy()
plot_df['log2(k)'] = np.log2(plot_df['k'])

ax = sns.tsplot(plot_df, time='log2(k)', unit='trial', condition='Kernal', value='GFLOP/s')
# Trailing semicolon suppresses the Axes repr in the cell output.
ax.set_title('GEMM throughput by block kernel');


Starting block4 kernal...
...finished block4 kernal in 177.06442 seconds.

Starting block8 kernal...
...finished block8 kernal in 136.28667 seconds.

Starting block16 kernal...
...finished block16 kernal in 80.18651 seconds.

Starting block32 kernal...
...finished block32 kernal in 74.49296 seconds.

Largest k values was 1024
Out[94]:
<matplotlib.axes.AxesSubplot at 0x11d4e4610>

In [ ]: