In [1]:
import lancet, re, pandas
import numpy as np
from itertools import product
import holoviews
from holoviews import *
import seaborn as sns
%load_ext holoviews.ipython
%output holomap='widgets' fig='svg' size=200
In [ ]:
# Benchmark sweep: cortex densities x iteration counts x GPU flag,
# launched 10 times via lancet for averaging.
benchmark_name = 'gpu_benchmark'
cortex_density = lancet.List('cortex_density', [48, 142, 162])
iterations = lancet.List('iterations', [1000])
resource = lancet.List('gpu', [True])
executable = '/afs/inf.ed.ac.uk/user/s11/s1137931/honours/topographica/timing_script.sh'
timing_cmd = lancet.ShellCommand(executable=executable, posargs=['cortex_density', 'iterations', 'gpu'])
p_space = cortex_density * iterations * resource
# Runs locally. A Launcher could be used to launch jobs with Grid Engine.
for i in range(10):
    lancet.Launcher(benchmark_name, p_space, timing_cmd, output_directory='benchmarks', max_concurrency=1)()
    # Parenthesised print is valid under both Python 2 and 3; the original
    # Python-2-only print statement breaks the cell on a Python 3 kernel.
    print("Run %d finished..." % i)
In [2]:
output_files = lancet.FilePattern('timing_files', './benchmarks/*-gpu_benchmark*/streams/*_tid_{tid:d}.o*')
In [3]:
def parse_timings(filename):
    """Parse one benchmark output file into a dict of timings and parameters.

    The file must contain tab-separated ``HOST:``, ``PARAMS:`` and
    ``VERSION:`` header lines plus at least three Unix ``time``-style lines
    of the form ``12.34u 1.23s 0:14.56 97.5%``, marking (in order)
    Topographica startup, simulation startup and simulation end.

    Returns a dict with cumulative and per-phase wall-clock times in
    seconds, CPU utilisation percentages, and the run parameters.

    Raises ValueError (a subclass of the Exception raised originally) if the
    file does not match the expected format.
    """
    with open(filename, 'r') as infile:
        content = infile.read()
    host_m = re.search(r'^HOST:\t(?P<host>.*?)\n', content, re.MULTILINE)
    param_m = re.search(r'^PARAMS:\t(?P<cortex_density>.*?)\t(?P<iterations>.*?)\t(?P<gpu>.*?)\n', content, re.MULTILINE)
    version_m = re.search(r'^VERSION:\t(?P<version>.*?)\n', content, re.MULTILINE)
    # Dots are escaped: the original pattern used bare '.', which matches any
    # character and could accept malformed lines.
    timing_m = re.findall(r'^[0-9]*\.[0-9]*u [0-9]*\.[0-9]*s ([0-9]*:[0-9]*\.[0-9]*) ([0-9]*\.[0-9])%',
                          content, re.MULTILINE)
    # version_m is validated too: the original only checked host/params and
    # would hit an AttributeError on version_m.groupdict() below.
    if host_m is None or param_m is None or version_m is None or len(timing_m) < 3:
        raise ValueError("The benchmark file %s has incorrect format" % filename)

    def _to_seconds(elapsed):
        # Convert 'M:SS.ff' to seconds.  float() on the 'SS.ff' component
        # already includes the fraction; the original additionally added
        # float('0.' + ...), double-counting the fractional part.
        minutes, seconds = elapsed.split(':')
        return float(minutes) * 60 + float(seconds)

    timing_m = [(_to_seconds(t), float(u)) for t, u in timing_m]
    p = param_m.groupdict()
    p['gpu'] = p['gpu'] == 'True'
    p['iterations'] = int(p['iterations'])
    p['cortex_density'] = float(p['cortex_density'])
    # Phase boundaries: timing_m[0] = Topographica started, timing_m[1] =
    # simulation constructed, timing_m[-1] = simulation finished.
    result = {'topo_startup_time': timing_m[0][0], 'topo_startup_cpu': timing_m[0][1],
              'sim_startup_time': timing_m[1][0] - timing_m[0][0], 'sim_startup_cpu': timing_m[1][1],
              'sim_run_time': timing_m[-1][0] - timing_m[1][0],
              'total_time': timing_m[-1][0], 'total_cpu': timing_m[-1][1],
              'time_per_iteration': (timing_m[-1][0] - timing_m[1][0]) / p['iterations']}
    result.update(host_m.groupdict())
    result.update(p)
    result.update(version_m.groupdict())
    return result
In [4]:
# Attach the metadata parsed from each timing file to the file collection.
timing_parser = lancet.CustomFile(metadata_fn=parse_timings)
collected_timings = lancet.FileInfo(output_files, 'timing_files', timing_parser)
In [5]:
len(collected_timings)
Out[5]:
In [6]:
# Average the repeated runs per (host, version, cortex_density).
avg_timings = collected_timings.dframe.groupby(['host', 'version', 'cortex_density']).apply(np.mean)
# .loc replaces the deprecated .ix indexer (deprecated in pandas 0.20,
# removed in 1.0); partial indexing on the 'host' level is unchanged.
melmac = avg_timings.loc['melmac.inf.ed.ac.uk']
stonesoup = avg_timings.loc['stonesoup.inf.ed.ac.uk']
In [7]:
# Mean timings per (version, cortex_density) on melmac for the traditional
# vs. sparse comparison.
constituents_traditional_vs_sparse = (collected_timings.dframe
    .query('version in ["Traditional_topographica", "Sparse_topographica"] and host=="melmac.inf.ed.ac.uk"')
    .groupby(['version', 'cortex_density'])
    .apply(np.mean))
# Rename by reassignment instead of inplace=True: keeps the cell idempotent
# on re-run and inplace has no performance benefit.
constituents_traditional_vs_sparse = constituents_traditional_vs_sparse.rename(
    columns={'sim_run_time': 'Simulation runtime', 'topo_startup_time': 'Topographica startup', 'sim_startup_time': 'Simulation startup'})
In [8]:
%%opts Bars [color_by=['stack'] apply_databounds=False show_grid=True]
# Stacked bars: startup vs. runtime components for traditional and sparse
# Topographica at each cortex density.
versions, densities, parts = ["Traditional_topographica", "Sparse_topographica"], [48, 142, 162], ['Topographica startup', 'Simulation startup','Simulation runtime']
keys = product(versions, densities, parts)
Bars([(k, constituents_traditional_vs_sparse.query('version == "%s" and cortex_density == %d' % (k[0], k[1]))[k[2]].mean()) for k in keys], key_dimensions=['Version', 'Cortex density', Dimension('Part', values=parts)], value_dimensions=['Time (seconds)'])
Out[8]:
In [9]:
# Mean simulation runtime per (version, cortex_density) for the three sparse
# variants, restricted to the melmac host.
versions = ['Sparse_topographica', 'Sparse_topographica_dot_only', 'Sparse_topographica_empty']
melmac_query = 'version in %s and host=="melmac.inf.ed.ac.uk"' % str(versions)
sparse_timings = (collected_timings.dframe
                  .query(melmac_query)
                  .loc[:, ['cortex_density', 'version', 'sim_run_time']]
                  .groupby(['version', 'cortex_density'])
                  .mean())
In [10]:
# Decompose the full sparse runtime into hebbian/normalisation, dot-product
# and 'other' components using the dot-only and empty control runs.
# .loc replaces the deprecated .ix (removed in pandas 1.0); repeated lookups
# are hoisted into named frames.
sparse_total = sparse_timings.loc['Sparse_topographica']
sparse_dot_only = sparse_timings.loc['Sparse_topographica_dot_only']
sparse_empty = sparse_timings.loc['Sparse_topographica_empty']
learn_norm_time = sparse_total - sparse_dot_only
dot_time = sparse_dot_only - sparse_empty
other_time = sparse_total - learn_norm_time - dot_time
# Convert each component to a percentage of the full sparse runtime.
one_percent = sparse_total * 0.01
learn_norm_time /= one_percent
dot_time /= one_percent
other_time /= one_percent
In [11]:
# Rename by reassignment instead of inplace=True: idempotent on re-run and
# inplace rename has no performance benefit.
learn_norm_time = learn_norm_time.rename(columns={'sim_run_time': 'hebbian_norm'})
dot_time = dot_time.rename(columns={'sim_run_time': 'dot'})
other_time = other_time.rename(columns={'sim_run_time': 'other'})
In [12]:
sparse_bottlenecks_percentage = learn_norm_time.join(dot_time).join(other_time)
In [13]:
sparse_bottlenecks_percentage
Out[13]:
In [14]:
# Scale each percentage row back to absolute seconds for its density, then
# divide by the 1000 iterations to get seconds per iteration.
# .loc replaces the deprecated .ix (removed in pandas 1.0).
sparse_bottlenecks_time_per_iteration = sparse_bottlenecks_percentage.apply(
    lambda row: row * sparse_timings.loc['Sparse_topographica'].loc[row.name, 'sim_run_time'] * 0.01,
    axis=1) / 1000
sparse_topo_dot = sparse_bottlenecks_time_per_iteration['dot']
sparse_topo_hebbian_norm = sparse_bottlenecks_time_per_iteration['hebbian_norm']
sparse_topo_other = sparse_bottlenecks_time_per_iteration['other']
# Parenthesised print works under both Python 2 and 3 (the original print
# statement is Python 2 only).
print("Sparse topographica times per iteration:")
sparse_bottlenecks_time_per_iteration
Out[14]:
In [15]:
# Secondary benchmark sweep (varying iteration counts), parsed with the same
# parser, used for the work-per-iteration regression below.
mini_benchmark_files = lancet.FilePattern('timing_files', './mini_benchmark/*-gpu_benchmark*/streams/*_tid_{tid:d}.o*')
mini_parser = lancet.CustomFile(metadata_fn=parse_timings)
mini_timings = lancet.FileInfo(mini_benchmark_files, 'timing_files', mini_parser)
In [16]:
work_per_iteration = DFrame(mini_timings.dframe[['iterations', 'sim_run_time', 'cortex_density']])
In [17]:
%%opts Regression [show_legend=True apply_databounds=True]
# Linear fit of simulation runtime vs. iteration count, one line per cortex
# density, overlaid in a single plot.
work_per_iteration.regression('iterations', ['sim_run_time'], extents=(0, 0, 550, 550), mdims=['cortex_density'], reduce_fn=np.mean).overlay('cortex_density')
Out[17]:
In [18]:
versions, densities, parts = ['GPU_topographica_dot_only', 'Traditional_topographica_dot_only', 'Sparse_topographica_dot_only'], [48, 142, 162], ['other', 'dot']
# .loc replaces the deprecated .ix (removed in pandas 1.0); melmac is indexed
# by (version, cortex_density) after the earlier groupby.
gpu_dot_vs_traditional_dot = melmac.loc[versions]
keys = list(product(densities, versions, parts))
# NaN placeholders instead of np.random.randn: every entry is overwritten in
# the loop below, and NaN makes any missed entry visible instead of leaving
# silent (unseeded) random garbage.
gpu_dot_vs_traditional_dot['dot'] = np.nan
gpu_dot_vs_traditional_dot['other'] = np.nan
for (d, v, p) in keys:
    # 'other' is the baseline overhead from the matching *_empty run; 'dot'
    # is the remainder of the measured time per iteration.
    empty_version = 'Traditional_topographica_empty' if 'Traditional' in v else 'Sparse_topographica_empty'
    empty_time = melmac.loc[(empty_version, d), 'time_per_iteration']
    if p == 'other':
        value = empty_time
    else:
        value = gpu_dot_vs_traditional_dot.loc[(v, d), 'time_per_iteration'] - empty_time
    # Single .loc assignment replaces the original chained .ix[v][p][d]
    # indexing, which writes through an intermediate object
    # (SettingWithCopy hazard).
    gpu_dot_vs_traditional_dot.loc[(v, d), p] = value
In [19]:
%%opts Bars [color_by=['stack'] apply_databounds=False show_grid=True yticks=25]
# Stacked bars: dot-product vs. other cost per iteration for the three
# dot-only variants at each cortex density.
Bars([(k, gpu_dot_vs_traditional_dot.query('version == "%s" and cortex_density == %d' % (k[1], k[0]))[k[2]]) for k in keys], key_dimensions=['Cortex density', Dimension('Version', values=versions), Dimension('Part', values=parts)], value_dimensions=['Time (seconds)'])
Out[19]:
In [20]:
gpu_dot_vs_traditional_dot[['other', 'dot', 'time_per_iteration']]
Out[20]:
In [21]:
versions, densities, parts = ['GPU_topographica_slow_normalisation', 'Traditional_topographica', 'Sparse_topographica'], [48, 142, 162], ['other', 'dot', 'hebbian_norm']
# .loc replaces the deprecated .ix (removed in pandas 1.0).
gpu_vs_traditional_vs_sparse = melmac.loc[versions]
keys = list(product(densities, versions, parts))
# NaN placeholders instead of np.random.randn: every entry is overwritten
# below, and NaN surfaces any missed entry.
for p in parts:
    gpu_vs_traditional_vs_sparse[p] = np.nan
for (d, v, p) in keys:
    if 'Traditional' in v:
        # Traditional components come from the traditional control runs.
        if p == 'other':
            value = melmac.loc[('Traditional_topographica_empty', d), 'time_per_iteration']
        elif p == 'dot':
            value = (melmac.loc[('Traditional_topographica_dot_only', d), 'time_per_iteration']
                     - melmac.loc[('Traditional_topographica_empty', d), 'time_per_iteration'])
        else:  # hebbian_norm
            value = (melmac.loc[('Traditional_topographica', d), 'time_per_iteration']
                     - melmac.loc[('Traditional_topographica_dot_only', d), 'time_per_iteration'])
    else:
        # GPU and sparse variants reuse the dot costs computed earlier.
        dot_only_version = 'GPU_topographica_dot_only' if 'GPU' in v else 'Sparse_topographica_dot_only'
        if p == 'other':
            value = melmac.loc[('Sparse_topographica_empty', d), 'time_per_iteration']
        elif p == 'dot':
            value = gpu_dot_vs_traditional_dot.loc[(dot_only_version, d), 'dot']
        else:  # hebbian_norm
            value = (gpu_vs_traditional_vs_sparse.loc[(v, d), 'time_per_iteration']
                     - gpu_dot_vs_traditional_dot.loc[(dot_only_version, d), 'time_per_iteration'])
    # Single .loc assignment replaces the original chained .ix[v][p][d]
    # (SettingWithCopy hazard).
    gpu_vs_traditional_vs_sparse.loc[(v, d), p] = value
In [22]:
%%opts Bars [color_by=['stack'] apply_databounds=False show_grid=True yticks=25]
# Stacked bars: other / dot / hebbian_norm per iteration for GPU,
# traditional and sparse implementations at each cortex density.
Bars([(k, gpu_vs_traditional_vs_sparse.query('version == "%s" and cortex_density == %d' % (k[1], k[0]))[k[2]]) for k in keys], key_dimensions=['Cortex density', Dimension('Version', values=versions), Dimension('Part', values=parts)], value_dimensions=['Time (seconds)'])
Out[22]:
In [23]:
gpu_vs_traditional_vs_sparse[['other', 'dot', 'hebbian_norm', 'time_per_iteration']]
Out[23]:
In [24]:
versions, densities, parts = ['GPU_topographica_slow_normalisation', 'Synchronous_GPU_Kernels'], [48, 142, 162], ['other', 'dot', 'hebbian_norm']
# Name kept (including the 'tratitional' typo) because a later cell reads it.
# .loc replaces the deprecated .ix (removed in pandas 1.0).
gpu_slow_fast_norm_vs_tratitional = melmac.loc[versions]
keys = list(product(densities, versions, parts))
# NaN placeholders instead of np.random.randn: every entry is overwritten
# below, and NaN surfaces any missed entry.
for p in parts:
    gpu_slow_fast_norm_vs_tratitional[p] = np.nan
for (d, v, p) in keys:
    if p == 'other':
        value = melmac.loc[('Sparse_topographica_empty', d), 'time_per_iteration']
    elif p == 'dot':
        value = gpu_dot_vs_traditional_dot.loc[('GPU_topographica_dot_only', d), 'dot']
    else:  # hebbian_norm
        value = (gpu_slow_fast_norm_vs_tratitional.loc[(v, d), 'time_per_iteration']
                 - gpu_dot_vs_traditional_dot.loc[('GPU_topographica_dot_only', d), 'time_per_iteration'])
    # Single .loc assignment replaces chained .ix[v][p][d] (SettingWithCopy
    # hazard).
    gpu_slow_fast_norm_vs_tratitional.loc[(v, d), p] = value
In [25]:
%%opts Bars [color_by=['stack'] apply_databounds=False show_grid=True yticks=25 show_legend=False]
# Stacked bars comparing slow vs. synchronous GPU normalisation kernels.
Bars([(k, gpu_slow_fast_norm_vs_tratitional.query('version == "%s" and cortex_density == %d' % (k[1], k[0]))[k[2]]) for k in keys], key_dimensions=['Cortex density', Dimension('Version', values=versions), Dimension('Part', values=parts)], value_dimensions=['Time (seconds)'])
Out[25]:
In [26]:
gpu_slow_fast_norm_vs_tratitional[['other', 'dot', 'hebbian_norm', 'time_per_iteration']]
Out[26]:
In [27]:
versions, densities, parts = ["Streamed_GPU_Kernels_HYB_dot_only", "Streamed_GPU_Kernels_CSR_dot_only"], [48, 142, 162], ['other', 'dot']
# .loc replaces the deprecated .ix (removed in pandas 1.0).
hyb_vs_csr = melmac.loc[versions]
keys = list(product(densities, versions, parts))
# NaN placeholders instead of np.random.randn: every entry is overwritten
# below, and NaN surfaces any missed entry.
for p in parts:
    hyb_vs_csr[p] = np.nan
for (d, v, p) in keys:
    # Baseline overhead from the sparse empty run; 'dot' is the remainder.
    empty_time = melmac.loc[('Sparse_topographica_empty', d), 'time_per_iteration']
    if p == 'other':
        value = empty_time
    else:
        value = hyb_vs_csr.loc[(v, d), 'time_per_iteration'] - empty_time
    # Single .loc assignment replaces chained .ix[v][p][d] (SettingWithCopy
    # hazard).
    hyb_vs_csr.loc[(v, d), p] = value
In [28]:
%%opts Bars [color_by=['stack'] apply_databounds=False show_grid=True yticks=15 xrotation=90]
# Stacked bars comparing HYB vs. CSR sparse matrix formats (dot-only runs).
Bars([(k, hyb_vs_csr.query('version == "%s" and cortex_density == %d' % (k[1], k[0]))[k[2]]) for k in keys], key_dimensions=['Cortex density', Dimension('Version', values=versions), Dimension('Part', values=parts)], value_dimensions=['Time (seconds)'])
Out[28]:
In [29]:
hyb_vs_csr[['other', 'dot', 'time_per_iteration']]
Out[29]:
In [30]:
versions, densities, parts = ["Streamed_GPU_Kernels", "Synchronous_GPU_Kernels"], [48, 142, 162], ['other', 'dot', 'hebbian_norm']
# .loc replaces the deprecated .ix (removed in pandas 1.0).
sync_vs_streamed = melmac.loc[versions]
keys = list(product(densities, versions, parts))
# NaN placeholders instead of np.random.randn: every entry is overwritten
# below, and NaN surfaces any missed entry.
for p in parts:
    sync_vs_streamed[p] = np.nan
for (d, v, p) in keys:
    empty_time = melmac.loc[('Sparse_topographica_empty', d), 'time_per_iteration']
    if p == 'other':
        value = empty_time
    elif p == 'dot':
        value = (sync_vs_streamed.loc[(v, d), 'time_per_iteration']
                 - gpu_slow_fast_norm_vs_tratitional.loc[('Synchronous_GPU_Kernels', d), 'hebbian_norm']
                 - empty_time)
    else:  # hebbian_norm
        # Depends on the 'dot' entry for the same (v, d) already being
        # filled; `keys` iterates parts innermost in
        # ['other', 'dot', 'hebbian_norm'] order, so it is.
        value = (sync_vs_streamed.loc[(v, d), 'time_per_iteration']
                 - empty_time
                 - sync_vs_streamed.loc[(v, d), 'dot'])
    # Single .loc assignment replaces chained .ix[v][p][d] (SettingWithCopy
    # hazard).
    sync_vs_streamed.loc[(v, d), p] = value
In [31]:
%%opts Bars [color_by=['stack'] apply_databounds=False show_grid=True yticks=25 xrotation=90 show_legend=False]
# Stacked bars comparing streamed vs. synchronous GPU kernel execution.
Bars([(k, sync_vs_streamed.query('version == "%s" and cortex_density == %d' % (k[1], k[0]))[k[2]]) for k in keys], key_dimensions=['Cortex density', Dimension('Version', values=versions), Dimension('Part', values=parts)], value_dimensions=['Time (seconds)'])
Out[31]:
In [32]:
sync_vs_streamed[['other', 'dot', 'hebbian_norm', 'time_per_iteration']]
Out[32]:
In [33]:
# 8x8 V1 activity snapshots pasted in from a separate run (GPU vs. dense
# implementation), compared as heatmaps below.
# NOTE(review): `dense` is byte-identical to `gpu` -- this looks like a
# copy-paste error; confirm against the original measurement before drawing
# conclusions from the comparison plot.
gpu = np.array([[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.21191, 0.00000, 0.01938, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.11707, 0.00000, 0.00000, 0.19443, 0.00000,
0.00000, 0.00000]])
dense = np.array([[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.00000, 0.21191, 0.00000, 0.01938, 0.00000,
0.00000, 0.00000],
[ 0.00000, 0.11707, 0.00000, 0.00000, 0.19443, 0.00000,
0.00000, 0.00000]])
In [34]:
# Convert the 8x8 arrays into {(row, col): value} dicts for HeatMap.
# The original ranged x over len(arr[0]) (columns) and y over len(arr) (rows)
# while indexing arr[x][y] (row x, column y) -- harmless only because these
# arrays are square; for non-square input it would raise IndexError.  The
# ranges now match the axes they index; output is identical for this data.
dense = {(x, y): dense[x][y] for x in range(len(dense)) for y in range(len(dense[0]))}
gpu = {(x, y): gpu[x][y] for x in range(len(gpu)) for y in range(len(gpu[0]))}
In [35]:
HeatMap(gpu, label='V1 activity') + HeatMap(dense, label='V1 activity')
Out[35]:
In [36]:
# Keys for the host comparison (stonesoup vs. melmac) of the streamed
# GPU kernels; 'Time' is the single stacked part.
hosts, densities, parts = ['stonesoup.inf.ed.ac.uk', 'melmac.inf.ed.ac.uk'], [48, 142, 162], ['Time']
keys = list(product(densities, hosts, parts))
In [37]:
%%opts Bars [color_by=['stack'] apply_databounds=False show_grid=True yticks=25 xrotation=90]
# Time per iteration of the streamed GPU kernels on each host and density.
Bars([(k, avg_timings.query('host == "%s" and version == "Streamed_GPU_Kernels" and cortex_density == %d' % (k[1], k[0]))['time_per_iteration'].mean()) for k in keys], key_dimensions=['Cortex density', Dimension('Host', values=hosts), Dimension('Part', values=parts)], value_dimensions=['Time (seconds)'])
Out[37]:
In [37]: