ANN-SoLo profiling


In [1]:
%matplotlib inline
import io
import os
import pstats

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import seaborn as sns
import pandas as pd

In [2]:
# plot styling
plt.style.use(['seaborn-white', 'seaborn-paper'])
plt.rc('font', family='serif')
sns.set_palette('Set1')
sns.set_context('paper', font_scale=1.3)    # single-column figure

In [3]:
prof_dir = '../../data/processed/iprg2012/profiling'

In [4]:
def extract_cum_time(stats, stream, func):
    stats.print_stats(func)
    stream.seek(0)
    return float(stream.read().strip().split('\n')[-1].split()[3])

def process_prof(filename, version=2):
    stream = io.StringIO()
    stats = pstats.Stats(filename, stream=stream)

    mode = 'Brute-force' if 'bf' in filename else 'ANN-SoLo'
    precursor_tol_mass = 20 if 'std' in filename else 300
    precursor_tol_mode = 'ppm' if 'std' in filename else 'Da'
    prof = {'mode': mode,
            'precursor_tol_mass': precursor_tol_mass,
            'precursor_tol_mode': precursor_tol_mode,
            'libio': extract_cum_time(stats, stream, 'get_spectrum'),
            'rank': extract_cum_time(stats, stream, 'get_best_match'),
            'version': version}
    total = extract_cum_time(stats, stream, 'builtins.exec')
    select_key = ('_get_library_candidates'
                  if version == 2 else '_filter_library_candidates')
    prof['select'] = (extract_cum_time(stats, stream, select_key)
                      - prof['libio'])
    prof['other'] = total - prof['select'] - prof['rank'] - prof['libio']
    
    return prof

In [5]:
profiling = [process_prof(
    os.path.join(prof_dir, 'ann_oms_shifted_old.prof'), 1)]
for filename in ['bf_std.prof', 'bf_oms_shifted.prof',
                 'ann_oms_shifted.prof']:
    profiling.append(process_prof(os.path.join(prof_dir, filename)))

profiling = pd.DataFrame(profiling)

In [6]:
width = 7
height = width / 1.618    # golden ratio
fig, ax = plt.subplots(figsize=(width, height))

profiling_v2 = profiling[profiling['version'] == 2]
profiling_bar = profiling_v2[['select', 'rank', 'libio', 'other']]
# Convert to relative time percentages.
profiling_bar = profiling_bar.div(profiling_bar.sum(axis=1), axis=0) * 100
profiling_bar.columns = ['Candidate selection', 'Candidate ranking', 'I/O',
                         'Other']

profiling_bar.plot.bar(ax=ax, stacked=True)

ax.set_ylabel('Relative time consumption')

xtick_labels = (profiling_v2['mode'].map(str) + '\n' +
                profiling_v2['precursor_tol_mass'].map(str) + ' ' +
                profiling_v2['precursor_tol_mode'].map(str))
ax.set_xticklabels(xtick_labels, rotation='horizontal')

ax.yaxis.set_major_formatter(mticker.PercentFormatter())

ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=2)

sns.despine()

plt.savefig('profiling_rel.pdf', dpi=300, bbox_inches='tight')
plt.show()
plt.close()



In [7]:
width = 7
height = width / 1.618    # golden ratio
fig, ax = plt.subplots(figsize=(width, height))

profiling_bar = (profiling[profiling['mode'] == 'ANN-SoLo']
                 [['select', 'rank', 'libio', 'other']])
profiling_bar.columns = ['Candidate selection', 'Candidate ranking', 'I/O',
                         'Other']
# Convert time to minutes.
profiling_bar = profiling_bar.div(60, axis=0)

profiling_bar.plot.bar(ax=ax, stacked=True)

ax.set_xticklabels(['ANN-SoLo\nJPR 2018', 'ANN-SoLo'], rotation='horizontal')
ax.set_ylabel('Runtime (min)')

h, l = ax.get_legend_handles_labels()
ax.legend(h[::-1], l[::-1], loc='upper right')

sns.despine()

plt.savefig('profiling_abs.pdf', dpi=300, bbox_inches='tight')
plt.show()
plt.close()