In [1]:
import plot_utils as pu
In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
In [3]:
# Load the tab-separated timing table; its first column becomes the row index.
timing_table_path = "./data/timing_table.txt"
df = pd.read_csv(timing_table_path, sep="\t", index_col=0)
In [4]:
# Show the dtype pandas inferred for each column of the loaded table.
df.dtypes
Out[4]:
In [5]:
# Preview the first rows of the timing table.
df.head()
Out[5]:
In [6]:
# Recorded size listing for the input file:
#   9235008 ./combined_seqs.fna
# NOTE(review): the unit of this count is not recorded here. Dividing by 1e6
# yields gigabytes only if the count is in kilobytes (e.g. `du` output) --
# TODO confirm.
infile_size_reported = 9235008
infile_gbytes = infile_size_reported / 1e6
In [7]:
# Canonical tool order. Every per-tool list below is aligned with it
# position-by-position, so the dicts all share the same keys.
tools = ["burst", "bowtie2", "centrifuge", "kraken", "utree"]
index_tools = dict(zip(tools, range(len(tools))))  # x-axis slot per tool
display_names = dict(zip(tools, ["BURST", "Bowtie2", "Centrifuge", "Kraken", "UTree"]))
colors = dict(zip(tools, ["#1a1895", "#000000", "#0c5050", "#148d4a", "#bd86e8"]))
markers = dict(zip(tools, ["s", "v", "o", "^", "d"]))

df = df.sort_values("tool")

# Per-tool mean of every numeric column.
# numeric_only=True is passed explicitly: since pandas 2.0 the default is
# False, and mean() raises TypeError if the table carries any non-numeric
# column. Older pandas silently dropped such columns, which is the behaviour
# this notebook relies on.
df_means = df.groupby("tool").mean(numeric_only=True)

# Attach plotting metadata, looked up by the tool name held in the index.
# A tool missing from `tools` gets NaN in all four columns.
for column, lookup in (
    ("index_tools", index_tools),
    ("display_names", display_names),
    ("colors", colors),
    ("markers", markers),
):
    df_means[column] = df_means.index.map(lookup)
In [8]:
# Display the per-tool display names as a plain array (sanity check of the mapping).
df_means["display_names"].values
Out[8]:
In [9]:
# Mean memory usage of each aligner, compared against the input file size.
# y-axis: RAM usage (gigabytes)
# x-axis: tool (one integer slot per aligner, taken from df_means.index_tools)
sns.set(context="paper", style="ticks", palette="colorblind", font='serif', font_scale=1.5, color_codes=True, rc=pu.figure_setup())
fig_size = pu.get_fig_size(10, 10)
fig, ax = plt.subplots(figsize=fig_size)

# One marker per tool, drawn at that tool's x slot with its own colour/shape.
for _tool, row in df_means.iterrows():
    ax.plot([row.index_tools], [row.gbyte_memory], marker=row.markers, color=row.colors, markersize=12, label=row.display_names)

ax.yaxis.grid(True)
ax.xaxis.grid(False)

# Horizontal reference line at the input file size. axhline spans the full
# axes width whatever the final x-limits are; plotting between the current
# x-limits would leave the line short of the edges once autoscale adds margins.
ax.axhline(infile_gbytes, color='r', label='input file size', zorder=0)

pu.stylize_axes(ax)
pu.stylize_fig(fig)
pltname = "ram_bar"  # NOTE(review): not used in this cell; kept in case later cells read it
ax.set_ylabel("gigabyte(s) memory")

# Pin the ticks to the tools' x slots before labelling them. Relabelling the
# auto-generated ticks by position can attach names to the wrong x values
# (or fail when fewer than five ticks exist).
ax.set_xticks(list(df_means["index_tools"]))
ax.set_xticklabels(list(df_means["display_names"]), rotation=90)

plt.tight_layout()
pu.save_plot(fig, "gbs_memory")