In [1]:
import plot_utils as pu
In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
In [3]:
# Read the comma-separated table at ./data/simulations_kraken_centrifuge.txt;
# the file's first column is used as the DataFrame index.
df = pd.read_csv(
    "./data/simulations_kraken_centrifuge.txt",
    sep=",",
    index_col=0,
)
In [4]:
# Show the dtype pandas inferred for each column of the loaded frame.
df.dtypes
Out[4]:
In [5]:
# Preview the first rows of the loaded frame (pandas shows five by default).
df.head()
Out[5]:
In [6]:
# Average every numeric column per (aligner, site) pair and turn the group
# keys back into ordinary columns so they can be used as plot fields.
#
# `numeric_only=True` is passed explicitly: older pandas dropped non-numeric
# columns from `mean()` silently, while pandas >= 2.0 raises TypeError when
# any non-numeric column is present. Being explicit gives the same result on
# both.
df_means = (
    df.groupby(["aligner", "site"])
    .mean(numeric_only=True)
    .reset_index()
)

# --- Disabled per-tool styling (kept for reference) -------------------------
# These lines key off a `tool` column, whereas the active aggregation above
# groups by `aligner`; they are not executed.
# df = df.sort_values("tool")
# tools = ["burst", "bowtie2", "centrifuge", "kraken", "utree"]
# index_tools = dict(zip(tools, range(len(tools))))
# df_means["index_tools"] = df_means.tool.map(index_tools)
# display_names = dict(zip(tools, ["BURST", "Bowtie2", "Centrifuge", "Kraken", "UTree"]))
# df_means["display_names"] = df_means.tool.map(display_names)
# colors = dict(zip(tools, ["#1a1895", "#000000", "#0c5050", "#148d4a", "#bd86e8"]))
# df_means["colors"] = df_means.tool.map(colors)
# markers = dict(zip(tools, ["s", "v", "o", "^", "d"]))
# df_means["markers"] = df_means.tool.map(markers)
# df_means.sort_values('index_tools', inplace=True)
In [7]:
# List the column labels of the aggregated frame.
df_means.columns
Out[7]:
In [ ]:
In [8]:
# Grouped bar chart of the mean f1-score for each aligner, split by site.
# y-axis: mean f1-score (column "f1" of df_means)
# x-axis: aligner, with one bar per site (hue)
sns.set(context="paper", style="ticks", palette="colorblind", font='serif', font_scale=1.5, color_codes=True, rc=pu.figure_setup())
fig_size = pu.get_fig_size(13, 10)
fig, ax = plt.subplots(figsize=fig_size)
plot = sns.barplot(x="aligner", y="f1", hue="site", data=df_means, ax=ax)

# Remove the legend drawn inside the axes; a figure-level legend is added
# further down instead. Uses the public Legend.remove() rather than the private
# Axes._remove_legend(), and tolerates the case where no legend was created.
axes_legend = ax.get_legend()
if axes_legend is not None:
    axes_legend.remove()

# NOTE(review): the y-axis starts at 0.7 rather than 0, so bar heights
# exaggerate differences between aligners — confirm this is intended.
ax.set_ylim(.7, 1.0)
pu.stylize_axes(ax)
pu.stylize_fig(fig)

# Turn the x tick labels vertical.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)
ax.set_ylabel('f1-score')
ax.set_xlabel('')

# Figure-level legend anchored at the right edge of the figure, without a frame.
artists = fig.legend(loc="center left", bbox_to_anchor=(1, .9))
artists.set_frame_on(False)
# The legend is handed to save_plot explicitly; presumably so the helper can
# include it when computing the saved bounding box — verify in plot_utils.
pu.save_plot(fig, "simulations", artists=(artists,))
In [ ]:
In [9]:
# artists.