In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
from argparse import Namespace
import misc.logging_utils as logging_utils
# Empty container for the notebook's settings; later cells attach the plot
# options and file paths to it as attributes.
args = Namespace()
# Logger used for the progress messages emitted by the cells below.
logger = logging_utils.get_ipython_logger()
In [ ]:
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns; sns.set(style='white', color_codes=True, palette='muted')
import misc.mpl_utils as mpl_utils
import yaml
import riboutils.ribo_utils as ribo_utils
In [ ]:
# Per-project file locations. The key names themselves are arbitrary, but
# every dictionary below must use the same set of keys, because a single key
# is later used to index all of them.

# project configuration (YAML)
config_files = {
    'my_ribo_project': "/path/to/my/config.yaml",
}

# read counts, as produced by get-all-read-filtering-counts
alignment_counts_files = {
    'my_ribo_project': "/path/to/my/read-filtering-counts.csv.gz",
}

# figure written when args.without_rrna is False
out_files = {
    'my_ribo_project': "/path/to/my/read-filtering-counts.pdf",  # or png
}

# figure written when args.without_rrna is True
without_rrna_files = {
    'my_ribo_project': "/path/to/my/read-filtering-counts.no-rrna.pdf",
}
In [ ]:
# Key of the project to plot; it must be present in each of the path
# dictionaries (config_files, alignment_counts_files, out_files,
# without_rrna_files).
data = 'my_ribo_project'

# False: plot every filtering step, starting from the raw reads.
# True: plot only the steps that follow rRNA removal.
args.without_rrna = False

# Font sizes for the axis labels/legend title and for the legend entries.
args.fontsize = 20
args.legend_fontsize = 15

# Upper bound and spacing of the y ticks. The "+1" keeps the topmost tick,
# since the ticks are generated with np.arange, which excludes its end point.
if args.without_rrna:
    args.ymax, args.ystep = 2e7 + 1, 5e6
else:
    args.ymax, args.ystep = 1.5e8 + 1, 2.5e7

# Count columns for the full plot, listed from the first filtering step to
# the last, and the label shown for each stack segment.
# NOTE(review): each label presumably names the reads removed between one
# step and the next (the last one being the reads that remain) -- confirm
# against mpl_utils.get_diff_counts.
args.alignment_counts_order = [
    'raw_data_count',
    'without_adapters_count',
    'without_rrna_count',
    'genome_count',
    'unique_count',
    'length_count',
]
args.alignment_counts_names = [
    'Poor quality',
    'Ribosomal',
    'No alignment',
    'Multimappers',
    'Non-periodic',
    'Usable',
]

# The same, restricted to the steps after rRNA removal.
args.without_rrna_order = [
    'without_rrna_count',
    'genome_count',
    'unique_count',
    'length_count',
]
args.without_rrna_names = [
    "No alignment",
    "Multimappers",
    "Non-periodic",
    "Usable",
]

# Resolve the input and output paths of the selected project.
args.config = config_files[data]
args.alignment_counts = alignment_counts_files[data]
args.out = out_files[data]

# In "without rRNA" mode, swap in the shorter column/label lists and the
# dedicated output file.
if args.without_rrna:
    args.alignment_counts_order = args.without_rrna_order
    args.alignment_counts_names = args.without_rrna_names
    args.out = without_rrna_files[data]

# Flip both lists so that the last filtering step comes first. New lists are
# created, so args.without_rrna_order/names keep their original order.
# NOTE(review): presumably needed because the stacked-bar helpers expect the
# counts in ascending order -- confirm against mpl_utils.
args.alignment_counts_order = list(reversed(args.alignment_counts_order))
args.alignment_counts_names = list(reversed(args.alignment_counts_names))
In [ ]:
# Load the project configuration.
# The file is opened in a `with` block so the handle is closed right away, and
# it is parsed with yaml.safe_load: calling yaml.load without an explicit
# Loader is deprecated since PyYAML 5.1 and raises a TypeError from PyYAML 6.
# NOTE(review): safe_load only constructs plain YAML types; if a config relies
# on python-specific tags, use yaml.load(f, Loader=yaml.FullLoader) instead.
msg = "Reading config file"
logger.info(msg)
with open(args.config) as config_file:
    config = yaml.safe_load(config_file)

# Mapping from the sample names used in the counts file to display names.
sample_name_map = ribo_utils.get_sample_name_map(config)

# Load the per-sample read counts and order the rows by sample name ('note').
msg = "Reading counts"
logger.info(msg)
alignment_counts = pd.read_csv(args.alignment_counts)
alignment_counts = alignment_counts.sort_values('note').reset_index()
names = alignment_counts['note']

# Turn the selected count columns (in the order fixed by
# args.alignment_counts_order) into the per-segment values of the stacked bars.
alignment_diff_counts = mpl_utils.get_diff_counts(alignment_counts[args.alignment_counts_order])

# Labelled copy of the segment values, one column per filter label.
df = pd.DataFrame(alignment_diff_counts)
df.columns = args.alignment_counts_names
df['name'] = names

# have to use apply rather than map because of weird interaction
# with the missing_key_dictionary
df['display_name'] = df['name'].apply(lambda x: sample_name_map[x])
In [ ]:
# Draw one stacked bar per sample, with one segment per filtering category.
fig, ax = plt.subplots()

# One colour per stack segment.
pal = sns.color_palette(palette="Set3", n_colors=len(args.alignment_counts_names))

gap = 0.15

# Tick positions from 0 up to (but excluding) args.ymax in steps of args.ystep.
yticks = np.arange(0, args.ymax, args.ystep)

bars = mpl_utils.create_stacked_bar_graph(
    ax,
    alignment_diff_counts,
    colors=pal,
    x_tick_labels=df['display_name'],
    y_ticks=yticks,
    y_tick_labels=yticks,
    gap=gap,
    end_gaps=True,
    stack_labels=args.alignment_counts_names,
    y_title='Reads',
    log=False,
    font_size=args.fontsize,
    edge_colors='0.5'
)

# Anchor the top-centre of the legend below the axes (negative y in axes
# coordinates), presumably to leave room for the x tick labels.
ax.legend(
    loc='upper center',
    bbox_to_anchor=(0.5, -0.5),
    ncol=3,
    fontsize=args.legend_fontsize,
    title="Filter",
    frameon=True,
    framealpha=0.9
)

# Scientific notation on the y axis. The same format was applied in both the
# with- and without-rRNA cases, so no branch on args.without_rrna is needed.
ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.0e'))

mpl_utils.set_label_fontsize(ax, args.fontsize)
mpl_utils.set_legend_title_fontsize(ax, args.fontsize)

if args.out is not None:
    # bbox_inches='tight' makes the saved figure include the legend, which
    # is placed outside the axes.
    fig.savefig(args.out, bbox_inches='tight')
In [ ]: