In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from argparse import Namespace
import misc.logging_utils as logging_utils

# Notebook-wide state: a logger for progress messages and an empty Namespace
# that the configuration cell below fills in (args.fontsize, args.out, ...).
logger = logging_utils.get_ipython_logger()
args = Namespace()

In [ ]:
import matplotlib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns; sns.set(style='white', color_codes=True, palette='muted')

import misc.mpl_utils as mpl_utils

import yaml

import riboutils.ribo_utils as ribo_utils

In [ ]:
# Input and output paths, one dictionary per kind of file. The keys are
# arbitrary labels, but every dictionary has to use the same set of keys.

config_files = {'my_ribo_project': "/path/to/my/config.yaml"}

# counts table written by get-all-read-filtering-counts
alignment_counts_files = {'my_ribo_project': "/path/to/my/read-filtering-counts.csv.gz"}

# figure destination when args.without_rrna is False (pdf or png)
out_files = {'my_ribo_project': "/path/to/my/read-filtering-counts.pdf"}

# figure destination when args.without_rrna is True
without_rrna_files = {'my_ribo_project': "/path/to/my/read-filtering-counts.no-rrna.pdf"}

In [ ]:
# Settings for this run. `data` selects one entry from each of the path
# dictionaries, so it has to be a key that is present in all of them.
data = 'my_ribo_project'

# False: plot every filtering step; True: plot only the steps listed in
# args.without_rrna_order
args.without_rrna = False

args.fontsize = 20
args.legend_fontsize = 15

# Upper limit and spacing of the y-axis ticks. The ticks are generated with
# np.arange, which excludes its stop value; the +1 keeps the top tick.
args.ymax, args.ystep = (2e7+1, 5e6) if args.without_rrna else (1.5e8+1, 2.5e7)

# Count columns read from the counts file, and the legend label used for each
# stack segment (same number of entries, same order).
args.alignment_counts_order = [
    'raw_data_count', 'without_adapters_count', 'without_rrna_count',
    'genome_count', 'unique_count', 'length_count'
]
args.alignment_counts_names = [
    'Poor quality', 'Ribosomal', 'No alignment',
    'Multimappers', 'Non-periodic', 'Usable'
]

# The reduced column/label lists used when args.without_rrna is True
args.without_rrna_order = [
    'without_rrna_count', 'genome_count', 'unique_count', 'length_count'
]
args.without_rrna_names = [
    "No alignment", "Multimappers", "Non-periodic", "Usable"
]

# Resolve the input and output paths for the selected data set
args.config = config_files[data]
args.alignment_counts = alignment_counts_files[data]
args.out = out_files[data]

if args.without_rrna:
    args.alignment_counts_order = args.without_rrna_order
    args.alignment_counts_names = args.without_rrna_names
    args.out = without_rrna_files[data]

# The lists above are written from the first filtering step to the last; the
# remaining cells work with them in the opposite order. Reversing into new
# lists leaves args.without_rrna_order/names untouched.
args.alignment_counts_order = list(reversed(args.alignment_counts_order))
args.alignment_counts_names = list(reversed(args.alignment_counts_names))

In [ ]:
# Load the project configuration and the read filtering counts, then build the
# data frame `df` holding the per-sample values shown in the stacked bar chart.
msg = "Reading config file"
logger.info(msg)

# The context manager closes the file handle again. safe_load is used because
# calling yaml.load without an explicit Loader is deprecated (and rejected by
# recent PyYAML releases); it only builds plain Python objects.
# NOTE(review): assumes the config file contains no python-specific YAML tags.
with open(args.config) as config_file:
    config = yaml.safe_load(config_file)
sample_name_map = ribo_utils.get_sample_name_map(config)

msg = "Reading counts"
logger.info(msg)

# sort the rows by the 'note' column so the bars appear in a stable order
alignment_counts = pd.read_csv(args.alignment_counts)
alignment_counts = alignment_counts.sort_values('note').reset_index()

# the 'note' values are used below as keys into sample_name_map
names = alignment_counts['note']

# presumably the differences between consecutive count columns, i.e., the
# height of each stack segment -- confirm against mpl_utils.get_diff_counts
alignment_diff_counts = mpl_utils.get_diff_counts(alignment_counts[args.alignment_counts_order])
df = pd.DataFrame(alignment_diff_counts)
df.columns = args.alignment_counts_names
df['name'] = names

# have to use apply rather than map because of weird interaction
# with the missing_key_dictionary
df['display_name'] = df['name'].apply(lambda x: sample_name_map[x])

In [ ]:
# Draw the stacked bar chart of the read filtering counts (one bar per sample,
# one stack segment per entry of args.alignment_counts_names) and save it.
fig, ax = plt.subplots()

# one color per stack segment
pal = sns.color_palette(palette="Set3", n_colors=len(args.alignment_counts_names))

gap = 0.15
yticks = np.arange(0, args.ymax, args.ystep)

bars = mpl_utils.create_stacked_bar_graph(
    ax,
    alignment_diff_counts,
    colors=pal,
    x_tick_labels=df['display_name'],
    y_ticks=yticks,
    y_tick_labels=yticks,
    gap=gap,
    end_gaps=True,
    stack_labels=args.alignment_counts_names,
    y_title='Reads',
    log=False,
    font_size=args.fontsize,
    edge_colors='0.5'
)

# the legend is anchored below the axes
ax.legend(
    loc='upper center',
    bbox_to_anchor=(0.5, -0.5),
    ncol=3,
    fontsize=args.legend_fontsize,
    title="Filter",
    frameon=True,
    framealpha=0.9
)

# The tick format is the same regardless of args.without_rrna (the original
# if/else had identical branches): scientific notation without decimals.
ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.0e'))

mpl_utils.set_label_fontsize(ax, args.fontsize)
mpl_utils.set_legend_title_fontsize(ax, args.fontsize)

if args.out is not None:
    # bbox_inches='tight' so that the legend outside of the axes is not
    # cropped from the saved figure
    fig.savefig(args.out, bbox_inches='tight')

In [ ]: