In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from argparse import Namespace
import misc.logging_utils as logging_utils

# `args` is a plain attribute container; the notebook parameters are attached
# to it in a later cell and read throughout the analysis.
args = Namespace()
# Logger used for the progress messages emitted by the cells below.
logger = logging_utils.get_ipython_logger()

In [ ]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

import pandas as pd
import seaborn as sns; sns.set(style='white')
import yaml

import riboutils.ribo_filenames as filenames

import misc.mpl_utils as mpl_utils

In [ ]:
# Notebook parameters. Everything a reader might want to tune lives here;
# the remaining cells only read these attributes from `args`.
vars(args).update(
    # Input / output locations.
    config="/path/to/my/config.yaml",  # YAML config file
    distribution="/path/to/my/read-length-distribution.csv.gz",  # CSV read with pandas
    sample="my-sample-name",  # sample whose rows are plotted
    out="/path/to/my/read-length-distribution.pdf",  # None => figure is not saved

    # Selects the "unique" variant of the sample base name.
    is_unique=True,

    # Plot title; None => derive it from the config file when possible.
    title=None,

    # Inclusive bounds on the read lengths shown in the chart.
    min_read_length=22,
    max_read_length=35,

    # Appearance: font size for labels/title/ticks and the upper y-axis limit.
    fontsize=20,
    ymax=1.5e6,
)

In [ ]:
msg = "Reading the config file"
logger.info(msg)
# Open the file in a context manager so the handle is closed as soon as the
# config is parsed. yaml.load() without an explicit Loader is deprecated in
# PyYAML 5.1+ and raises a TypeError in PyYAML 6, so use safe_load instead.
# NOTE(review): safe_load rejects Python-specific YAML tags; this assumes the
# config file is plain YAML (mappings, lists, scalars) -- confirm.
with open(args.config) as config_file:
    config = yaml.safe_load(config_file)

msg = "Reading the read length distributions"
logger.info(msg)
# The rows of this frame are later selected via the 'basename' and 'length'
# columns and plotted using the 'count' column.
distribution_df = pd.read_csv(args.distribution)

logger.info("Constructing sample base name")

# The base name is the sample name followed by two pieces derived from the
# config's optional 'note' entry and from the is_unique flag.
# NOTE(review): presumably each helper returns a (possibly empty) suffix
# string -- confirm against riboutils.ribo_filenames.
note_str = filenames.get_note_string(config.get('note'))
unique_str = filenames.get_unique_string(args.is_unique)
name_parts = (args.sample, note_str, unique_str)
sample_name = "{}{}{}".format(*name_parts)

msg = "sample base name: {}".format(sample_name)
logger.info(msg)
 
msg = "Filtering read lengths"
logger.info(msg)

# Boolean masks: rows belonging to this sample whose read length lies in the
# closed interval [min_read_length, max_read_length].
m_sample = distribution_df['basename'] == sample_name
m_min_read_length = distribution_df['length'] >= args.min_read_length
m_max_read_length = distribution_df['length'] <= args.max_read_length
m_to_view = m_min_read_length & m_max_read_length & m_sample

# Sort by length so the bars appear in increasing read-length order.
lengths_to_view = distribution_df[m_to_view]
lengths_to_view = lengths_to_view.sort_values('length')

# An empty selection usually means the constructed base name does not match
# any 'basename' in the distribution file, or the length range excludes all
# rows. Report it here so the cause is visible before plotting.
if lengths_to_view.empty:
    msg = ("No rows matched sample base name '{}' with read lengths in "
           "[{}, {}]".format(sample_name, args.min_read_length,
                             args.max_read_length))
    logger.warning(msg)

msg = "Creating title"
logger.info(msg)

# An explicitly supplied title always wins. Otherwise, when the config
# provides a sample-name map, build the title from it.
title = args.title
if (title is None) and (args.config is not None):

    if 'riboseq_sample_name_map' in config:
        # Fall back to the raw sample name when the map has no (or an empty)
        # entry for this sample; otherwise the title would literally read
        # "None" / "None, Unique only".
        title = config['riboseq_sample_name_map'].get(args.sample)
        if title is None:
            title = args.sample

        if args.is_unique:
            title = "{}, Unique only".format(title)
        
    
msg = "Creating the bar chart"
logger.info(msg)
fig, ax = plt.subplots()

# One bar per read length; the bar height is the 'count' column.
lengths_to_view.plot.bar(ax=ax, x='length', y='count', title=title, legend=False)
ax.set_xlabel('Length', fontsize=args.fontsize)
ax.set_ylabel('Count', fontsize=args.fontsize)
# Show y tick labels in scientific notation with two decimals.
ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2e'))

# Fixed y-range taken from the notebook parameters.
ax.set_ylim((0, args.ymax))
mpl_utils.remove_top_and_right_splines(ax)
mpl_utils.set_title_fontsize(ax, args.fontsize)
mpl_utils.set_ticklabels_fontsize(ax, args.fontsize)

# Only announce (and perform) the save when an output path was given, so the
# log does not claim a file was written when it was not.
if args.out is not None:
    msg = "Saving plot to disk"
    logger.info(msg)
    fig.savefig(args.out, bbox_inches='tight')

In [ ]: