In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
from argparse import Namespace
import misc.logging_utils as logging_utils
# Empty namespace standing in for parsed command-line arguments; its
# attributes are filled in by the parameter cell further down.
args = Namespace()
# Logger obtained from the project's logging helper (presumably configured
# for notebook output -- confirm in misc.logging_utils).
logger = logging_utils.get_ipython_logger()
In [ ]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import pandas as pd
import seaborn as sns; sns.set(style='white')
import yaml
import riboutils.ribo_filenames as filenames
import misc.mpl_utils as mpl_utils
In [ ]:
# Notebook parameters. Edit the values below before running the remaining
# cells; they are attached as attributes of the shared `args` namespace.
vars(args).update(
    # YAML project configuration file
    config="/path/to/my/config.yaml",
    # CSV (optionally gzipped) holding the read length distributions
    distribution="/path/to/my/read-length-distribution.csv.gz",
    # name of the sample to plot
    sample="my-sample-name",
    # destination of the figure; None disables saving
    out="/path/to/my/read-length-distribution.pdf",
    # whether to select the "unique" variant of the sample
    is_unique=True,
    # explicit plot title; None derives it from the config
    title=None,
    # inclusive bounds on the read lengths shown in the plot
    min_read_length=22,
    max_read_length=35,
    # font size for the axis labels, title and tick labels
    fontsize=20,
    # upper limit of the y axis
    ymax=1.5e6,
)
In [ ]:
# Load the two inputs of the notebook: the YAML project configuration and
# the table of read length distributions.
msg = "Reading the config file"
logger.info(msg)
# Use a context manager so the file handle is closed deterministically, and
# safe_load because yaml.load() without an explicit Loader is deprecated
# (PyYAML >= 5.1) and rejected outright by PyYAML >= 6.
# NOTE(review): safe_load only accepts plain YAML tags; confirm the config
# does not rely on python-specific tags.
with open(args.config) as config_file:
    config = yaml.safe_load(config_file)

msg = "Reading the read length distributions"
logger.info(msg)
distribution_df = pd.read_csv(args.distribution)
logger.info("Constructing sample base name")

# The base name is the sample name followed by whatever strings the
# riboutils filename helpers return for the config "note" and for the
# uniqueness flag. It is matched against the 'basename' column later on.
name_parts = (
    args.sample,
    filenames.get_note_string(config.get('note')),
    filenames.get_unique_string(args.is_unique),
)
sample_name = "{}{}{}".format(*name_parts)
logger.info("sample base name: {}".format(sample_name))
logger.info("Filtering read lengths")

# Keep only the rows belonging to this sample whose read length lies in the
# closed interval [min_read_length, max_read_length], ordered by length.
in_length_range = distribution_df['length'].between(
    args.min_read_length, args.max_read_length
)
is_this_sample = distribution_df['basename'] == sample_name
lengths_to_view = (
    distribution_df[in_length_range & is_this_sample]
    .sort_values('length')
)
# Work out the plot title: an explicit args.title wins; otherwise use the
# display name mapped to the sample in the config; otherwise the raw sample
# name.
msg = "Creating title"
logger.info(msg)
title = args.title
if (title is None) and (args.config is not None):
    # `or {}` also covers a key that is present but empty in the YAML file.
    name_map = config.get('riboseq_sample_name_map') or {}
    title = name_map.get(args.sample)

if title is None:
    # No explicit title and no mapping for this sample: fall back to the
    # sample name rather than rendering the literal string "None".
    title = args.sample

if args.is_unique:
    title = "{}, Unique only".format(title)
logger.info("Creating the bar chart")

# One bar per read length, drawn on a fresh single-axes figure.
fig, ax = plt.subplots()
lengths_to_view.plot(
    kind='bar',
    x='length',
    y='count',
    ax=ax,
    title=title,
    legend=False,
)

# Axis labels share the configured font size.
for set_label, text in ((ax.set_xlabel, 'Length'), (ax.set_ylabel, 'Count')):
    set_label(text, fontsize=args.fontsize)

# Counts are shown in scientific notation on a fixed y range so that plots
# produced with the same ymax are directly comparable.
count_formatter = mtick.FormatStrFormatter('%.2e')
ax.yaxis.set_major_formatter(count_formatter)
ax.set_ylim(0, args.ymax)

# Cosmetic clean-up through the project's matplotlib helpers.
mpl_utils.remove_top_and_right_splines(ax)
mpl_utils.set_title_fontsize(ax, args.fontsize)
mpl_utils.set_ticklabels_fontsize(ax, args.fontsize)
# Write the figure to args.out when an output path is configured. The log
# message is only emitted for the action actually taken, so the log does not
# claim a save that never happened.
if args.out is not None:
    msg = "Saving plot to disk"
    logger.info(msg)
    # bbox_inches='tight' trims surrounding whitespace in the saved file
    fig.savefig(args.out, bbox_inches='tight')
else:
    msg = "No output path given; the plot was not saved"
    logger.info(msg)
In [ ]: