Dual CRISPR Screen Analysis

Step 5: Count Plots

Amanda Birmingham, CCBB, UCSD (abirmingham@ucsd.edu)

Instructions

To run this notebook reproducibly, follow these steps:

  1. Click Kernel > Restart & Clear Output
  2. When prompted, click the red Restart & clear all outputs button
  3. In the Input Parameters section, fill in the value of each variable for your analysis
  4. Click Cell > Run All

Input Parameters


In [ ]:
# Label for this dataset; the set-up cell hands it to ns_runs.generate_run_prefix
# to produce the fallback argument for g_plots_run_prefix.
g_dataset_name = "Notebook5Test"

# Run prefix and directory used to locate the individual fastq count files.
g_fastq_counts_run_prefix = "TestSet5"
g_fastq_counts_dir = "~/dual_crispr/test_data/test_set_5"

# These may be left as "": the set-up cell passes each one through
# ns_runs.check_or_set together with a fallback (fastq -> collapsed -> combined),
# which presumably substitutes the fallback for an empty value -- confirm in ccbb_pyutils.
g_collapsed_counts_run_prefix = ""
g_collapsed_counts_dir = ""
g_combined_counts_run_prefix = ""
g_combined_counts_dir = ""
g_plots_run_prefix = ""

# Directory passed to every plotting call below (presumably where plots are written).
g_plots_dir = "~/dual_crispr/test_outputs/test_set_5"

Automated Set-Up


In [ ]:
import inspect

import ccbb_pyutils.analysis_run_prefixes as ns_runs
import ccbb_pyutils.files_and_paths as ns_files
import ccbb_pyutils.notebook_logging as ns_logs


def describe_var_list(input_var_name_list):
    """Return a "name: value" report with one line per variable name.

    Args:
        input_var_name_list: iterable of strings, each the name of a variable
            to report on (in this notebook, the g_* globals).

    Returns:
        A single string made of "<name>: <value>\\n" for every name, in the
        order given; an empty string when no names are supplied.

    Note:
        Each name is resolved with eval(), so it must be resolvable from this
        function's scope. Pass only trusted, hard-coded names.
    """
    return "".join(["{0}: {1}\n".format(name, eval(name)) for name in input_var_name_list])


# Configure notebook logging via ccbb_pyutils; judging by the helper's name this
# sends INFO-level log messages to stdout -- confirm in ccbb_pyutils.notebook_logging.
ns_logs.set_stdout_info_logger()

In [ ]:
# Resolve the run prefixes and directories from the Input Parameters cell.
# Statement order matters: the fallback argument on each line is the value
# resolved on an earlier line (fastq -> collapsed -> combined -> plots).
g_fastq_counts_dir = ns_files.expand_path(g_fastq_counts_dir)

g_collapsed_counts_run_prefix = ns_runs.check_or_set(
    g_collapsed_counts_run_prefix, g_fastq_counts_run_prefix)
g_collapsed_counts_dir = ns_files.expand_path(
    ns_runs.check_or_set(g_collapsed_counts_dir, g_fastq_counts_dir))

g_combined_counts_run_prefix = ns_runs.check_or_set(
    g_combined_counts_run_prefix, g_collapsed_counts_run_prefix)
g_combined_counts_dir = ns_files.expand_path(
    ns_runs.check_or_set(g_combined_counts_dir, g_collapsed_counts_dir))

# The plots run prefix falls back to one generated from the dataset name,
# not to the combined-counts run prefix.
g_plots_run_prefix = ns_runs.check_or_set(
    g_plots_run_prefix, ns_runs.generate_run_prefix(g_dataset_name))
g_plots_dir = ns_files.expand_path(
    ns_runs.check_or_set(g_plots_dir, g_combined_counts_dir))

# Echo the resolved settings so they are recorded in the notebook output.
print(describe_var_list([
    'g_fastq_counts_dir',
    'g_collapsed_counts_run_prefix',
    'g_collapsed_counts_dir',
    'g_combined_counts_run_prefix',
    'g_combined_counts_dir',
    'g_plots_run_prefix',
    'g_plots_dir']))

# Only the collapsed, combined, and plots directories are passed to
# verify_or_make_dir; the fastq counts directory is not.
ns_files.verify_or_make_dir(g_collapsed_counts_dir)
ns_files.verify_or_make_dir(g_combined_counts_dir)
ns_files.verify_or_make_dir(g_plots_dir)

In [ ]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

Count File Suffixes


In [ ]:
# Print the source of the function that supplies the counts file suffix, so the
# suffix in use is visible in the notebook output. ns_counter is reused by the
# "Individual fastq Plots" cells below.
import dual_crispr.construct_counter as ns_counter
print(inspect.getsource(ns_counter.get_counts_file_suffix))

In [ ]:
# Print the source of the functions that supply the collapsed and combined counts
# file suffixes. ns_combine is reused by the sample and combined plot cells below.
import dual_crispr.count_combination as ns_combine
print(inspect.getsource(ns_combine.get_collapsed_counts_file_suffix))
print(inspect.getsource(ns_combine.get_combined_counts_file_suffix))

Count Plots Functions


In [ ]:
# Print the full source of the count_plots module (not just one function), so the
# plotting code used by the cells below is recorded in the notebook output.
import dual_crispr.count_plots as ns_plot
print(inspect.getsource(ns_plot))

Individual fastq Plots


In [ ]:
# Print the outcome of checking the fastq counts directory for files that carry
# the fastq run prefix and the counts file suffix.
print(
    ns_files.check_file_presence(
        g_fastq_counts_dir,
        g_fastq_counts_run_prefix,
        ns_counter.get_counts_file_suffix(),
        check_failure_msg="Count plots could not detect any individual fastq count files."
    )
)

In [ ]:
# Plot raw counts for the individual fastq count files, passing the plots
# directory, plots run prefix, and boxplot suffix for the output.
ns_plot.plot_raw_counts(
    g_fastq_counts_dir,
    g_fastq_counts_run_prefix,
    ns_counter.get_counts_file_suffix(),
    g_plots_dir,
    g_plots_run_prefix,
    ns_plot.get_boxplot_suffix()
)

Individual Sample Plots


In [ ]:
# Print the outcome of checking the collapsed counts directory for files that
# carry the collapsed run prefix and the collapsed counts file suffix.
print(
    ns_files.check_file_presence(
        g_collapsed_counts_dir,
        g_collapsed_counts_run_prefix,
        ns_combine.get_collapsed_counts_file_suffix(),
        check_failure_msg="Count plots could not detect any individual sample count files."
    )
)

In [ ]:
# Plot raw counts for the individual sample (collapsed) count files, using the
# same plots directory, run prefix, and boxplot suffix as the fastq plots.
ns_plot.plot_raw_counts(
    g_collapsed_counts_dir,
    g_collapsed_counts_run_prefix,
    ns_combine.get_collapsed_counts_file_suffix(),
    g_plots_dir,
    g_plots_run_prefix,
    ns_plot.get_boxplot_suffix()
)

Combined Samples Plot


In [ ]:
# Print the outcome of checking the combined counts directory for a file that
# carries the combined run prefix and the combined counts file suffix.
print(
    ns_files.check_file_presence(
        g_combined_counts_dir,
        g_combined_counts_run_prefix,
        ns_combine.get_combined_counts_file_suffix(),
        check_failure_msg="Count plots could not detect a combined count file."
    )
)

In [ ]:
# Plot raw counts from the combined counts file; this uses the dedicated
# plot_combined_raw_counts entry point rather than plot_raw_counts.
ns_plot.plot_combined_raw_counts(
    g_combined_counts_dir,
    g_combined_counts_run_prefix,
    ns_combine.get_combined_counts_file_suffix(),
    g_plots_dir,
    g_plots_run_prefix,
    ns_plot.get_boxplot_suffix()
)