Prepare the environment

First we'll import various functions that we'll need for generating the report and configure the environment.


In [1]:
from os.path import join, expandvars
from joblib import Parallel, delayed

from tax_credit.framework_functions import (runtime_make_test_data,
                                            runtime_make_commands,
                                            clock_runtime,
                                           )

In [2]:
## project_dir should be the directory where you've downloaded (or cloned) the 
## short-read-tax-assignment repository. 
project_dir = expandvars("$HOME/Desktop/projects/short-read-tax-assignment")
data_dir = join(project_dir, "data")

results_dir = expandvars("$HOME/Desktop/projects/tax-credit-runtime")
runtime_results = join(results_dir, 'runtime_results.txt')
tmpdir = join(results_dir, 'tmp')

ref_db_dir = expandvars("$HOME/Desktop/projects/short-read-tax-assignment/data/ref_dbs/")
ref_seqs = join(ref_db_dir, 'gg_13_8_otus/99_otus_clean_515f-806r_trim250.fasta')
ref_taxa = join(ref_db_dir, 'gg_13_8_otus/99_otu_taxonomy_clean.tsv')

num_iters = 1
sampling_depths = [1, 10, 100, 1000, 10000]

Generate test datasets

Subsample reference sequences to create a series of test datasets and references.


In [44]:
runtime_make_test_data(ref_seqs, tmpdir, sampling_depths)

Preparing the method/parameter combinations

Finally we define the method, parameter combintations that we want to test and command templates to execute.

Template fields must adhere to following format:

                  {0} = output directory
                  {1} = input data
                  {2} = reference sequences
                  {3} = reference taxonomy
                  {4} = method name
                  {5} = other parameters

In [48]:
qiime1_template = 'source activate qiime1; source ~/.bashrc; assign_taxonomy.py -i {1} -o {0} -r {2} -t {3} -m {4} {5}'
blast_template = 'source activate qiime2-2017.2; qiime tools import --input-path {1} --output-path {1}.qza --type "FeatureData[Sequence]"; qiime tools import --input-path {2} --output-path {2}.qza --type "FeatureData[Sequence]"; qiime tools import --input-path {3} --output-path {3}.qza --type "FeatureData[Taxonomy]"; qiime feature-classifier blast --i-query {1}.qza --o-classification {0}/assign.tmp --i-reference-reads {2}.qza --i-reference-taxonomy {3}.qza {5}'
vsearch_template = 'source activate qiime2-2017.2; qiime tools import --input-path {1} --output-path {1}.qza --type "FeatureData[Sequence]"; qiime tools import --input-path {2} --output-path {2}.qza --type "FeatureData[Sequence]"; qiime tools import --input-path {3} --output-path {3}.qza --type "FeatureData[Taxonomy]"; qiime feature-classifier vsearch --i-query {1}.qza --o-classification {0}/assign.tmp --i-reference-reads {2}.qza --i-reference-taxonomy {3}.qza {5}'

# {method: template, method-specific params}
methods = {
           'rdp': (qiime1_template, '--confidence 0.5 --rdp_max_memory 16000'),
           'uclust': (qiime1_template, '--min_consensus_fraction 0.51 --similarity 0.8 --uclust_max_accepts 3'),
           'sortmerna': (qiime1_template, '--sortmerna_e_value 0.001 --min_consensus_fraction 0.51 --similarity 0.8 '
                         '--sortmerna_best_N_alignments 3 --sortmerna_coverage 0.8'),
           'blast' : (qiime1_template, '-e 0.001'),
           'blast+' : (blast_template, '--p-evalue 0.001'),
           'vsearch' : (vsearch_template, '--p-min-id 0.90')
          }

Generate the list of commands and run them

First we will vary the size of the reference database and search a single sequence against it.


In [49]:
commands_a = runtime_make_commands(tmpdir, tmpdir, methods, ref_taxa,
                                   sampling_depths, num_iters=1, subsample_ref=True)

Next, we will vary the number of query seqs, and keep the number of ref seqs constant


In [50]:
commands_b = runtime_make_commands(tmpdir, tmpdir, methods, ref_taxa,
                                   sampling_depths, num_iters=1, subsample_ref=False)

Let's look at the first command in each list and the total number of commands as a sanity check...


In [51]:
print(len(commands_a + commands_b))
print(commands_a[0])
print(commands_b[4])


60
('source activate qiime1; source ~/.bashrc; assign_taxonomy.py -i /Users/nbokulich/Desktop/projects/tax-credit-runtime/tmp/1.fna -o /Users/nbokulich/Desktop/projects/tax-credit-runtime/tmp -r /Users/nbokulich/Desktop/projects/tax-credit-runtime/tmp/1.fna -t /Users/nbokulich/Desktop/projects/short-read-tax-assignment/data/ref_dbs/gg_13_8_otus/99_otu_taxonomy_clean.tsv -m blast -e 0.001', 'blast', '1', '1', 0)
('source activate qiime1; source ~/.bashrc; assign_taxonomy.py -i /Users/nbokulich/Desktop/projects/tax-credit-runtime/tmp/10000.fna -o /Users/nbokulich/Desktop/projects/tax-credit-runtime/tmp -r /Users/nbokulich/Desktop/projects/tax-credit-runtime/tmp/10000.fna -t /Users/nbokulich/Desktop/projects/short-read-tax-assignment/data/ref_dbs/gg_13_8_otus/99_otu_taxonomy_clean.tsv -m blast -e 0.001', 'blast', '10000', '10000', 0)

In [54]:
Parallel(n_jobs=4)(delayed(clock_runtime)(command, runtime_results, force=False) for command in (list(set(commands_a + commands_b))))


Out[54]:
[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [ ]: