author: josenavasmolina@gmail.com
date: 25 Sep 2017
license: BSD3

closed_reference_otu_picking.ipynb

Commands to execute closed reference OTU picking


In [ ]:
from os.path import join

# Placeholder locations; replace them with real paths before running.
# `closed_ref_output` is the root under which the OTU-picking cells create
# their result directories, and `adaptor_cleanup_dir` is the directory whose
# entries those cells iterate over.
closed_ref_output = '/path/to/closed_ref_output/'
adaptor_cleanup_dir = '/path/to/output/cleanup_dir/'

# Reference sequences and the matching taxonomy file, one pair per database.
gg_ref_fna, gg_ref_tax = (
    '/path/to/greengenes/97_otus.fasta',
    '/path/to/greengenes/97_otu_taxonomy.txt',
)
silva_ref_fna, silva_ref_tax = (
    '/path/to/silva/Silva_123_rep_set97.fna',
    '/path/to/silva/taxonomy_97_7_levels.txt',
)

Greengenes 13.8


In [ ]:
# Directory holding everything produced by the Greengenes run.
gg_cr_dp = join(closed_ref_output, '01-closed-ref-greengenes')
# Per-study results sit one level below the run directory.
gg_cr_ps_dp = join(closed_ref_output, '01-closed-ref-greengenes', 'per_study')

In [ ]:
%%bash -s "$adaptor_cleanup_dir" "$gg_cr_dp" "$gg_cr_ps_dp" "$gg_ref_fna" "$gg_ref_tax"
# Closed-reference OTU picking against the Greengenes reference: one
# pick_closed_reference_otus.py run per study, then a merge of every
# per-study OTU table into a single BIOM file.
#
# Only the magic line above is expanded against the notebook namespace; the
# cell body is handed to bash untouched. The Python path variables are
# therefore passed as positional arguments and re-bound to their usual names.
adaptor_cleanup_dir=$1
gg_cr_dp=$2
gg_cr_ps_dp=$3
gg_ref_fna=$4
gg_ref_tax=$5

mkdir -p "$gg_cr_dp" "$gg_cr_ps_dp"

# Index the reference sequence file once up front to save about 20 min of
# processing per study.
indexdb_rna --ref "$gg_ref_fna,$gg_cr_ps_dp/97_otus.idx" --max_pos 10000

# Parameters file shared by every per-study run (printf instead of the
# non-portable `echo -e`; the file content is unchanged).
printf '%s\n' \
    'pick_otus:otu_picking_method sortmerna' \
    'pick_otus:threads 31' \
    "pick_otus:sortmerna_db $gg_cr_ps_dp/97_otus.idx" \
    > "$gg_cr_ps_dp/cr_params.txt"

# Glob instead of parsing `ls` so names containing whitespace survive.
# ${var%/} drops a trailing slash to avoid a doubled separator.
for study_path in "${adaptor_cleanup_dir%/}"/*
do
    # An unmatched glob is left as a literal pattern; skip it.
    [ -e "$study_path" ] || continue
    study=$(basename "$study_path")

    # Feed each run its own study; previously the whole cleanup directory was
    # passed as input on every iteration and the loop variable was only used
    # for the output path.
    # NOTE(review): assumes each entry of the cleanup directory is the
    # per-study sequence file itself. If entries are directories, point -i at
    # the sequence file inside them - confirm against the cleanup step.
    pick_closed_reference_otus.py -i "$study_path" \
                                  -o "$gg_cr_ps_dp/$study" \
                                  -r "$gg_ref_fna" \
                                  -t "$gg_ref_tax" \
                                  -p "$gg_cr_ps_dp/cr_params.txt"
done

# Merge all the individual tables: build the comma-separated list expected
# by the merge script.
tables=''
for table in "$gg_cr_ps_dp"/*/otu_table.biom
do
    [ -e "$table" ] || continue
    # ${tables:+...} only inserts the comma once the list is non-empty.
    tables="${tables:+$tables,}$table"
done

# Stop with a clear message instead of calling the merge with an empty list.
if [ -z "$tables" ]; then
    echo "No per-study OTU tables found under $gg_cr_ps_dp" >&2
    exit 1
fi

parallel_merge_otu_tables.py -i "$tables" -o "$gg_cr_dp/merged"
cp "$gg_cr_dp/merged/merged.biom" "$gg_cr_dp/emp_cr_gg_13_8.biom"

Silva 123


In [ ]:
# Directory holding everything produced by the Silva 16S run.
silva_cr_dp = join(closed_ref_output, '01-closed-ref-silva-16S')
# Per-study results sit one level below the run directory.
silva_cr_ps_dp = join(closed_ref_output, '01-closed-ref-silva-16S', 'per_study')

In [ ]:
%%bash -s "$adaptor_cleanup_dir" "$silva_cr_dp" "$silva_cr_ps_dp" "$silva_ref_fna" "$silva_ref_tax"
# Closed-reference OTU picking against the Silva reference: one
# pick_closed_reference_otus.py run per study, then a merge of every
# per-study OTU table into a single BIOM file.
#
# Only the magic line above is expanded against the notebook namespace; the
# cell body is handed to bash untouched. The Python path variables are
# therefore passed as positional arguments and re-bound to their usual names.
adaptor_cleanup_dir=$1
silva_cr_dp=$2
silva_cr_ps_dp=$3
silva_ref_fna=$4
silva_ref_tax=$5

mkdir -p "$silva_cr_dp" "$silva_cr_ps_dp"

# Index the reference sequence file once up front to save about 20 min of
# processing per study.
indexdb_rna --ref "$silva_ref_fna,$silva_cr_ps_dp/97_otus.idx" --max_pos 10000

# Parameters file shared by every per-study run (printf instead of the
# non-portable `echo -e`; the file content is unchanged).
printf '%s\n' \
    'pick_otus:otu_picking_method sortmerna' \
    'pick_otus:threads 31' \
    "pick_otus:sortmerna_db $silva_cr_ps_dp/97_otus.idx" \
    > "$silva_cr_ps_dp/cr_params.txt"

# Glob instead of parsing `ls` so names containing whitespace survive.
# ${var%/} drops a trailing slash to avoid a doubled separator.
for study_path in "${adaptor_cleanup_dir%/}"/*
do
    # An unmatched glob is left as a literal pattern; skip it.
    [ -e "$study_path" ] || continue
    study=$(basename "$study_path")

    # Feed each run its own study; previously the whole cleanup directory was
    # passed as input on every iteration and the loop variable was only used
    # for the output path.
    # NOTE(review): assumes each entry of the cleanup directory is the
    # per-study sequence file itself. If entries are directories, point -i at
    # the sequence file inside them - confirm against the cleanup step.
    pick_closed_reference_otus.py -i "$study_path" \
                                  -o "$silva_cr_ps_dp/$study" \
                                  -r "$silva_ref_fna" \
                                  -t "$silva_ref_tax" \
                                  -p "$silva_cr_ps_dp/cr_params.txt"
done

# Merge all the individual tables: build the comma-separated list expected
# by the merge script.
tables=''
for table in "$silva_cr_ps_dp"/*/otu_table.biom
do
    [ -e "$table" ] || continue
    # ${tables:+...} only inserts the comma once the list is non-empty.
    tables="${tables:+$tables,}$table"
done

# Stop with a clear message instead of calling the merge with an empty list.
if [ -z "$tables" ]; then
    echo "No per-study OTU tables found under $silva_cr_ps_dp" >&2
    exit 1
fi

parallel_merge_otu_tables.py -i "$tables" -o "$silva_cr_dp/merged"
cp "$silva_cr_dp/merged/merged.biom" "$silva_cr_dp/emp_cr_silva_16S_123.biom"