In [ ]:
from os.path import join
# --- Inputs and outputs shared by every cell below -------------------------
# Directory whose entries are iterated over, one OTU-picking run per entry.
adaptor_cleanup_dir = "/path/to/output/cleanup_dir/"
# Root under which the per-database closed-reference results are written.
closed_ref_output = "/path/to/closed_ref_output/"

# --- Greengenes reference (sequences + taxonomy) ---------------------------
gg_ref_fna = "/path/to/greengenes/97_otus.fasta"
gg_ref_tax = "/path/to/greengenes/97_otu_taxonomy.txt"

# --- Silva reference (sequences + taxonomy) --------------------------------
silva_ref_fna = "/path/to/silva/Silva_123_rep_set97.fna"
silva_ref_tax = "/path/to/silva/taxonomy_97_7_levels.txt"
In [ ]:
# Output layout for the Greengenes run:
#   <closed_ref_output>/01-closed-ref-greengenes/            merged results
#   <closed_ref_output>/01-closed-ref-greengenes/per_study/  one sub-directory per study
gg_cr_ps_dp = join(closed_ref_output, '01-closed-ref-greengenes', 'per_study')
gg_cr_dp = join(closed_ref_output, '01-closed-ref-greengenes')
In [ ]:
%%bash -s "$adaptor_cleanup_dir" "$gg_cr_dp" "$gg_cr_ps_dp" "$gg_ref_fna" "$gg_ref_tax"
# Closed-reference OTU picking against the Greengenes reference: one
# pick_closed_reference_otus.py run per entry of the clean-up directory,
# followed by a merge of the per-study BIOM tables.
#
# IPython expands Python variables on the %%bash magic line but not inside
# the cell body, so the notebook variables are handed over as positional
# arguments and re-bound to the same names here.
adaptor_cleanup_dir=${1:?adaptor_cleanup_dir was not passed to the cell}
gg_cr_dp=${2:?gg_cr_dp was not passed to the cell}
gg_cr_ps_dp=${3:?gg_cr_ps_dp was not passed to the cell}
gg_ref_fna=${4:?gg_ref_fna was not passed to the cell}
gg_ref_tax=${5:?gg_ref_tax was not passed to the cell}

# Drop a trailing slash so joined paths do not contain '//'.
adaptor_cleanup_dir=${adaptor_cleanup_dir%/}

index_fp="$gg_cr_ps_dp/97_otus.idx"
params_fp="$gg_cr_ps_dp/cr_params.txt"

mkdir -p "$gg_cr_dp" "$gg_cr_ps_dp" || exit 1

# Index the reference sequence file once up front; this saves roughly
# 20 min of processing per study.
indexdb_rna --ref "$gg_ref_fna,$index_fp" --max_pos 10000 || {
  echo "error: indexdb_rna failed for $gg_ref_fna" >&2
  exit 1
}

# QIIME parameters file: use SortMeRNA with the pre-built index.
printf '%s\n' \
  "pick_otus:otu_picking_method sortmerna" \
  "pick_otus:threads 31" \
  "pick_otus:sortmerna_db $index_fp" > "$params_fp"

# One closed-reference run per entry of the clean-up directory. The input is
# the entry itself, not the whole directory, so each study gets its own
# sequences.
# NOTE(review): assumes every entry is a sequence file; if entries are
# per-study directories, point -i at the FASTA inside each one.
for study_input in "$adaptor_cleanup_dir"/*; do
  [[ -e "$study_input" ]] || continue   # directory is empty: glob stayed literal
  study=${study_input##*/}
  pick_closed_reference_otus.py -i "$study_input" \
    -o "$gg_cr_ps_dp/$study" \
    -r "$gg_ref_fna" \
    -t "$gg_ref_tax" \
    -p "$params_fp" \
    || echo "warning: OTU picking failed for $study" >&2
done

# Merge all the individual tables (comma-separated list expected by -i).
shopt -s nullglob
tables=("$gg_cr_ps_dp"/*/otu_table.biom)
shopt -u nullglob
if (( ${#tables[@]} == 0 )); then
  echo "error: no otu_table.biom found under $gg_cr_ps_dp" >&2
  exit 1
fi
table_list=$(IFS=,; printf '%s' "${tables[*]}")

parallel_merge_otu_tables.py -i "$table_list" -o "$gg_cr_dp/merged" || exit 1
cp -- "$gg_cr_dp/merged/merged.biom" "$gg_cr_dp/emp_cr_gg_13_8.biom"
In [ ]:
# Output layout for the Silva 16S run:
#   <closed_ref_output>/01-closed-ref-silva-16S/            merged results
#   <closed_ref_output>/01-closed-ref-silva-16S/per_study/  one sub-directory per study
silva_cr_ps_dp = join(closed_ref_output, '01-closed-ref-silva-16S', 'per_study')
silva_cr_dp = join(closed_ref_output, '01-closed-ref-silva-16S')
In [ ]:
%%bash -s "$adaptor_cleanup_dir" "$silva_cr_dp" "$silva_cr_ps_dp" "$silva_ref_fna" "$silva_ref_tax"
# Closed-reference OTU picking against the Silva reference: one
# pick_closed_reference_otus.py run per entry of the clean-up directory,
# followed by a merge of the per-study BIOM tables.
#
# IPython expands Python variables on the %%bash magic line but not inside
# the cell body, so the notebook variables are handed over as positional
# arguments and re-bound to the same names here.
adaptor_cleanup_dir=${1:?adaptor_cleanup_dir was not passed to the cell}
silva_cr_dp=${2:?silva_cr_dp was not passed to the cell}
silva_cr_ps_dp=${3:?silva_cr_ps_dp was not passed to the cell}
silva_ref_fna=${4:?silva_ref_fna was not passed to the cell}
silva_ref_tax=${5:?silva_ref_tax was not passed to the cell}

# Drop a trailing slash so joined paths do not contain '//'.
adaptor_cleanup_dir=${adaptor_cleanup_dir%/}

index_fp="$silva_cr_ps_dp/97_otus.idx"
params_fp="$silva_cr_ps_dp/cr_params.txt"

mkdir -p "$silva_cr_dp" "$silva_cr_ps_dp" || exit 1

# Index the reference sequence file once up front; this saves roughly
# 20 min of processing per study.
indexdb_rna --ref "$silva_ref_fna,$index_fp" --max_pos 10000 || {
  echo "error: indexdb_rna failed for $silva_ref_fna" >&2
  exit 1
}

# QIIME parameters file: use SortMeRNA with the pre-built index.
printf '%s\n' \
  "pick_otus:otu_picking_method sortmerna" \
  "pick_otus:threads 31" \
  "pick_otus:sortmerna_db $index_fp" > "$params_fp"

# One closed-reference run per entry of the clean-up directory. The input is
# the entry itself, not the whole directory, so each study gets its own
# sequences.
# NOTE(review): assumes every entry is a sequence file; if entries are
# per-study directories, point -i at the FASTA inside each one.
for study_input in "$adaptor_cleanup_dir"/*; do
  [[ -e "$study_input" ]] || continue   # directory is empty: glob stayed literal
  study=${study_input##*/}
  pick_closed_reference_otus.py -i "$study_input" \
    -o "$silva_cr_ps_dp/$study" \
    -r "$silva_ref_fna" \
    -t "$silva_ref_tax" \
    -p "$params_fp" \
    || echo "warning: OTU picking failed for $study" >&2
done

# Merge all the individual tables (comma-separated list expected by -i).
shopt -s nullglob
tables=("$silva_cr_ps_dp"/*/otu_table.biom)
shopt -u nullglob
if (( ${#tables[@]} == 0 )); then
  echo "error: no otu_table.biom found under $silva_cr_ps_dp" >&2
  exit 1
fi
table_list=$(IFS=,; printf '%s' "${tables[*]}")

parallel_merge_otu_tables.py -i "$table_list" -o "$silva_cr_dp/merged" || exit 1
cp -- "$silva_cr_dp/merged/merged.biom" "$silva_cr_dp/emp_cr_silva_16S_123.biom"