In [66]:
cd /usr/local/notebooks
In [35]:
mkdir -p ./workdir/clust
In [44]:
Prefix='SS' # name for the analysis run
Script_dir='./SSUsearch/scripts'
Wkdir='./workdir'
Mcclust_jar='./external_tools/Clustering/dist/Clustering.jar'
Java_xmx='10g'
Java_gc_threads='2'
Otu_dist_cutoff='0.05'
Design='./data/test/SS.design'
In [62]:
# get absolute path
import os
Script_dir=os.path.abspath(Script_dir)
Wkdir=os.path.abspath(Wkdir)
Mcclust_jar=os.path.abspath(Mcclust_jar)
Design=os.path.abspath(Design)
os.environ.update(
{'Prefix':Prefix,
'Script_dir': Script_dir,
'Wkdir': Wkdir,
'Mcclust_jar': Mcclust_jar,
'Java_xmx':Java_xmx,
'Java_gc_threads':Java_gc_threads,
'Otu_dist_cutoff':Otu_dist_cutoff,
'Design': Design})
In [67]:
cd ./workdir/clust
In [ ]:
!sed -i 's/:/_/g' $Wkdir/*.ssu.out/*.forclust
!echo "*** Replace ':' with '_' in seq names (original illumina name has ':' in them)"
In [48]:
cat $Wkdir/*.ssu.out/*.forclust > combined_seqs.afa
In [49]:
# make group file for mcclust and mothur.
# first part of the file basename will be the group label, e.g. file "aa.bb.cc" will have "aa" as group label.
!python $Script_dir/make-groupfile.py $Prefix.groups $Wkdir/*.ssu.out/*.forclust
In [50]:
!echo "*** Starting mcclust derep"
!time java -Xmx$Java_xmx -XX:+UseParallelOldGC -XX:ParallelGCThreads=$Java_gc_threads \
-jar $Mcclust_jar derep -a -o derep.fasta \
temp.mcclust.names temp.txt combined_seqs.afa
!rm temp.txt
In [51]:
# convert mcclust names to mothur names
!python $Script_dir/mcclust2mothur_names_file.py temp.mcclust.names temp.mothur.names
In [53]:
!echo "starting preclust.."
### output: derep.precluster.fasta, derep.precluster.names
!mothur "#pre.cluster(fasta=derep.fasta, diffs=1, name=temp.mothur.names)"
In [54]:
!python $Script_dir/mothur2mcclust_names_file.py derep.precluster.names $Prefix.names
In [55]:
!time java -Xmx$Java_xmx -XX:+UseParallelOldGC -XX:ParallelGCThreads=$Java_gc_threads \
-jar $Mcclust_jar dmatrix \
-l 25 -o matrix.bin -i $Prefix.names -I derep.precluster.fasta
In [56]:
!time java -Xmx$Java_xmx -XX:+UseParallelOldGC -XX:ParallelGCThreads=$Java_gc_threads \
-jar $Mcclust_jar cluster -m upgma \
-i $Prefix.names -s $Prefix.groups -o complete.clust -d matrix.bin
!python $Script_dir/mcclust2mothur-list-cutoff.py complete.clust $Prefix.list $Otu_dist_cutoff
In [57]:
!java -jar $Mcclust_jar rep-seqs -c -l -s complete.clust $Otu_dist_cutoff combined_seqs.afa
!mv complete.clust_rep_seqs.fasta otu_rep_align.fa
In [58]:
!mothur "#make.shared(list=$Prefix.list, group=$Prefix.groups, label=$Otu_dist_cutoff);"
!cat $Wkdir/*.ssu.out/*.silva.taxonomy > $Prefix.taxonomy
!mothur "#classify.otu(list=$Prefix.list, taxonomy=$Prefix.taxonomy, label=$Otu_dist_cutoff)"
!mothur "#make.biom(shared=$Prefix.shared, constaxonomy=$Prefix.$Otu_dist_cutoff.cons.taxonomy)"
!mv $Prefix.$Otu_dist_cutoff.biom $Prefix.biom
In [59]:
# clean up tempfiles
!rm -f mothur.*.logfile *rabund complete* derep.fasta matrix.bin nonoverlapping.bin temp.*
In [60]:
%%bash
#since The purpose of this tutorial is to show our new pipeline, we will skip details of community analysis with mothur
#following are some common commands in mothur
# mothur is inconsistent with the "Label" in files names. I have seens either "dummy" or "useLabel“
# "dummy" for 1.33.3; "userLabel" for 1.34.4
Label=userLabel
#Label=dummy
mothur "#make.shared(biom=$Prefix.biom); sub.sample(shared=$Prefix.shared); summary.single(calc=nseqs-coverage-sobs-chao-shannon-invsimpson); dist.shared(calc=braycurtis); pcoa(phylip=$Prefix.$Label.subsample.braycurtis.$Label.lt.dist); nmds(phylip=$Prefix.$Label.subsample.braycurtis.$Label.lt.dist); amova(phylip=$Prefix.$Label.subsample.braycurtis.$Label.lt.dist, design=$Design); tree.shared(calc=braycurtis); unifrac.weighted(tree=$Prefix.$Label.subsample.braycurtis.$Label.tre, group=$Design, random=T)"
rm -f mothur.*.logfile;
rm -f *.rabund
In [61]:
!echo "This part of pipeline finishes successfully :)"
In [ ]:
In [ ]:
### some simple visualization
In [91]:
%%bash
Label=userLabel
#Label=dummy
# alpha diveristy index
python $Script_dir/plot-diversity-index.py $Label "chao,shannon,invsimpson" "c,d" "SS.$Label.subsample.groups.summary" "test" "test.alpha"
In [93]:
from IPython.display import Image
Image('test.alpha.png')
Out[93]:
In [87]:
# taxon distribution
!python $Script_dir/plot-taxa-count.py 2 test.taxa.dist ../*.ssu.out/*.silva.taxonomy.count
In [89]:
from IPython.display import Image
Image('test.taxa.dist.png')
Out[89]:
In [101]:
%%bash
Label=userLabel
#Label=dummy
# ordination
python $Script_dir/plot-pcoa.py SS.$Label.subsample.braycurtis.$Label.lt.pcoa.axes SS.$Label.subsample.braycurtis.$Label.lt.pcoa.loadings test.beta.pcoa
In [102]:
from IPython.display import Image
Image('test.beta.pcoa.png')
Out[102]:
In [ ]: