In [172]:
cd ~/Desktop/SSUsearch/
In [173]:
mkdir -p ./workdir/clust
In [174]:
Prefix='SS' # name for the analysis run
Script_dir='./scripts'
Wkdir='./workdir'
Mcclust_jar='./external_tools/Clustering/dist/Clustering.jar'
Java_xmx='1g'
Java_gc_threads='2'
Otu_dist_cutoff='0.05'
Design='./data/test/SS.design'
In [175]:
# get absolute path
import os
Script_dir=os.path.abspath(Script_dir)
Wkdir=os.path.abspath(Wkdir)
Mcclust_jar=os.path.abspath(Mcclust_jar)
Design=os.path.abspath(Design)
New_path = '{}:{}'.format('~/Desktop/SSUsearch/external_tools/bin/', os.environ['PATH'])
print New_path
print Mcclust_jar
os.environ.update(
{'PATH':New_path,
'Prefix':Prefix,
'Script_dir': Script_dir,
'Wkdir': Wkdir,
'Mcclust_jar': Mcclust_jar,
'Java_xmx':Java_xmx,
'Java_gc_threads':Java_gc_threads,
'Otu_dist_cutoff':Otu_dist_cutoff,
'Design': Design})
In [176]:
cd ./workdir/clust
In [177]:
!sed -i 's/:/_/g' $Wkdir/*.ssu.out/*.forclust
!echo "*** Replace ':' with '_' in seq names (original illumina name has ':' in them)"
In [178]:
cat $Wkdir/*.ssu.out/*.forclust > combined_seqs.afa
In [179]:
# make group file for mcclust and mothur.
# first part of the file basename will be the group label, e.g. file "aa.bb.cc" will have "aa" as group label.
!python $Script_dir/make-groupfile.py $Prefix.groups $Wkdir/*.ssu.out/*.forclust
In [180]:
!echo "*** Starting mcclust derep"
!time java -Xmx$Java_xmx -XX:+UseParallelOldGC -XX:ParallelGCThreads=$Java_gc_threads \
-jar $Mcclust_jar derep -a -o derep.fasta \
temp.mcclust.names temp.txt combined_seqs.afa
!rm temp.txt
In [181]:
# convert mcclust names to mothur names
!python $Script_dir/mcclust2mothur_names_file.py temp.mcclust.names temp.mothur.names
In [182]:
%%bash
echo "starting preclust.."
### output: derep.precluster.fasta, derep.precluster.names
mothur "#pre.cluster(fasta=derep.fasta, diffs=1, name=temp.mothur.names)"
In [183]:
!python $Script_dir/mothur2mcclust_names_file.py derep.precluster.names $Prefix.names
In [184]:
!time java -Xmx$Java_xmx -XX:+UseParallelOldGC -XX:ParallelGCThreads=$Java_gc_threads \
-jar $Mcclust_jar dmatrix \
-l 25 -o matrix.bin -i $Prefix.names -I derep.precluster.fasta
In [185]:
!time java -Xmx$Java_xmx -XX:+UseParallelOldGC -XX:ParallelGCThreads=$Java_gc_threads \
-jar $Mcclust_jar cluster -m upgma \
-i $Prefix.names -s $Prefix.groups -o complete.clust -d matrix.bin
!python $Script_dir/mcclust2mothur-list-cutoff.py complete.clust $Prefix.list $Otu_dist_cutoff
In [186]:
!java -jar $Mcclust_jar rep-seqs -c -l -s complete.clust $Otu_dist_cutoff combined_seqs.afa
!mv complete.clust_rep_seqs.fasta otu_rep_align.fa
In [187]:
%%bash
mothur "#make.shared(list=$Prefix.list, group=$Prefix.groups, label=$Otu_dist_cutoff);"
cat $Wkdir/*.ssu.out/*.silva.taxonomy > $Prefix.taxonomy
mothur "#classify.otu(list=$Prefix.list, taxonomy=$Prefix.taxonomy, label=$Otu_dist_cutoff)"
mothur "#make.biom(shared=$Prefix.shared, constaxonomy=$Prefix.$Otu_dist_cutoff.cons.taxonomy)"
mv $Prefix.$Otu_dist_cutoff.biom $Prefix.biom
In [188]:
# clean up tempfiles
!rm -f mothur.*.logfile *rabund complete* derep.fasta matrix.bin nonoverlapping.bin temp.*
In [189]:
%%bash
#since The purpose of this tutorial is to show our new pipeline, we will skip details of community analysis with mothur
#following are some common commands in mothur
# mothur is inconsistent with the "Label" in files names. I have seens either "dummy" or "useLabel“
# "dummy" for 1.33.3; "userLabel" for 1.34.4
Label=userLabel
#Label=dummy
mothur "#make.shared(biom=$Prefix.biom); sub.sample(shared=$Prefix.shared); summary.single(calc=nseqs-coverage-sobs-chao-shannon-invsimpson); dist.shared(calc=braycurtis); pcoa(phylip=$Prefix.$Label.subsample.braycurtis.$Label.lt.dist); nmds(phylip=$Prefix.$Label.subsample.braycurtis.$Label.lt.dist); amova(phylip=$Prefix.$Label.subsample.braycurtis.$Label.lt.dist, design=$Design); tree.shared(calc=braycurtis); unifrac.weighted(tree=$Prefix.$Label.subsample.braycurtis.$Label.tre, group=$Design, random=T)"
rm -f mothur.*.logfile;
rm -f *.rabund
In [190]:
!echo "This part of pipeline finishes successfully :)"
In [ ]:
In [191]:
### some simple visualization
In [198]:
%%bash
Label=userLabel
#Label=dummy
# alpha diveristy index
python $Script_dir/plot-diversity-index.py $Label "chao,shannon,invsimpson" "c,d" "SS.$Label.subsample.groups.summary" "test" "test.alpha"
In [199]:
from IPython.display import Image
Image('test.alpha.png')
Out[199]:
In [200]:
# taxon distribution
!python $Script_dir/plot-taxa-count.py 2 test.taxa.dist ../*.ssu.out/*.silva.taxonomy.count
In [195]:
from IPython.display import Image
Image('test.taxa.dist.png')
Out[195]:
In [203]:
%%bash
Label=userLabel
#Label=dummy
# ordination
python $Script_dir/plot-pcoa.py SS.$Label.subsample.braycurtis.$Label.lt.pcoa.axes SS.$Label.subsample.braycurtis.$Label.lt.pcoa.loadings test.beta.pcoa
In [204]:
from IPython.display import Image
Image('test.beta.pcoa.png')
Out[204]:
In [ ]: