In [19]:
import os
baseDir = '/home/nick/notebook/SIPSim/dev/bac_genome1147/'
genomeDir = '/var/seq_data/ncbi_db/genome/Jan2016/bac_complete_spec-rep1_rn/'
rnammerDir = os.path.join(baseDir, 'rnammer')
In [20]:
import glob
from IPython.display import Image
%load_ext rpy2.ipython
In [21]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [22]:
if not os.path.isdir(rnammerDir):
os.makedirs(rnammerDir)
In [15]:
%%bash -s "$genomeDir" "$rnammerDir"
find $1 -name "*.fna" | \
perl -pe 's/.+\/|\.fna//g' | \
xargs -n 1 -I % -P 30 bash -c \
"rnammer -S bac -m ssu -gff $2/%_rrn.gff -f $2/%_rrn.fna -xml $2/%_rrn.xml < $1/%.fna"
In [16]:
## Summarizing the results
!cd $rnammerDir; \
egrep -v "^#" *.gff | \
grep "16s_rRNA" | \
perl -pe 's/:/\t/' > ssu_summary.txt
In [17]:
%%bash -s "$rnammerDir"
cd $1
printf "ssu gene length distribution:\n"
cut -d $'\t' -f 7 ssu_summary.txt | NY_misc_perl stats_descriptive
In [18]:
! cd $rnammerDir; \
cat *_rrn.fna > bac_genome1147_16S.fna
!printf "Number of sequences: "
! cd $rnammerDir; \
grep -c ">" bac_genome1147_16S.fna
In [ ]: