Simulating CsCl gradient to reproduce the results in:
Lueders T, Manefield M, Friedrich MW. (2004). Enhanced sensitivity of DNA- and rRNA-based stable isotope probing by fractionation and quantitative analysis of isopycnic centrifugation gradients. Environmental Microbiology 6:73–78.
rotor:
In [30]:
workDir = "/home/nick/notebook/SIPSim/t/M.bark_M.ext/"
In [1]:
import os
import sys
%load_ext rpy2.ipython
In [2]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
In [4]:
!cd $workDir; \
seqDB_tools accession-GI2fasta < M.barkeri_refseq.txt > M.barkeri.fna
In [5]:
!cd $workDir; \
seqDB_tools accession-GI2fasta < M.extorquens_AM1_refseq.txt > M.extorquens_AM1.fna
In [12]:
# renaming genome sequences
!cd $workDir; \
find . -name "*_rn.fna" |\
xargs -I % rm -f %
!cd $workDir; \
find . -name "*.fna" |\
perl -pe 's/\.fna$//' | \
xargs -P 2 -I % bash -c \
"SIPSim renameGenomes %.fna > %_rn.fna"
In [13]:
# list of all genomes files and their associated names
!cd $workDir; \
find . -name "*_rn.fna" | \
perl -pe 's/.+\///' | \
perl -pe 's/(.+)(\.[^.]+)/\$1\t\$1\$2/' > genomes_all_list.txt
!cd $workDir; head genomes_all_list.txt
In [15]:
!cd $workDir; \
SIPSim indexGenomes genomes_all_list.txt \
--np 2 > index_log.txt
In [26]:
!cd $workDir; \
SIPSim gradientComms \
--n_comm 1 \
--abund_dist uniform \
--abund_dist_p low:1,high:1 \
genomes_all_list.txt > comm-n1-unif.txt
!cd $workDir; tail comm-n1-unif.txt
In [27]:
# making config file
config = """
[library 1]
# baseline: no incorp
[[intraPopDist 1]]
distribution = uniform
[[[start]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
[[[end]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
"""
outfile = os.path.join(workDir, 'incorp.config')
outf = open(outfile, 'wb')
outf.write(config)
outf.close()
In [28]:
!cd $workDir; \
SIPSim isoIncorp \
comm-n1-unif.txt incorp.config \
> incorp-n1-unif.txt
!cd $workDir; head incorp-n1-unif.txt
In [87]:
%%bash -s "$workDir"
# adding incorp 100% library for N.extorquens
cd $1
cat incorp-n1-unif.txt \
<(tail -n +2 incorp-n1-unif.txt | \
perl -pe 's/^1/2/' | \
perl -pe 's/0\.0/100.0/ if /M.extorquens/') \
> incorp-n2-unif.txt
head incorp-n2-unif.txt
In [90]:
!cd $workDir; \
SIPSim gradientComms \
--n_comm 2 \
--abund_dist uniform \
--abund_dist_p low:1,high:1 \
genomes_all_list.txt > comm-n2-unif.txt
!cd $workDir; tail comm-n2-unif.txt
In [97]:
!cd $workDir; \
SIPSim fractions \
comm-n2-unif.txt \
> fracs-n2-unif.txt
!cd $workDir; head fracs-n2-unif.txt
In [38]:
!cd $workDir;\
SIPSim fragGC \
genomes_all_list.txt \
--flr 500,None \
--fld skewed-normal,9000,2500,-5 \
--nf 50x \
--np 24 \
2> shotFragGC_skewN90-25-n5-nS.log \
> shotFragGC_skewN90-25-n5-nS.pkl
!cd $workDir; head shotFragGC_skewN90-25-n5-nS.log
In [ ]:
!cd $workDir; \
SIPSim OTU_sim \
shotFragGC_skewN90-25-n5-nS.pkl \
comm-n2-unif.txt \
incorp-n2-unif.txt \
fracs-n2-unif.txt \
--abs_abund 2e8 \
2> OTU-n2-unif_skewN90-25-n5-nS_A2e9.log \
> OTU-n2-unif_skewN90-25-n5-nS_A2e9.txt
!cd $workDir; head OTU-n2-unif_skewN90-25-n5-nS_A2e9.log
In [ ]:
%%R -i workDir
# loading file
inFiles = c('OTU-n2-unif_skewN90-25-n5-nS_A2e9.txt')
inFiles = sapply(inFiles, function(x){
x = as.character(x)
paste(c(workDir, x), collapse='/')
})
tbls = list()
for (fileName in inFiles){
tbls[[fileName]] = read.csv(fileName, sep='\t')
}
tbl = do.call(rbind, tbls)
tbl$abs_abund = as.numeric(gsub('.+-nS_A|\\.txt\\.[0-9]+', '', rownames(tbl)))
tbl = tbl %>%
filter(!grepl('inf', fractions, ignore.case=T)) %>%
separate(fractions, into = c('BD_min','BD_max'), sep='-', convert=TRUE) %>%
filter(BD_min != 1.795)
In [ ]:
%%R
## BD for G+C of 0 or 100
BD.GCp0 = 0 * 0.098 + 1.66
BD.GCp100 = 1 * 0.098 + 1.66
In [ ]:
%%R -w 800 -h 400
# plotting absolute abundances
tbl.s = tbl %>%
mutate(BD_mean = (BD_min + BD_max) / 2) %>%
group_by(abs_abund, BD_mean, library, taxon) %>%
summarize(total_count = sum(count))
## plot
p = ggplot(tbl.s, aes(BD_mean, total_count, shape=taxon, color=taxon)) +
geom_point() +
geom_line() +
scale_x_continuous(limits=c(1.68,1.78), breaks=seq(1.68,1.78,0.02)) +
labs(x='Buoyant density') +
facet_grid(library ~ .) +
theme(
text = element_text(size=16)
)
p
In [ ]:
In [ ]: