Based on:
Suzuki MT, Giovannoni SJ. (1996). Bias caused by template annealing in the amplification of mixtures of 16S rRNA genes by PCR. Appl Environ Microbiol 62:625–630.
In [2]:
# dirs
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome3/PCR_sim/'
genomeDir = '/home/nick/notebook/SIPSim/dev/bac_genome3/genomes/'
R_dir = '/home/nick/notebook/SIPSim/lib/R/'
# input files
genomeIndexFile = '/home/nick/notebook/SIPSim/dev/bac_genome3/genomes/genome_index.txt'
fragBD_file = '/home/nick/notebook/SIPSim/dev/bac_genome3/validation/ampFrags_real_kde_dif.pkl'
# params
nprocs = 24
In [3]:
%load_ext rpy2.ipython
In [4]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [7]:
if not os.path.isdir(workDir):
os.makedirs(workDir)
In [8]:
commFile = 'comm_n1.txt'
!cd $workDir; \
SIPSim communities \
$genomeIndexFile \
> $commFile
!cd $workDir; \
head $commFile
In [9]:
incorpConfigFile = 'PT0_PI0.config'
!cd $workDir; \
SIPSim incorpConfigExample \
--percTaxa 0 \
--percIncorpUnif 0 \
> $incorpConfigFile
!cd $workDir; \
head $incorpConfigFile
In [10]:
wIncorpFile = os.path.splitext(fragBD_file)[0] + '_incorp.pkl'
!cd $workDir; \
SIPSim isotope_incorp \
$fragBD_file \
$incorpConfigFile \
--comm $commFile \
--np $nprocs \
> $wIncorpFile
!cd $workDir; \
ls -thlc $wIncorpFile
In [11]:
fracsFile = 'fracs.txt'
!cd $workDir; \
SIPSim gradient_fractions \
comm_n1.txt \
> $fracsFile
!cd $workDir; \
head -n 4 $fracsFile
In [125]:
otuTableFile = 'OTU_abs1e9.txt'
!cd $workDir; \
SIPSim OTU_table \
$wIncorpFile \
$commFile \
$fracsFile \
--abs 1e9 \
--np $nprocs \
> $otuTableFile
!cd $workDir; \
head -n 4 $otuTableFile
In [126]:
%%R -i workDir -i otuTableFile
setwd(workDir)
tbl.otu = read.delim(otuTableFile, sep='\t')
tbl.otu %>% head(n=3)
In [127]:
%%R -w 900 -h 300
ggplot(tbl.otu, aes(BD_mid, rel_abund, fill=taxon)) +
geom_area(stat='identity') +
labs(x = 'Buoyant density', y = 'Relative abundance') +
theme_bw() +
theme(
text = element_text(size=16)
)
5 * 1e-9 * (1 / 499.5) * (1 / (30 * 1e-6))
In [216]:
%%R
rxn_eff = function(M_n, P_n=1e-6, f_0=1, k=1){
b = k * M_n + P_n
f_n = f_0 * (P_n / b)
return(f_n)
}
M_n = c(1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3)
f_n = sapply(M_n, rxn_eff)
df = data.frame(M_n = M_n, f_n = f_n)
ggplot(df, aes(M_n, f_n)) +
geom_point() +
geom_line() +
scale_x_log10() +
theme(
text = element_text(size=16)
)
In [246]:
otu_PCR_file = 'OTU_abs1e9_PCR.txt'
!cd $workDir; \
SIPSim OTU_PCR \
$otuTableFile \
> $otu_PCR_file
!cd $workDir; \
head -n 4 $otu_PCR_file
In [247]:
%%R -i workDir -i otu_PCR_file -i otuTableFile
setwd(workDir)
tbl.otu = read.delim(otuTableFile, sep='\t')
tbl.otu.g = tbl.otu %>% gather('type', 'value', c(count, rel_abund))
tbl.otu.PCR = read.delim(otu_PCR_file, sep='\t') %>%
select(-count)
tbl.otu.PCR.g = tbl.otu.PCR %>% gather('type', 'value', c(rel_abund)) %>%
mutate(type = 'rel_abund_PCR')
#tbl.otu.PCR.g %>% head(n=3) %>% print
tbl.otu.c = rbind(tbl.otu.g, tbl.otu.PCR.g)
tbl.otu.c %>% head(n=3)
In [248]:
%%R -w 900 -h 400
ggplot(tbl.otu.c, aes(BD_mid, value, fill=taxon)) +
geom_area() +
facet_grid(type ~ ., scales='free_y') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [249]:
%%R -w 900 -h 400
tbl.otu.c.f = tbl.otu.c %>%
filter(type != 'count')
tbl.otu.c.f$taxon = gsub('^([a-zA-Z0-9_]{10,15})_', '\\1\n', tbl.otu.c.f$taxon)
tbl.otu.c.f$taxon = gsub('\n([a-zA-Z0-9_]{10,15})_', '\n\\1\n', tbl.otu.c.f$taxon)
ggplot(tbl.otu.c.f, aes(BD_mid, value, fill=type)) +
geom_area(position='dodge', alpha=0.5) +
facet_grid(taxon ~ .) +
theme_bw() +
theme(
text = element_text(size=16)
)
In [250]:
otu_sub_file = 'OTU_abs1e9_sub.txt'
!cd $workDir; \
SIPSim OTU_subsample \
$otuTableFile \
> $otu_sub_file
!cd $workDir; \
head -n 4 $otu_sub_file
In [251]:
otu_PCR_sub_file = 'OTU_abs1e9_PCR_sub.txt'
!cd $workDir; \
SIPSim OTU_subsample \
$otu_PCR_file \
> $otu_PCR_sub_file
!cd $workDir; \
head -n 4 $otu_PCR_sub_file
In [252]:
%%R -i workDir -i otu_sub_file -i otu_PCR_sub_file
setwd(workDir)
tbl.otu1 = read.delim(otu_sub_file, sep='\t') %>%
mutate(file = 'no_PCR')
tbl.otu2 = read.delim(otu_PCR_sub_file, sep='\t') %>%
mutate(file = 'PCR')
tbl.otu = rbind(tbl.otu1, tbl.otu2)
tbl.otu %>% head(n=3)
In [253]:
%%R -w 900 -h 400
ggplot(tbl.otu, aes(BD_mid, rel_abund, fill=taxon)) +
geom_area() +
facet_grid(file ~ .) +
theme_bw() +
theme(
text = element_text(size=16)
)
In [ ]: