In [381]:
import os
import glob
import re
import nestly
%load_ext rpy2.ipython
%load_ext pushnote
In [382]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
library(phyloseq)
## BD for G+C of 0 or 100
BD.GCp0 = 0 * 0.098 + 1.66
BD.GCp100 = 1 * 0.098 + 1.66
In [383]:
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome1147/qSIP/'
buildDir = os.path.join(workDir, 'default_rep3')
R_dir = '/home/nick/notebook/SIPSim/lib/R/'
fragFile = '/home/nick/notebook/SIPSim/dev/bac_genome1147/validation/ampFrags.pkl'
genomeIndex = '/var/seq_data/ncbi_db/genome/Jan2016/bac_complete_spec-rep1_rn/genome_index.txt'
# simulation parameters
prefrac_comm_abundance = '1e9'
n_gradient_reps = 3
nprocs = 24
In [390]:
# building tree structure
nest = nestly.Nest()
## varying params
nest.add('abs', [prefrac_comm_abundance])
## set params
nest.add('percIncorp', [100], create_dir=False) # percent atom excess
nest.add('percTaxa', [0], create_dir=False) # overidden by 'nIncorporators'
nest.add('nIncorporators', [114], create_dir=False) # number of incorporators
nest.add('n_gradient_reps', [n_gradient_reps], create_dir=False)
nest.add('np', [nprocs], create_dir=False)
### input/output files
nest.add('buildDir', [buildDir], create_dir=False)
nest.add('R_dir', [R_dir], create_dir=False)
nest.add('fragFile', [fragFile], create_dir=False)
nest.add('genomeIndex', [genomeIndex], create_dir=False)
# building directory tree
nest.build(buildDir)
# bash file to run
bashFile = os.path.join(buildDir, 'SIPSimRun.sh')
In [391]:
%%writefile $bashFile
#!/bin/bash
export PATH={R_dir}:$PATH
echo '#-- SIPSim pipeline --#'
echo '# converting fragments to KDE'
SIPSim fragment_KDE \
{fragFile} \
> ampFrags_KDE.pkl
echo '# adding diffusion'
SIPSim diffusion \
ampFrags_KDE.pkl \
--np {np} \
> ampFrags_KDE_dif.pkl
echo '# adding DBL contamination'
SIPSim DBL \
ampFrags_KDE_dif.pkl \
--np {np} \
> ampFrags_KDE_dif_DBL.pkl
echo '# making incorp file'
SIPSim incorpConfigExample \
--percTaxa {percTaxa} \
--percIncorpUnif {percIncorp} \
--n_reps 3 \
> {percTaxa}_{percIncorp}_{n_gradient_reps}.config
echo '# selecting incorporator taxa (over-riding percTaxa)'
echo '## this is to make the gradient replicates consistent (qSIP finds mean among replicates)'
SIPSim KDE_selectTaxa \
ampFrags_KDE_dif_DBL.pkl \
-s {nIncorporators} > incorporators.txt
echo '# making community file'
SIPSim communities \
--config {percTaxa}_{percIncorp}_{n_gradient_reps}.config \
{genomeIndex} \
> comm.txt
echo '# adding isotope incorporation to BD distribution'
echo '## same incorporators for each treatment gradient'
SIPSim isotope_incorp \
ampFrags_KDE_dif_DBL.pkl \
{percTaxa}_{percIncorp}_{n_gradient_reps}.config \
--comm comm.txt \
--taxa incorporators.txt \
--np {np} \
> ampFrags_KDE_dif_DBL_inc.pkl
echo '# calculating BD shift from isotope incorporation'
SIPSim BD_shift \
ampFrags_KDE_dif_DBL.pkl \
ampFrags_KDE_dif_DBL_inc.pkl \
--np {np} \
> ampFrags_KDE_dif_DBL_inc_BD-shift.pkl
echo '# simulating gradient fractions'
SIPSim gradient_fractions \
comm.txt > fracs.txt
echo '# simulating an OTU table'
SIPSim OTU_table \
ampFrags_KDE_dif_DBL_inc.pkl \
comm.txt \
fracs.txt \
--abs {abs} \
--np {np} \
> OTU_abs{abs}.txt
echo '# simulating PCR'
SIPSim OTU_PCR \
OTU_abs{abs}.txt \
> OTU_abs{abs}_PCR.txt
echo '# subsampling from the OTU table (simulating sequencing of the DNA pool)'
SIPSim OTU_subsample \
OTU_abs{abs}_PCR.txt \
> OTU_abs{abs}_PCR_sub.txt
echo '# making a wide-formatted table'
SIPSim OTU_wideLong -w \
OTU_abs{abs}_PCR_sub.txt \
> OTU_abs{abs}_PCR_sub_w.txt
echo '# making metadata (phyloseq: sample_data)'
SIPSim OTU_sampleData \
OTU_abs{abs}_PCR_sub.txt \
> OTU_abs{abs}_PCR_sub_meta.txt
echo '# Removing control.json file'
rm -f control.json
In [392]:
!chmod 777 $bashFile
!cd $workDir; \
nestrun --template-file $bashFile -d default_rep3 --log-file log.txt -j 1
In [398]:
# building tree structure
nest = nestly.Nest()
## varying params
nest.add('abs', [prefrac_comm_abundance])
nest.add('sim_rep', range(10))
# constant params
expDesignFile = os.path.join(buildDir, 'exp_design.txt')
nest.add('expDesignFile', [expDesignFile], create_dir=False)
nest.add('buildDir', [buildDir], create_dir=False)
# building directory tree
nest.build(buildDir)
# bash file to run
bashFile = os.path.join(buildDir, 'qSIPRun.sh')
In [399]:
%%writefile $expDesignFile
1 control
2 treatment
3 control
4 treatment
5 control
6 treatment
In [400]:
%%writefile $bashFile
#!/bin/bash
# qSIP
SIPSim qSIP \
--reps 3 \
{buildDir}/{abs}/OTU_abs1e9.txt {buildDir}/{abs}/OTU_abs1e9_PCR_sub.txt \
> OTU_abs1e9_PCR_sub_qSIP.txt
# atom excess
SIPSim qSIP_atomExcess \
OTU_abs1e9_PCR_sub_qSIP.txt \
{expDesignFile} \
--np 14 \
> OTU_abs1e9_PCR_sub_qSIP_atom.txt
In [ ]:
!chmod 777 $bashFile
!cd $workDir; \
nestrun --template-file $bashFile -d default_rep3 --log-file log.txt -j 2
In [403]:
%%R -i buildDir -i prefrac_comm_abundance
OTU_file = file.path(buildDir, prefrac_comm_abundance, 'OTU_abs1e9.txt')
df.OTU = read.delim(OTU_file, sep='\t') %>%
group_by(library, fraction, BD_mid) %>%
summarize(total_count = sum(count)) %>%
ungroup() %>%
mutate(library = library %>% as.character)
df.OTU %>% head(n=3)
In [404]:
%%R -w 800 -h 300
ggplot(df.OTU, aes(BD_mid, total_count, color=library)) +
geom_point() +
geom_line() +
theme_bw() +
theme(
text = element_text(size=16)
)
In [405]:
qPCR_files = !find $buildDir -maxdepth 3 -name "OTU_abs1e9_PCR_sub_qSIP.txt"
qPCR_files
Out[405]:
In [406]:
%%R -i qPCR_files
# loading files
df = list()
for (x in qPCR_files){
f = gsub('/home/nick/notebook/SIPSim/dev/bac_genome1147/qSIP/default_rep3/1e9/([0-9.]+)/.+', '\\1', x)
df[[f]] = read.delim(x, sep='\t') %>%
distinct(library, fraction) %>%
select(-taxon) %>%
as.data.frame()
}
df = do.call('rbind', df)
df$file = gsub('\\.[0-9]+$', '', rownames(df))
rownames(df) = 1:nrow(df)
df = df %>%
mutate(exp_design = ifelse(library %% 2 == 0, 'treatment', 'control'),
library = library %>% as.character,
file = file %>% as.numeric)
df %>% head(n=3)
In [407]:
%%R -w 800 -h 1000
ggplot(df, aes(BD_mid, total_qPCR_copies, color=exp_design, group=library, shape=library)) +
geom_point() +
geom_line() +
facet_grid(file ~ .) +
#scale_y_log10() +
labs(x = 'Buoyant density', y='Total 16S copies (qPCR)') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [408]:
%%R -w 800 -h 1000
df = df %>%
group_by(file, library) %>%
mutate(frac_total_qPCR_copies = total_qPCR_copies / sum(total_qPCR_copies)) %>%
ungroup()
ggplot(df, aes(BD_mid, frac_total_qPCR_copies, color=exp_design, group=library, shape=library)) +
geom_point() +
geom_line() +
facet_grid(file ~ .) +
labs(x = 'Buoyant density', y='Proportion of 16S rRNA\ngene abundance (total)') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [409]:
qPCR_files = !find $buildDir -maxdepth 3 -name "OTU_abs1e9_PCR_sub_qSIP_atom.txt"
qPCR_files
Out[409]:
In [410]:
%%R -i qPCR_files
# loading files
df.atom = list()
for (x in qPCR_files){
f = gsub('/home/nick/notebook/SIPSim/dev/bac_genome1147/qSIP/default_rep3/1e9/([0-9.]+)/.+', '\\1', x)
df.atom[[f]] = read.delim(x, sep='\t')
}
df.atom = do.call('rbind', df.atom)
df.atom$file = gsub('\\.[0-9]+$', '', rownames(df.atom))
rownames(df.atom) = 1:nrow(df.atom)
df.atom %>% head(n=3)
In [411]:
%%R -w 800 -h 1000
ggplot(df.atom, aes(BD_diff)) +
geom_histogram() +
labs(x='Density shift') +
facet_grid(file ~ .) +
theme_bw() +
theme(
text = element_text(size=16)
)
In [412]:
%%R -w 350 -h 300
ggplot(df.atom, aes(BD_diff, atom_fraction_excess, color=file)) +
geom_point() +
labs(x='density shift', y='atom fraction excess') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [413]:
%%R
ret = lm(atom_fraction_excess ~ BD_diff, data=df.atom)
print(ret)
summary(ret)
In [414]:
%%R -w 800 -h 1200
df.atom = df.atom %>%
mutate(incorporator = ifelse(atom_CI_low > 0, TRUE, FALSE)) %>%
group_by(taxon) %>%
mutate(median_atom_frac = median(atom_fraction_excess, na.rm=TRUE)) %>%
ungroup()
df.atom$taxon = reorder(df.atom$taxon, -df.atom$median_atom_frac)
In [415]:
%%R -w 800 -h 1200
ggplot(df.atom, aes(taxon, atom_fraction_excess, color=incorporator)) +
geom_point(alpha=0.3, size=0.5) +
labs(y = 'atom fraction excess') +
facet_grid(file ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.text.x = element_blank()
)
In [416]:
%%R -w 800 -h 1200
ggplot(df.atom, aes(taxon, atom_fraction_excess, color=incorporator,
ymin=atom_CI_low, ymax=atom_CI_high)) +
geom_point(alpha=0.3, size=0.5) +
geom_linerange(alpha=0.5) +
geom_linerange(data=df.atom %>% filter(incorporator==TRUE)) +
labs(y = 'atom fraction excess') +
facet_grid(file ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.text.x = element_blank()
)