In [1]:
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome10/HTSSIP/'
# params
nprocs = 10
bandwidth = 0.8
DBL_scaling = 0.5
subsample_dist = 'lognormal'
subsample_mean = 9.432
subsample_scale = 0.5
subsample_min = 10000
subsample_max = 30000
In [2]:
import os
import sys
In [3]:
%load_ext rpy2.ipython
In [4]:
%%R
library(dplyr)
library(tidyr)
library(ggplot2)
In [5]:
if not os.path.isdir(workDir):
os.makedirs(workDir)
%cd $workDir
In [6]:
# Determining min/max BD that
## min G+C cutoff
min_GC = 13.5
## max G+C cutoff
max_GC = 80
## max G+C shift
max_13C_shift_in_BD = 0.036
min_range_BD = min_GC/100.0 * 0.098 + 1.66
max_range_BD = max_GC/100.0 * 0.098 + 1.66
max_range_BD = max_range_BD + max_13C_shift_in_BD
print('Min BD: {}'.format(min_range_BD))
print('Max BD: {}'.format(max_range_BD))
In [7]:
%%bash -s $nprocs
source activate SIPSim
SIPSim genome_index \
../genomes/genomes10.txt \
--fp ../genomes/ \
--np $1 \
> index_log.txt
tail index_log.txt
In [30]:
# %%bash -s $nprocs
# source activate SIPSim
# SIPSim incorp_config_example \
# --percTaxa 50 \
# --percIncorpMean 50 \
# --percIncorpSD 1 \
# --n_reps 3 \
# > incorp.config
# # checking output
# cat incorp.config
In [65]:
incorp_config = """
[1]
# baseline: no incorporation
treatment = control
[[intraPopDist 1]]
distribution = uniform
[[[start]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
[[[end]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
[2]
# 'treatment' community: possible incorporation
treatment = labeled
max_perc_taxa_incorp = 50
[[intraPopDist 1]]
distribution = normal
[[[mu]]]
[[[[interPopDist 1]]]]
start = 30
distribution = uniform
end = 90
[[[sigma]]]
[[[[interPopDist 1]]]]
start = 1
distribution = uniform
end = 1
[3]
# baseline: no incorporation
treatment = control
[[intraPopDist 1]]
distribution = uniform
[[[start]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
[[[end]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
[4]
# 'treatment' community: possible incorporation
treatment = labeled
max_perc_taxa_incorp = 50
[[intraPopDist 1]]
distribution = normal
[[[mu]]]
[[[[interPopDist 1]]]]
start = 30
distribution = uniform
end = 90
[[[sigma]]]
[[[[interPopDist 1]]]]
start = 1
distribution = uniform
end = 1
[5]
# baseline: no incorporation
treatment = control
[[intraPopDist 1]]
distribution = uniform
[[[start]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
[[[end]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
[6]
# 'treatment' community: possible incorporation
treatment = labeled
max_perc_taxa_incorp = 50
[[intraPopDist 1]]
distribution = normal
[[[mu]]]
[[[[interPopDist 1]]]]
start = 30
distribution = uniform
end = 90
[[[sigma]]]
[[[[interPopDist 1]]]]
start = 1
distribution = uniform
end = 1
"""
with open('incorp.config', 'w') as outF:
outF.write(incorp_config)
In [66]:
%%bash
source activate SIPSim
SIPSim communities \
--config incorp.config \
--abund_dist_p mean:4,sigma:1 \
../genomes/genomes10.txt \
> comm.txt
head -n 5 comm.txt
In [67]:
%%bash -s $min_range_BD $max_range_BD
source activate SIPSim
SIPSim gradient_fractions \
--BD_min $1 \
--BD_max $2 \
comm.txt \
> fracs.txt
head -n 5 fracs.txt
In [68]:
%%bash -s $nprocs
source activate SIPSim
# creating amplicon fragments
SIPSim fragments \
../genomes/genomes10.txt \
--fp ../genomes/ \
--fr ../515Fm-927Rm.fna \
--fld skewed-normal,9000,2500,-5 \
--flr None,None \
--np $1 \
> genome10_ampFrag.pkl
In [69]:
%%bash -s $bandwidth
source activate SIPSim
SIPSim fragment_KDE \
genome10_ampFrag.pkl \
--bw $1 \
> genome10_ampFrag_KDE.pkl
In [70]:
%%bash -s $nprocs
source activate SIPSim
# info on KDEs
SIPSim KDE_info \
-s genome10_ampFrag_KDE.pkl \
> genome10_ampFrag_info.txt
In [71]:
%%R
# loading
df = read.delim('genome10_ampFrag_info.txt', sep='\t')
df_kde1 = df %>%
filter(KDE_ID == 1)
df_kde1 %>% head(n=3)
BD_GC50 = 0.098 * 0.5 + 1.66
In [72]:
%%R -w 500 -h 400
# plotting
p_amp = ggplot(df_kde1, aes(taxon_ID, median)) +
geom_pointrange(aes(ymin=median-stdev, ymax=median+stdev)) +
geom_hline(yintercept=BD_GC50, linetype='dashed', color='red', alpha=0.7) +
labs(x='Median buoyant density') +
theme_bw() +
theme(
text = element_text(size=16),
axis.text.x = element_text(angle=65, hjust=1)
)
plot(p_amp)
In [73]:
%%bash -s $bandwidth $nprocs
source activate SIPSim
SIPSim diffusion \
genome10_ampFrag_KDE.pkl \
--bw $1 \
--np $2 \
> genome10_ampFrag_dif_KDE.pkl
In [74]:
%%bash -s $DBL_scaling $nprocs
source activate SIPSim
SIPSim DBL \
--comm comm.txt \
--commx $1 \
--np $2 \
-o genome10_ampFrag_dif_DBL_KDE.pkl \
genome10_ampFrag_dif_KDE.pkl \
2> genome10_ampFrag_dif_DBL_KDE.log
# checking output
head -n 5 genome10_ampFrag_dif_DBL_KDE.log
echo "..."
tail -n 5 genome10_ampFrag_dif_DBL_KDE.log
In [75]:
%%bash -s $nprocs
source activate SIPSim
SIPSim KDE_select_taxa \
-f 0.5 \
genome10_ampFrag_KDE.pkl \
> incorporators.txt
tail incorporators.txt
In [76]:
%%bash -s $nprocs
source activate SIPSim
SIPSim isotope_incorp \
--comm comm.txt \
--shift genome10_ampFrag_BD-shift.txt \
--taxa incorporators.txt \
--np $1 \
-o genome10_ampFrag_dif_DBL_incorp_KDE.pkl \
genome10_ampFrag_dif_DBL_KDE.pkl \
incorp.config \
2> genome10_ampFrag_dif_DBL_incorp_KDE.log
# checking log
head -n 5 genome10_ampFrag_dif_DBL_incorp_KDE.log
echo "..."
tail -n 5 genome10_ampFrag_dif_DBL_incorp_KDE.log
In [77]:
%%R -w 1000 -h 350
df = read.delim('genome10_ampFrag_BD-shift.txt', sep='\t') %>%
mutate(library = library %>% as.character)
p = ggplot(df, aes(taxon, mean, fill=library)) +
geom_boxplot(aes(ymin=min, lower=q25, middle=median, upper=q75, ymax=max)) +
labs(y='BD shift') +
theme_bw() +
theme(
axis.text.x = element_text(angle=45, hjust=1)
)
plot(p)
In [ ]:
%%bash -s $nprocs
source activate SIPSim
SIPSim OTU_table \
--abs 1e9 \
--np $1 \
genome10_ampFrag_dif_DBL_incorp_KDE.pkl \
comm.txt \
fracs.txt \
> OTU_n3_abs1e9.txt \
2> OTU_n3_abs1e9.log
# checking log
head -n 3 OTU_n3_abs1e9.log
echo '...'
tail -n 3 OTU_n3_abs1e9.log
In [ ]:
%%bash
source activate SIPSim
SIPSim OTU_wide_long -w \
OTU_n3_abs1e9.txt \
> OTU_n3_abs1e9_w.txt
In [ ]:
%%bash
source activate SIPSim
SIPSim OTU_wide_long -w \
OTU_n3_abs1e9.txt \
> OTU_n3_abs1e9_w.txt
In [ ]:
%%bash
source activate SIPSim
SIPSim OTU_sample_data \
OTU_n3_abs1e9.txt \
> OTU_n3_abs1e9_meta.txt
In [ ]:
%%bash
source activate SIPSim
SIPSim qSIP \
OTU_n3_abs1e9.txt \
OTU_n3_abs1e9.txt \
> OTU_n3_abs1e9_qSIP.txt
In [ ]:
%%bash
source activate SIPSim
# getting around `WARNING: ignoring environment value of R_HOME` to STDOUT error
unset R_HOME
# making phyloseq object
phyloseq_make.R OTU_n3_abs1e9_w.txt \
-s OTU_n3_abs1e9_meta.txt \
> OTU_n3_abs1e9.physeq
In [ ]:
%%bash
source activate SIPSim
SIPSim OTU_PCR \
OTU_n3_abs1e9.txt \
> OTU_n3_abs1e9_PCR.txt
In [ ]:
%%bash -s $subsample_dist $subsample_mean $subsample_scale $subsample_min $subsample_max
source activate SIPSim
SIPSim OTU_subsample \
--dist $1 \
--dist_params mean:$2,sigma:$3 \
--min_size $4 \
--max_size $5 \
OTU_n3_abs1e9_PCR.txt \
> OTU_n3_abs1e9_PCR_subNorm.txt
In [ ]:
%%bash
source activate SIPSim
SIPSim OTU_wide_long -w \
OTU_n3_abs1e9_PCR_subNorm.txt \
> OTU_n3_abs1e9_PCR_subNorm_w.txt
In [ ]:
%%bash
source activate SIPSim
SIPSim OTU_sample_data \
OTU_n3_abs1e9_PCR_subNorm.txt \
> OTU_n3_abs1e9_PCR_subNorm_meta.txt
In [ ]:
%%bash
source activate SIPSim
SIPSim qSIP \
OTU_n3_abs1e9.txt \
OTU_n3_abs1e9_PCR_subNorm.txt \
> OTU_n3_abs1e9_PCR_subNorm_qSIP.txt
In [ ]:
%%bash
source activate SIPSim
# getting around `WARNING: ignoring environment value of R_HOME` to STDOUT error
unset R_HOME
# making phyloseq object
phyloseq_make.R OTU_n3_abs1e9_PCR_subNorm_w.txt \
-s OTU_n3_abs1e9_PCR_subNorm_meta.txt \
> OTU_n3_abs1e9_PCR_subNorm.physeq
In [ ]: