In [115]:
import os
import glob
import re
import nestly
In [116]:
%load_ext rpy2.ipython
%load_ext pushnote
In [117]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
library(phyloseq)
In [118]:
%%R
## min G+C cutoff
min_GC = 13.5
## max G+C cutoff
max_GC = 80
## max G+C shift
max_13C_shift_in_BD = 0.036
min_BD = min_GC/100.0 * 0.098 + 1.66
max_BD = max_GC/100.0 * 0.098 + 1.66
max_BD = max_BD + max_13C_shift_in_BD
cat('Min BD:', min_BD, '\n')
cat('Max BD:', max_BD, '\n')
In [126]:
workDir = '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/'
buildDir = os.path.join(workDir, 'rep4_DBL-comm_bw')
R_dir = '/home/nick/notebook/SIPSim/lib/R/'
fragFile = '/home/nick/notebook/SIPSim/dev/bac_genome1147/validation/ampFrags_kde_parsed.pkl'
commFile = '/home/nick/notebook/SIPSim/dev/fullCyc/fullCyc_12C-Con_trm_comm.txt'
# emperical data for validation
emp_shan_file = '/home/nick/notebook/SIPSim/dev/fullCyc_trim/SIP-core_unk_shan.txt'
emp_BDspan_file = '/home/nick/notebook/SIPSim/dev/fullCyc_trim/SIP-core_unk_trm_BD-span.txt'
emp_corr_file = '/home/nick/notebook/SIPSim/dev/fullCyc_trim/SIP-core_unk_trm_corr.txt'
nreps = 4
nprocs = 10
In [127]:
# building tree structure
nest = nestly.Nest()
# varying params
nest.add('DBL_scaling', [0, 0.2, 0.4, 0.8])
nest.add('bandwidth', [0.06, 0.6])
nest.add('rep', [x + 1 for x in xrange(nreps)])
## set params
nest.add('abs', ['1e9'], create_dir=False)
nest.add('percIncorp', [0], create_dir=False)
nest.add('percTaxa', [0], create_dir=False)
nest.add('np', [nprocs], create_dir=False)
nest.add('subsample_dist', ['lognormal'], create_dir=False)
nest.add('subsample_mean', [9.432], create_dir=False)
nest.add('subsample_scale', [0.5], create_dir=False)
nest.add('subsample_min', [10000], create_dir=False)
nest.add('subsample_max', [30000], create_dir=False)
### input/output files
nest.add('buildDir', [buildDir], create_dir=False)
nest.add('R_dir', [R_dir], create_dir=False)
nest.add('fragFile', [fragFile], create_dir=False)
nest.add('commFile', [commFile], create_dir=False)
# building directory tree
nest.build(buildDir)
# bash file to run
bashFile = os.path.join(buildDir, 'SIPSimRun.sh')
In [128]:
%%writefile $bashFile
#!/bin/bash
export PATH={R_dir}:$PATH
echo '#-- SIPSim pipeline --#'
echo '# shuffling taxa in comm file'
comm_shuffle_taxa.r {commFile} > comm.txt
echo '# adding diffusion'
SIPSim diffusion \
{fragFile} \
--bw {bandwidth} \
--np {np} \
> ampFrags_KDE_dif.pkl
echo '# adding DBL contamination; abundance-weighted smearing'
SIPSim DBL \
ampFrags_KDE_dif.pkl \
--comm comm.txt \
--commx {DBL_scaling} \
--bw {bandwidth} \
--np {np} \
> ampFrags_KDE_dif_DBL.pkl
echo '# making incorp file'
SIPSim incorpConfigExample \
--percTaxa {percTaxa} \
--percIncorpUnif {percIncorp} \
> {percTaxa}_{percIncorp}.config
echo '# adding isotope incorporation to BD distribution'
SIPSim isotope_incorp \
ampFrags_KDE_dif_DBL.pkl \
{percTaxa}_{percIncorp}.config \
--comm comm.txt \
--bw {bandwidth} \
--np {np} \
> ampFrags_KDE_dif_DBL_inc.pkl
echo '# simulating gradient fractions'
SIPSim gradient_fractions \
comm.txt \
> fracs.txt
echo '# simulating an OTU table'
SIPSim OTU_table \
ampFrags_KDE_dif_DBL_inc.pkl \
comm.txt \
fracs.txt \
--abs {abs} \
--np {np} \
> OTU_abs{abs}.txt
#-- w/ PCR simulation --#
echo '# simulating PCR'
SIPSim OTU_PCR \
OTU_abs{abs}.txt \
> OTU_abs{abs}_PCR.txt
echo '# subsampling from the OTU table (simulating sequencing of the DNA pool)'
SIPSim OTU_subsample \
--dist {subsample_dist} \
--dist_params mean:{subsample_mean},sigma:{subsample_scale} \
--min_size {subsample_min} \
--max_size {subsample_max} \
OTU_abs{abs}_PCR.txt \
> OTU_abs{abs}_PCR_sub.txt
echo '# making a wide-formatted table'
SIPSim OTU_wideLong -w \
OTU_abs{abs}_PCR_sub.txt \
> OTU_abs{abs}_PCR_sub_w.txt
echo '# making metadata (phyloseq: sample_data)'
SIPSim OTU_sampleData \
OTU_abs{abs}_PCR_sub.txt \
> OTU_abs{abs}_PCR_sub_meta.txt
#-- w/out PCR simulation --#
echo '# subsampling from the OTU table (simulating sequencing of the DNA pool)'
SIPSim OTU_subsample \
--dist {subsample_dist} \
--dist_params mean:{subsample_mean},sigma:{subsample_scale} \
--min_size {subsample_min} \
--max_size {subsample_max} \
OTU_abs{abs}.txt \
> OTU_abs{abs}_sub.txt
echo '# making a wide-formatted table'
SIPSim OTU_wideLong -w \
OTU_abs{abs}_sub.txt \
> OTU_abs{abs}_sub_w.txt
echo '# making metadata (phyloseq: sample_data)'
SIPSim OTU_sampleData \
OTU_abs{abs}_sub.txt \
> OTU_abs{abs}_sub_meta.txt
#-- making summary tables --#
# PCR
shannon_calc.r OTU_abs1e9_PCR_sub.txt > OTU_abs1e9_PCR_sub_shan.txt
BD_span_calc.r OTU_abs1e9_PCR_sub.txt comm.txt > OTU_abs1e9_PCR_sub_BD-span.txt
correlogram_make.r OTU_abs1e9_PCR_sub.txt > OTU_abs1e9_PCR_sub_corr.txt
# no PCR
shannon_calc.r OTU_abs1e9_sub.txt > OTU_abs1e9_sub_shan.txt
BD_span_calc.r OTU_abs1e9_sub.txt comm.txt > OTU_abs1e9_sub_BD-span.txt
correlogram_make.r OTU_abs1e9_sub.txt > OTU_abs1e9_sub_corr.txt
In [ ]:
!chmod 777 $bashFile
!cd $workDir; \
nestrun --template-file $bashFile -d rep4_DBL-comm_bw --log-file log.txt -j 1
In [ ]:
%pushnote rep4_DBL-comm_bw complete
In [145]:
%%R
# function for loading dataset files
load.data.files = function(sim.files, emp.file){
# loading
## simulations
df = list()
for(x in sim.files){
# simulation
tmp = read.delim(x, sep='\t')
xx = strsplit(x, '/')[[1]]
tmp$DBL_scale = xx[10] %>% as.numeric
tmp$bw = xx[11] %>% as.numeric
tmp$SIM_rep = xx[12] %>% as.numeric
tmp$dataset = 'Simulation'
df[[x]] = tmp
# emperical (matched for each simulation)
if(xx[12] %>% as.numeric == 1){
tmp = read.delim(emp.file, sep='\t')
tmp$DBL_scale = xx[10] %>% as.numeric
tmp$bw = xx[11] %>% as.numeric
tmp$SIM_rep = 1
tmp$dataset = 'Emperical'
xy = paste0(x, '_EMP')
df[[xy]] = tmp
}
}
df = do.call(rbind, df) %>% as.data.frame
rownames(df) = 1:nrow(df)
# return
return(df)
}
In [146]:
sim_shan_files = !find $buildDir -name "OTU_abs1e9_PCR_sub_shan.txt"
print len(sim_shan_files)
print emp_shan_file
In [147]:
%%R -i sim_shan_files -i emp_shan_file
df.shan = load.data.files(sim_shan_files, emp_shan_file)
df.shan %>% tail(n=3)
In [148]:
%%R -w 800 -h 600
# summarizing
df.shan.s = df.shan %>%
group_by(dataset, bw, DBL_scale, BD_bin = ntile(Buoyant_density, 24)) %>%
summarize(mean_shannon = mean(shannon),
sd_shannon = sd(shannon),
mean_BD = mean(Buoyant_density))
ggplot(df.shan.s, aes(mean_BD, mean_shannon, color=dataset,
ymin=mean_shannon-sd_shannon, ymax=mean_shannon+sd_shannon)) +
geom_pointrange() +
facet_grid(DBL_scale ~ bw) +
labs(x='Buoyant density (binned; 24 bins)', y='Shannon index') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [172]:
%%R -w 550 -h 600
# pairwise correlations for each dataset
df.shan.bin = df.shan %>%
group_by(BD_bin = ntile(Buoyant_density, 24))
#calc.spearman = function(x){
# cor(x[,'shannon.x'], x['shannon.y'], method='spearman')[1,1]
#}
calc.pearson = function(x){
cor(x[,'shannon.x'], x['shannon.y'], method='pearson')[1,1]
}
df.shan.corr = inner_join(df.shan.bin, df.shan.bin, c('BD_bin' = 'BD_bin',
'bw' = 'bw',
'DBL_scale' = 'DBL_scale')) %>%
group_by(bw, DBL_scale, dataset.x, dataset.y) %>%
nest() %>%
mutate(model = purrr::map(data, calc.pearson)) %>%
unnest(pearson = model %>% purrr::map(function(x) x)) %>%
ungroup() %>%
select(-data, -model) %>%
mutate(pearson_txt = round(pearson, 2))
# plotting
ggplot(df.shan.corr, aes(dataset.x, dataset.y, fill=pearson)) +
geom_tile() +
geom_text(aes(label=pearson_txt), color='white', size=6) +
scale_fill_gradient(low='black', high='red') +
labs(title='Shannon index') +
facet_grid(DBL_scale ~ bw) +
theme(
text = element_text(size=16)
)
In [150]:
sim_BDspan_files = !find $buildDir -name "OTU_abs1e9_PCR_sub_BD-span.txt"
print len(sim_BDspan_files)
print emp_BDspan_file
In [151]:
%%R
df.BDspan = load.data.files(sim_BDspan_files, emp_BDspan_file)
df.BDspan %>% head
In [152]:
%%R -w 700 -h 600
# plotting
ggplot(df.BDspan, aes(mean_preFrac_abund, BD_range_perc, fill=dataset)) +
geom_hex(alpha=0.5) +
scale_x_log10() +
facet_grid(DBL_scale ~ bw) +
labs(x='Pre-fractionation abundance', y='BD span') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [153]:
%%R -i sim_BDspan_files -i emp_BDspan_file
# binning by pre-fractionation abundances
n.tile = 20
df.BDspan = df.BDspan %>%
group_by(dataset, library, DBL_scale, bw, preFrac_abund_bin = ntile(mean_preFrac_abund, n.tile)) %>%
summarize(mean_preFrac_abund = mean(mean_preFrac_abund),
var_BD_range = var(BD_range),
sd_BD_range = sd(BD_range))
df.BDspan %>% tail(n=3)
In [154]:
%%R -w 550 -h 600
calc.spearman = function(x){
cor(x[,'var_BD_range.x'], x['var_BD_range.y'], method='spearman')[1,1]
}
df.BDspan.corr = inner_join(df.BDspan, df.BDspan, c('preFrac_abund_bin' = 'preFrac_abund_bin',
'DBL_scale' = 'DBL_scale',
'bw' = 'bw')) %>%
group_by(DBL_scale, bw, dataset.x, dataset.y) %>%
nest() %>%
mutate(model = purrr::map(data, calc.spearman)) %>%
unnest(spearman = model %>% purrr::map(function(x) x)) %>%
ungroup() %>%
select(-data, -model) %>%
mutate(spearman_txt = round(spearman, 2))
# plotting
ggplot(df.BDspan.corr, aes(dataset.x, dataset.y, fill=spearman)) +
geom_tile() +
geom_text(aes(label=spearman_txt), color='white', size=6) +
scale_fill_gradient(low='black', high='red') +
labs(title='BD span') +
facet_grid(DBL_scale ~ bw) +
theme(
text = element_text(size=16)
)
In [155]:
sim_corr_files = !find $buildDir -name "OTU_abs1e9_PCR_sub_corr.txt"
print len(sim_corr_files)
print emp_corr_file
In [156]:
%%R -i sim_corr_files -i emp_corr_file
df.corr = load.data.files(sim_corr_files, emp_corr_file)
# binning
df.corr = df.corr %>%
filter(!is.na(Mantel.corr)) %>%
group_by(DBL_scale, bw, dataset, library, class.index.bin = ntile(class.index, 12))
df.corr %>% tail(n=3) %>% as.data.frame
In [157]:
%%R -w 700 -h 600
# plotting
df.corr.s = df.corr %>%
group_by(DBL_scale, bw, dataset, class.index.bin) %>%
summarize(mean_Mantel.corr = mean(Mantel.corr),
sd_Mantel.corr = sd(Mantel.corr),
mean_class.index = mean(class.index))
ggplot(df.corr.s, aes(mean_class.index, mean_Mantel.corr, color=dataset,
ymin=mean_Mantel.corr-sd_Mantel.corr,
ymax=mean_Mantel.corr+sd_Mantel.corr)) +
geom_pointrange() +
labs(x='Class index (binned; 12 bins)', y='Mantel correlation coef.') +
facet_grid(DBL_scale ~ bw) +
theme_bw() +
theme(
text = element_text(size=16)
)
In [170]:
%%R -w 550 -h 600
# pairwise correlations for each dataset
df.shan.bin = df.shan %>%
group_by(BD_bin = ntile(Buoyant_density, 24))
calc.pearson = function(x){
cor(x[,'Mantel.corr.x'], x['Mantel.corr.y'], method='pearson')[1,1]
}
df.corr.lm = inner_join(df.corr, df.corr, c('class.index.bin' = 'class.index.bin',
'bw' = 'bw',
'DBL_scale' = 'DBL_scale')) %>%
group_by(bw, DBL_scale, dataset.x, dataset.y) %>%
nest() %>%
mutate(model = purrr::map(data, calc.pearson)) %>%
unnest(pearson = model %>% purrr::map(function(x) x)) %>%
ungroup() %>%
select(-data, -model) %>%
mutate(pearson_txt = round(pearson, 2))
# plotting
ggplot(df.corr.lm, aes(dataset.x, dataset.y, fill=pearson)) +
geom_tile() +
geom_text(aes(label=pearson_txt), color='white', size=6) +
scale_fill_gradient(low='black', high='red') +
labs(title='Shannon index') +
facet_grid(DBL_scale ~ bw) +
theme(
text = element_text(size=16)
)
In [171]:
%%R -w 550 -h 600
# df.corr.lm = inner_join(df.corr, df.corr, c('class.index.bin' = 'class.index.bin',
# 'DBL_scale' = 'DBL_scale',
# 'bw' = 'bw')) %>%
# group_by(DBL_scale, bw, dataset.x, dataset.y) %>%
# nest() %>%
# mutate(model = purrr::map(data, ~lm(Mantel.corr.y ~ Mantel.corr.x, data=.))) %>%
# unnest(model %>% purrr::map(broom::glance)) %>%
# ungroup() %>%
# select(-data, -model) %>%
# mutate(r.squared_txt = round(r.squared, 2))
# # table
# #df.corr.lm %>% dplyr::select(dataset.x, dataset.y, r.squared) %>% print
# # plotting
# ggplot(df.corr.lm, aes(dataset.x, dataset.y, fill=r.squared)) +
# geom_tile() +
# geom_text(aes(label=r.squared_txt), color='white', size=6) +
# scale_fill_gradient(low='black', high='red') +
# labs(title='Beta diversity correlogram') +
# facet_grid(DBL_scale ~ bw) +
# theme(
# text = element_text(size=16)
# )
In [327]:
OTU_files = !find $buildDir -name "OTU_abs1e9_sub.txt"
OTU_files
Out[327]:
In [332]:
%%R -i OTU_files
# loading files
df.SIM = list()
for (x in OTU_files){
df = read.delim(x, sep='\t')
xx = strsplit(x, '/')[[1]]
df$DBL_scale = xx[10] %>% as.numeric
df$bw = xx[11] %>% as.numeric
df$SIM_rep = xx[12] %>% as.numeric
df.SIM[[x]] = df
}
df.SIM = do.call('rbind', df.SIM)
df.SIM$file = gsub('\\.[0-9]+$', '', rownames(df.SIM))
rownames(df.SIM) = 1:nrow(df.SIM)
df.SIM %>% head(n=3)
In [ ]:
In [ ]:
In [334]:
comm_files = !find $buildDir -name "comm.txt"
comm_files
Out[334]:
In [335]:
%%R -i comm_files
df.SIM.comm = list()
for (x in comm_files){
df = read.delim(x, sep='\t')
xx = strsplit(x, '/')[[1]]
df$DBL_scale = xx[10] %>% as.numeric
df$bw = xx[11] %>% as.numeric
df$SIM_rep = xx[12] %>% as.numeric
df.SIM.comm[[x]] = df
}
df.SIM.comm = do.call(rbind, df.SIM.comm)
rownames(df.SIM.comm) = 1:nrow(df.SIM.comm)
df.SIM.comm = df.SIM.comm %>%
rename('bulk_abund' = rel_abund_perc) %>%
mutate(bulk_abund = bulk_abund / 100)
df.SIM.comm %>% head(n=3)
In [344]:
%%R
## joining SIP & comm (pre-fractionation)
df.SIM.j = inner_join(df.SIM, df.SIM.comm, c('library' = 'library',
'taxon' = 'taxon_name',
'DBL_scale' = 'DBL_scale',
'bw' = 'bw',
'SIM_rep' = 'SIM_rep')) %>%
filter(BD_mid >= min_BD,
BD_mid <= max_BD)
df.SIM.j %>% head(n=3)
In [345]:
%%R
# calculating BD range
df.SIM.j.f = df.SIM.j %>%
filter(count > 0) %>%
group_by(bw, DBL_scale, SIM_rep) %>%
mutate(max_BD_range = max(BD_mid) - min(BD_mid)) %>%
ungroup() %>%
group_by(bw, DBL_scale, SIM_rep, taxon) %>%
summarize(mean_bulk_abund = mean(bulk_abund),
min_BD = min(BD_mid),
max_BD = max(BD_mid),
BD_range = max_BD - min_BD,
BD_range_perc = BD_range / first(max_BD_range) * 100) %>%
ungroup() %>%
mutate(SIM_rep = SIM_rep %>% as.character)
df.SIM.j.f %>% head(n=3) %>% as.data.frame
In [353]:
%%R -h 750 -w 800
## plotting
ggplot(df.SIM.j.f, aes(mean_bulk_abund, BD_range_perc, color=SIM_rep)) +
geom_point(alpha=0.5, shape='O') +
scale_x_log10(limits=c(0.0001, 0.1)) +
scale_y_continuous() +
facet_grid(DBL_scale ~ bw) +
labs(x='Pre-fractionation abundance', y='% of total BD range') +
#geom_vline(xintercept=0.001, linetype='dashed', alpha=0.5) +
theme_bw() +
theme(
text = element_text(size=16),
panel.grid = element_blank(),
legend.position = 'none'
)
In [462]:
%%R -h 750 -w 800
## plotting
ggplot(df.SIM.j.f, aes(mean_bulk_abund, BD_range_perc)) +
geom_hex() +
scale_x_log10(limits=c(0.0001, 0.1)) +
scale_y_continuous(limits=c(0,105)) +
scale_fill_gradient(low='grey95', high='black') +
facet_grid(DBL_scale ~ bw) +
labs(x='Pre-fractionation abundance', y='% of total BD range') +
#geom_vline(xintercept=0.001, linetype='dashed', alpha=0.5) +
theme_bw() +
theme(
text = element_text(size=16),
panel.grid = element_blank(),
legend.position = 'none'
)
In [355]:
%%R
# giving value to missing abundances
min.pos.val = df.SIM.j %>%
filter(rel_abund > 0) %>%
group_by() %>%
mutate(min_abund = min(rel_abund)) %>%
ungroup() %>%
filter(rel_abund == min_abund)
min.pos.val = min.pos.val[1,'rel_abund'] %>% as.numeric
imp.val = min.pos.val / 10
# convert numbers
df.SIM.j[df.SIM.j$rel_abund == 0, 'abundance'] = imp.val
# another closure operation
df.SIM.j = df.SIM.j %>%
group_by(bw, DBL_scale, SIM_rep, fraction) %>%
mutate(rel_abund = rel_abund / sum(rel_abund))
# status
cat('Below detection level abundances converted to: ', imp.val, '\n')
In [356]:
%%R
shannon_index_long = function(df, abundance_col, ...){
# calculating shannon diversity index from a 'long' formated table
## community_col = name of column defining communities
## abundance_col = name of column defining taxon abundances
df = df %>% as.data.frame
cmd = paste0(abundance_col, '/sum(', abundance_col, ')')
df.s = df %>%
group_by_(...) %>%
mutate_(REL_abundance = cmd) %>%
mutate(pi__ln_pi = REL_abundance * log(REL_abundance),
shannon = -sum(pi__ln_pi, na.rm=TRUE)) %>%
ungroup() %>%
dplyr::select(-REL_abundance, -pi__ln_pi) %>%
distinct_(...)
return(df.s)
}
In [358]:
%%R -w 800 -h 700
# calculating shannon
df.SIM.shan = shannon_index_long(df.SIM.j, 'count', 'library', 'fraction', 'bw', 'DBL_scale') %>%
filter(BD_mid >= min_BD,
BD_mid <= max_BD)
df.SIM.shan.s = df.SIM.shan %>%
group_by(bw, DBL_scale, BD_bin = ntile(BD_mid, 24)) %>%
summarize(mean_BD = mean(BD_mid),
mean_shannon = mean(shannon),
sd_shannon = sd(shannon))
# plotting
p = ggplot(df.SIM.shan.s, aes(mean_BD, mean_shannon,
ymin=mean_shannon-sd_shannon,
ymax=mean_shannon+sd_shannon)) +
geom_pointrange() +
labs(x='Buoyant density', y='Shannon index') +
facet_grid(DBL_scale ~ bw) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'none'
)
p
In [367]:
%%R -w 850 -h 700
df.SIM.j.var = df.SIM.j %>%
group_by(bw, DBL_scale, SIM_rep, fraction) %>%
mutate(variance = var(rel_abund)) %>%
ungroup() %>%
distinct(bw, DBL_scale, SIM_rep, fraction) %>%
select(bw, DBL_scale, SIM_rep, fraction, variance, BD_mid) %>%
mutate(SIM_rep = SIM_rep %>% as.character)
ggplot(df.SIM.j.var, aes(BD_mid, variance, color=SIM_rep)) +
geom_point() +
geom_line() +
facet_wrap(DBL_scale ~ bw, scales='free_y', ncol=2) +
theme_bw() +
theme(
text = element_text(size=16)
)
In [368]:
%%R -w 800 -h 600
ggplot(df.SIM.j.var %>% filter(BD_mid >= 1.75), aes(BD_mid, variance, color=SIM_rep)) +
geom_point() +
geom_line() +
facet_grid(DBL_scale ~ bw) +
theme_bw() +
theme(
text = element_text(size=16)
)
In [451]:
%%R
BD.diffs = function(df){
BDs = df$BD_mid %>% unique
df.BD = expand.grid(BDs, BDs)
df.BD$diff = df.BD %>% apply(1, diff) %>% abs %>% as.vector
df.BD = df.BD %>%
spread(Var1, diff)
rownames(df.BD) = df.BD$Var2
df.BD$Var2 = NULL
dist.BD = df.BD %>% as.matrix
dist.BD[upper.tri(dist.BD, diag=TRUE)] = 0
dist.BD %>% as.dist
}
vegdist.by = function(df, ...){
df.w = df %>%
select(taxon, fraction, rel_abund) %>%
spread(taxon, rel_abund) %>%
as.data.frame()
rownames(df.w) = df.w$fraction
df.w$fraction = NULL
vegan::vegdist(df.w, ...)
}
m.corr = function(X, D, ...){
res = list()
for (i in 1:length(X)){
tmp = vegan::mantel.correlog(X[[i]], D[[i]], ...)
tmp = tmp['mantel.res'][['mantel.res']] %>% as.data.frame
colnames(tmp) = c('class.index', 'n.dist', 'Mantel.corr', 'Pr', 'Pr.corr')
res[[i]] = tmp
}
return(res)
}
df.SIM.j.d = df.SIM.j %>%
filter(BD_mid >= min_BD, BD_mid <= max_BD) %>%
group_by(bw, DBL_scale, SIM_rep) %>%
nest() %>%
mutate(dist.bray = lapply(data, vegdist.by),
dist.BD = lapply(data, BD.diffs),
mantel.corr = m.corr(dist.bray, dist.BD, n.class=24)) %>%
select(bw, DBL_scale, SIM_rep, mantel.corr) %>%
unnest(mantel.corr %>% purrr::map(function(x) x))
df.SIM.j.d %>% head
In [458]:
%%R -w 800 -h 650
df.SIM.j.d.s = df.SIM.j.d %>%
filter(! is.na(Mantel.corr)) %>%
group_by(bw, DBL_scale, bin = ntile(class.index, 12)) %>%
summarize(mean.class.index = mean(class.index),
mean.Mantel.corr = mean(Mantel.corr, na.rm=TRUE),
sd.Mantel.corr = sd(Mantel.corr, na.rm=TRUE),
max.Pr.corr = max(Pr.corr, na.rm=TRUE)) %>%
ungroup() %>%
mutate(significant = ifelse(max.Pr.corr <= 0.05, TRUE, FALSE))
ggplot(df.SIM.j.d.s, aes(mean.class.index, mean.Mantel.corr, color=significant,
ymin=mean.Mantel.corr-sd.Mantel.corr,
ymax=mean.Mantel.corr+sd.Mantel.corr)) +
geom_pointrange() +
scale_color_discrete('P_corr < 0.05') +
labs(x='Class index (binned; 12 bins)', y='Mean Mantel correlation\n(+/- stdev)') +
facet_wrap(DBL_scale ~ bw, scale='free_y', ncol=2) +
theme_bw() +
theme(
text = element_text(size=16)
)
In [463]:
%%R -w 800 -h 650
ggplot(df.SIM.j.d.s, aes(mean.class.index, mean.Mantel.corr, color=significant,
ymin=mean.Mantel.corr-sd.Mantel.corr,
ymax=mean.Mantel.corr+sd.Mantel.corr)) +
geom_pointrange() +
scale_color_discrete('P_corr < 0.05') +
labs(x='Class index (binned; 12 bins)', y='Mean Mantel correlation\n(+/- stdev)') +
facet_grid(DBL_scale ~ bw) +
theme_bw() +
theme(
text = element_text(size=16)
)
In [369]:
OTU_files = !find $buildDir -name "OTU_abs1e9.txt"
OTU_files
Out[369]:
In [370]:
%%R -i OTU_files
# loading files
df.abs = list()
for (x in OTU_files){
df = read.delim(x, sep='\t')
xx = strsplit(x, '/')[[1]]
df$DBL_scale = xx[10] %>% as.numeric
df$bw = xx[11] %>% as.numeric
df$SIM_rep = xx[12] %>% as.numeric
df.abs[[x]] = df
}
df.abs = do.call('rbind', df.abs)
df.abs$file = gsub('\\.[0-9]+$', '', rownames(df.abs))
rownames(df.abs) = 1:nrow(df.abs)
df.abs %>% head(n=3)
In [371]:
%%R -w 1000 -h 500
ggplot(df.abs %>% filter(bw==0.6), aes(BD_mid, count, fill=taxon)) +
geom_area(stat='identity', position='dodge', alpha=0.5) +
labs(x='Buoyant density', y='Subsampled community\n(absolute abundance)') +
facet_grid(DBL_scale ~ SIM_rep) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'none',
axis.title.y = element_text(vjust=1),
axis.title.x = element_blank()
)
In [372]:
%%R -w 800
p1 = ggplot(df.abs %>% filter(bw==0.6, BD_mid < 1.7),
aes(BD_mid, count, fill=taxon, color=taxon)) +
labs(x='Buoyant density', y='Subsampled community\n(absolute abundance)') +
facet_grid(SIM_rep ~ DBL_scale) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'none',
axis.title.y = element_text(vjust=1),
axis.title.x = element_blank()
)
p2 = p1 + geom_line(alpha=0.25) + scale_y_log10()
p1 = p1 + geom_area(stat='identity', position='dodge', alpha=0.5)
grid.arrange(p1, p2, ncol=2)
p2
In [373]:
%%R -w 800
p1 = ggplot(df.abs %>% filter(bw==0.6, BD_mid > 1.72), aes(BD_mid, count, fill=taxon, color=taxon)) +
labs(x='Buoyant density', y='Subsampled community\n(absolute abundance)') +
facet_grid(SIM_rep ~ DBL_scale) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'none',
axis.title.y = element_text(vjust=1),
axis.title.x = element_blank()
)
p2 = p1 + geom_line(alpha=0.25) + scale_y_log10()
p1 = p1 + geom_area(stat='identity', position='dodge', alpha=0.5)
grid.arrange(p1, p2, ncol=2)
p2
In [206]:
wkdir = '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm_bw/0.6/0.8/1/'
commParams = 'mean:-7.5,sigma:2.0'
!cd $wkdir; \
SIPSim communities \
--abund_dist_p $commParams \
taxon_names.txt \
> comm_m6s9.txt
commParams = 'mean:-7.5,sigma:2.0'
!cd $wkdir; \
SIPSim communities \
--abund_dist_p $commParams \
taxon_names.txt \
> comm_m7s9.txt
commParams = 'mean:-7.5,sigma:2.0'
!cd $wkdir; \
SIPSim communities \
--abund_dist_p $commParams \
taxon_names.txt \
> comm_m8s9.txt
In [207]:
%%R -w 650 -h 550
F = file.path('/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm_bw/0.6/0.8/1/',
'comm_m8s9.txt')
df1 = read.delim(F, sep='\t') %>%
mutate(params = 'm8s9')
F = file.path('/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm_bw/0.6/0.8/1/',
'comm_m7s9.txt')
df2 = read.delim(F, sep='\t') %>%
mutate(params = 'm7s9')
F = file.path('/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm_bw/0.6/0.8/1/',
'comm_m6s9.txt')
df3 = read.delim(F, sep='\t') %>%
mutate(params = 'm6s9')
df.j = rbind(df1, df2, df3)
ggplot(df.j %>% filter(rank < 10), aes(rank, rel_abund_perc, color=params)) +
geom_line() +
facet_grid(params ~ .) +
theme(
text = element_text(size=16)
)
In [ ]: