gradient
DNA quantification
In [12]:
workDir = '/home/nick/notebook/SIPSim/dev/Leuders2004/'
lueders_fig1 = 'Lueders2004_Fig1.xls'
# params
bandwidth = 0.8
In [13]:
import os
import dill
import numpy as np
%load_ext rpy2.ipython
%load_ext pushnote
In [14]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(genomes)
library(gridExtra)
In [15]:
if not os.path.isdir(workDir):
os.makedirs(workDir)
%cd $workDir
In [5]:
%%R
data(proks)
summary(proks)
In [6]:
%%R
update(proks)
summary(proks)
In [ ]:
%%R
M.ext = subset(proks, name %like% 'Methylobacterium extorquens AM1*')
M.bark = subset(proks, name %like% 'Methanosarcina barkeri MS*')
orgs = rbind(M.ext, M.bark)
orgFile = 'genome_info.txt'
write.table(orgs, orgFile, sep='\t', quote=FALSE, row.names=FALSE)
cat('File written:', orgFile, '\n')
In [ ]:
!seqDB_tools accession-GI2fasta \
-a 11 -n 2 -f 30 -header \
-o genomes \
genome_info.txt
In [ ]:
!ls -thlc genomes/*.fna
In [ ]:
!find ./genomes/ -name "*.fna" |\
xargs -P 2 -I % SIPSim genome_rename --prefix genomes_rn %
In [ ]:
# making a genome index file
## taxonName<tab>genomeSeqFileName
!ls -1 genomes_rn/*fna | perl -pe 's/.+\/|\.fna//g; s/(.+)/$1\t$1.fna/' > genome_index.txt
!cat genome_index.txt
In [ ]:
!SIPSim genome_index \
--fp ./genomes_rn/ \
--np 2 \
genome_index.txt \
> genome_index.log
!tail genome_index.log
In [8]:
comm = """library taxon_name rel_abund_perc rank
1 Methanosarcina_barkeri_MS 100.0 1
1 Methylobacterium_extorquens_AM1 0.0 2
2 Methylobacterium_extorquens_AM1 100.0 1
2 Methanosarcina_barkeri_MS 0.0 2
"""
comm_file = 'comm-n2-unif.txt'
with open(comm_file, 'wb') as oFH:
oFH.write(comm)
!cat $comm_file
In [117]:
!SIPSim gradient_fractions \
--BD_min 1.66 \
--BD_max 1.8 \
--params mu:0.0075,sigma:0.001 \
comm-n2-unif.txt \
> fracs-n2-unif.txt
!wc -l fracs-n2-unif.txt
!head -n 5 fracs-n2-unif.txt
!tail -n 5 fracs-n2-unif.txt
In [118]:
%%R -w 800 -h 300
# plotting
df = read.delim('fracs-n2-unif.txt', sep='\t')
df %>%
group_by(library) %>%
summarize(n=n()) %>%
print
ggplot(df, aes(BD_min, fraction_size, group=library, color=library %>% as.character)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks=seq(1.66, 1.8, 0.01)) +
scale_color_discrete('gradient') +
labs(x='Buoyant density', y='Fraction size (buoyant density)') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [32]:
!SIPSim fragments \
genome_index.txt \
--fp genomes \
--fld skewed-normal,9000,2500,-5 \
--flr None,None \
--nf 100X \
--np 2 \
2> shotFrag_skewN90-25-n5-nS.log \
> shotFrag_skewN90-25-n5-nS.pkl
!tail shotFrag_skewN90-25-n5-nS.log
In [665]:
!SIPSim fragment_KDE \
shotFrag_skewN90-25-n5-nS.pkl \
--bw $bandwidth \
> shotFrag_skewN90-25-n5-nS_kde.pkl
!ls -thlc shotFrag_skewN90-25-n5-nS_kde.pkl
In [666]:
!SIPSim KDE_info -s shotFrag_skewN90-25-n5-nS_kde.pkl
In [667]:
n=100000
!SIPSim KDE_sample -n $n shotFrag_skewN90-25-n5-nS_kde.pkl > shotFrag_skewN90-25-n5-nS_kde.txt
In [668]:
%%R -w 600 -h 400
df = read.delim('shotFrag_skewN90-25-n5-nS_kde.txt', sep='\t') %>%
gather(taxon, Buoyant_density, Methanosarcina_barkeri_MS, Methylobacterium_extorquens_AM1)
p = ggplot(df, aes(Buoyant_density, fill=taxon)) +
geom_density(alpha=0.5) +
scale_x_continuous(limits=c(1.68, 1.78)) +
scale_fill_discrete('') +
facet_grid(libID ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'bottom'
)
pl = p + scale_y_log10()
grid.arrange(p, pl, ncol=1)
In [669]:
!SIPSim diffusion \
shotFrag_skewN90-25-n5-nS_kde.pkl \
--np 2 \
--bw $bandwidth \
> shotFrag_skewN90-25-n5-nS_dif_kde.pkl
!ls -thlc shotFrag_skewN90-25-n5-nS_dif_kde.pkl
In [670]:
n=100000
!SIPSim KDE_sample -n $n shotFrag_skewN90-25-n5-nS_dif_kde.pkl > shotFrag_skewN90-25-n5-nS_dif_kde.txt
In [671]:
%%R -w 600 -h 400
df = read.delim('shotFrag_skewN90-25-n5-nS_dif_kde.txt', sep='\t') %>%
gather(taxon, Buoyant_density, Methanosarcina_barkeri_MS, Methylobacterium_extorquens_AM1)
p = ggplot(df, aes(Buoyant_density, fill=taxon)) +
geom_density(alpha=0.5) +
scale_x_continuous(limits=c(1.68, 1.78)) +
scale_fill_discrete('') +
facet_grid(libID ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'bottom'
)
pl = p + scale_y_log10()
grid.arrange(p, pl, ncol=1)
In [672]:
!SIPSim DBL \
shotFrag_skewN90-25-n5-nS_dif_kde.pkl \
--comm comm-n2-unif.txt \
--commx 0 \
-D 1.725 \
-w 19807714 \
--tube_height 5.1 \
--r_min 7.47 \
--r_max 8.79 \
--vertical \
--np 2 \
> shotFrag_skewN90-25-n5-nS_dif_kde_DBL.pkl
In [673]:
%%R -h 300 -w 400
df = read.delim('DBL_index.txt', sep='\t')
ggplot(df, aes(DBL_BD, ymin=vert_gradient_BD_low, ymax=vert_gradient_BD_high)) +
geom_linerange() +
scale_y_reverse() +
labs(x='Diffusive boundary layer buoyant density',
y='CsCl gradient buoyant density',
title='Extent of DBL smearing') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [674]:
n=100000
!SIPSim KDE_sample -n $n shotFrag_skewN90-25-n5-nS_dif_kde_DBL.pkl > shotFrag_skewN90-25-n5-nS_dif_kde_DBL.txt
In [675]:
%%R -w 600 -h 550
df = read.delim('shotFrag_skewN90-25-n5-nS_dif_kde_DBL.txt', sep='\t') %>%
gather(taxon, Buoyant_density, Methanosarcina_barkeri_MS, Methylobacterium_extorquens_AM1)
p = ggplot(df, aes(Buoyant_density, fill=taxon)) +
geom_density(alpha=0.5) +
scale_x_continuous(limits=c(1.68, 1.78)) +
scale_fill_discrete('') +
facet_grid(libID ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'bottom'
)
pl = p + scale_y_log10()
grid.arrange(p, pl, ncol=1)
In [676]:
# making config file
config = """
[1]
# baseline: no incorp
[[intraPopDist 1]]
distribution = uniform
[[[start]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
[[[end]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 0
end = 0
[2]
# half of taxa incorporate
max_perc_taxa_incorp = 100
[[intraPopDist 1]]
distribution = uniform
[[[start]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 100
end = 100
[[[end]]]
[[[[interPopDist 1]]]]
distribution = uniform
start = 100
end = 100
"""
outfile = os.path.join(workDir, 'incorp.config')
outf = open(outfile, 'wb')
outf.write(config)
outf.close()
In [677]:
!SIPSim isotope_incorp \
shotFrag_skewN90-25-n5-nS_dif_kde_DBL.pkl \
incorp.config \
--comm comm-n2-unif.txt \
--np 2 \
> shotFrag_skewN90-25-n5-nS_dif_kde_DBL_incorp.pkl
In [678]:
n=100000
!SIPSim KDE_sample -n $n shotFrag_skewN90-25-n5-nS_dif_kde_DBL_incorp.pkl \
> shotFrag_skewN90-25-n5-nS_dif_kde_DBL_incorp.txt
In [679]:
%%R -w 600 -h 550
df = read.delim('shotFrag_skewN90-25-n5-nS_dif_kde_DBL_incorp.txt', sep='\t') %>%
gather(taxon, Buoyant_density, Methanosarcina_barkeri_MS, Methylobacterium_extorquens_AM1)
p = ggplot(df, aes(Buoyant_density, fill=taxon)) +
geom_density(alpha=0.5) +
scale_x_continuous(limits=c(1.68, 1.78)) +
scale_fill_discrete('') +
facet_grid(libID ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'bottom'
)
pl = p + scale_y_log10()
grid.arrange(p, pl, ncol=1)
In [680]:
!SIPSim OTU_table \
shotFrag_skewN90-25-n5-nS_dif_kde_DBL_incorp.pkl \
comm-n2-unif.txt \
fracs-n2-unif.txt \
--abs 1e9 \
--np 2 \
> shotFrag_OTU_n2_abs1e9.txt
!head shotFrag_OTU_n2_abs1e9.txt
In [59]:
%%R -i workDir
# loading file
F = file.path(workDir, 'shotFrag_OTU_n2_abs1e9.txt')
df = read.delim(F)
df1 = df %>%
filter(library == 1,
taxon == 'Methanosarcina_barkeri_MS')
df2 = df %>%
filter(library == 2,
taxon == 'Methylobacterium_extorquens_AM1')
df = rbind(df1, df2)
df.all = df %>%
group_by(library) %>%
mutate(rel_abund = count / max(count)) %>%
ungroup()
df.all %>% head(n=3) %>% as.data.frame
In [60]:
%%R -w 500 -h 300
# max possible shift for M barkeri (using mean G+C)
max_shift = 1.727326 + 0.036
# plotting
p.all = ggplot(df.all, aes(BD_min, rel_abund, color=taxon)) +
geom_point(size=3.5) +
geom_line() +
#geom_vline(xintercept=c(1.698416, 1.727326), linetype='dashed', alpha=0.5) +
#geom_vline(xintercept=c(max_shift), linetype='dashed', alpha=0.5, color='blue') +
scale_x_continuous(limits=c(1.68, 1.78), breaks=seq(1.68, 1.78, 0.02)) +
scale_color_discrete('') +
labs(x='CsCl buoyant density [g ml^-1]', y='Ratio of maximum quantities',
title='All DNA fragments') +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'bottom'
)
p.all
In [136]:
# primers
primers = """>Ar109f
ACKGCTCAGTAACACGT
>Ar915r
GTGCTCCCCCGCCAATTCCT
>Ba519f
CAGCMGCCGCGGTAANWC
>Ba907r
CCGTCAATTCMTTTRAGTT
"""
primer_file = 'Ba-Ar_primers.fna'
with open(primer_file, 'wb') as outFH:
outFH.write(primers)
!cat $primer_file
In [137]:
!SIPSim fragments \
genome_index.txt \
--fp genomes_rn \
--fr Ba-Ar_primers.fna \
--fld skewed-normal,9000,2500,-5 \
--flr None,None \
--nf 10000 \
--np 2 \
2> ampFrag_skewN90-25-n5-nS.log \
> ampFrag_skewN90-25-n5-nS.pkl
!tail ampFrag_skewN90-25-n5-nS.log
In [685]:
!SIPSim fragment_KDE \
--bw $bandwidth \
ampFrag_skewN90-25-n5-nS.pkl \
> ampFrag_skewN90-25-n5-nS_kde.pkl
!ls -thlc ampFrag_skewN90-25-n5-nS_kde.pkl
In [686]:
n=100000
!SIPSim KDE_sample -n $n ampFrag_skewN90-25-n5-nS_kde.pkl > ampFrag_skewN90-25-n5-nS_kde.txt
In [687]:
%%R -w 600 -h 400
df = read.delim('ampFrag_skewN90-25-n5-nS_kde.txt', sep='\t') %>%
gather(taxon, Buoyant_density, Methanosarcina_barkeri_MS, Methylobacterium_extorquens_AM1)
p = ggplot(df, aes(Buoyant_density, fill=taxon)) +
geom_density(alpha=0.5) +
scale_x_continuous(limits=c(1.68, 1.78)) +
scale_fill_discrete('') +
facet_grid(libID ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'bottom'
)
pl = p + scale_y_log10()
grid.arrange(p, pl, ncol=1)
In [688]:
!SIPSim diffusion \
--np 2 \
--bw $bandwidth \
ampFrag_skewN90-25-n5-nS_kde.pkl \
> ampFrag_skewN90-25-n5-nS_dif_kde.pkl
!ls -thlc ampFrag_skewN90-25-n5-nS_dif_kde.pkl
In [689]:
n=100000
!SIPSim KDE_sample -n $n ampFrag_skewN90-25-n5-nS_kde.pkl > ampFrag_skewN90-25-n5-nS_kde.txt
In [690]:
%%R -w 600 -h 400
df = read.delim('ampFrag_skewN90-25-n5-nS_kde.txt', sep='\t') %>%
gather(taxon, Buoyant_density, Methanosarcina_barkeri_MS, Methylobacterium_extorquens_AM1)
p = ggplot(df, aes(Buoyant_density, fill=taxon)) +
geom_density(alpha=0.5) +
scale_x_continuous(limits=c(1.68, 1.78)) +
scale_fill_discrete('') +
facet_grid(libID ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'bottom'
)
pl = p + scale_y_log10()
grid.arrange(p, pl, ncol=1)
In [691]:
!SIPSim DBL \
ampFrag_skewN90-25-n5-nS_dif_kde.pkl \
--comm comm-n2-unif.txt \
--commx 0 \
-D 1.725 \
-w 19807714 \
--tube_height 5.1 \
--r_min 7.47 \
--r_max 8.79 \
--vertical \
--np 2 \
> ampFrag_skewN90-25-n5-nS_dif_kde_DBL.pkl
In [692]:
%%R -h 300 -w 400
df = read.delim('DBL_index.txt', sep='\t')
ggplot(df, aes(DBL_BD, ymin=vert_gradient_BD_low, ymax=vert_gradient_BD_high)) +
geom_linerange() +
scale_y_reverse() +
labs(x='Diffusive boundary layer buoyant density',
y='CsCl gradient buoyant density',
title='Extent of DBL smearing') +
theme_bw() +
theme(
text = element_text(size=16)
)
In [693]:
n=100000
!SIPSim KDE_sample -n $n ampFrag_skewN90-25-n5-nS_kde.pkl > ampFrag_skewN90-25-n5-nS_kde.txt
In [694]:
%%R -w 600 -h 400
df = read.delim('ampFrag_skewN90-25-n5-nS_kde.txt', sep='\t') %>%
gather(taxon, Buoyant_density, Methanosarcina_barkeri_MS, Methylobacterium_extorquens_AM1)
p = ggplot(df, aes(Buoyant_density, fill=taxon)) +
geom_density(alpha=0.5) +
scale_x_continuous(limits=c(1.68, 1.78)) +
scale_fill_discrete('') +
facet_grid(libID ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'bottom'
)
pl = p + scale_y_log10()
grid.arrange(p, pl, ncol=1)
In [695]:
!SIPSim isotope_incorp \
ampFrag_skewN90-25-n5-nS_dif_kde_DBL.pkl \
incorp.config \
--comm comm-n2-unif.txt \
--np 2 \
> ampFrag_skewN90-25-n5-nS_dif_kde_DBL_incorp.pkl
In [696]:
n=100000
!SIPSim KDE_sample -n $n ampFrag_skewN90-25-n5-nS_kde.pkl > ampFrag_skewN90-25-n5-nS_kde.txt
In [697]:
%%R -w 600 -h 400
df = read.delim('ampFrag_skewN90-25-n5-nS_kde.txt', sep='\t') %>%
gather(taxon, Buoyant_density, Methanosarcina_barkeri_MS, Methylobacterium_extorquens_AM1)
p = ggplot(df, aes(Buoyant_density, fill=taxon)) +
geom_density(alpha=0.5) +
scale_x_continuous(limits=c(1.68, 1.78)) +
scale_fill_discrete('') +
facet_grid(libID ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'bottom'
)
pl = p + scale_y_log10()
grid.arrange(p, pl, ncol=1)
In [698]:
!SIPSim OTU_table \
ampFrag_skewN90-25-n5-nS_dif_kde_DBL_incorp.pkl \
comm-n2-unif.txt \
fracs-n2-unif.txt \
--abs 1e9 \
--np 2 \
> ampFrag_OTU_n2_abs1e9.txt
!head ampFrag_OTU_n2_abs1e9.txt
In [61]:
%%R -i workDir
# loading file
F = file.path(workDir, 'ampFrag_OTU_n2_abs1e9.txt')
df = read.delim(F)
df1 = df %>%
filter(library == 1,
taxon == 'Methanosarcina_barkeri_MS')
df2 = df %>%
filter(library == 2,
taxon == 'Methylobacterium_extorquens_AM1')
df = rbind(df1, df2)
df.16S = df %>%
group_by(library) %>%
mutate(rel_abund = count / max(count)) %>%
ungroup()
df.16S %>% head(n=3) %>% as.data.frame
In [63]:
%%R -w 500 -h 300
# max possible shift for M barkeri (using mean G+C)
max_shift = 1.727326 + 0.036
# plotting
p.16S = ggplot(df.16S, aes(BD_min, rel_abund, color=taxon)) +
geom_point(size=3.5) +
geom_line() +
#geom_vline(xintercept=c(1.698416, 1.727326), linetype='dashed', alpha=0.5) +
#geom_vline(xintercept=c(max_shift), linetype='dashed', alpha=0.5, color='blue') +
scale_x_continuous(limits=c(1.68, 1.78), breaks=seq(1.68, 1.78, 0.02)) +
scale_color_discrete('') +
labs(x='CsCl buoyant density [g ml^-1]', y='Ratio of maximum quantities',
title='DNA fragments containing 16S rRNA genes') +
theme_bw() +
theme(
text = element_text(size=16),
legend.position = 'bottom'
)
p.16S
In [38]:
%%R -i workDir -i lueders_fig1 -w 550 -h 400
# loading data
df.lueders = gdata::read.xls(lueders_fig1)
df.lueders$DNA = factor(df.lueders$DNA, levels=c('total', 'qPCR'))
# plotting
ggplot(df.lueders, aes(BD, conc, color=Taxon)) +
geom_point(size=3) +
geom_line() +
scale_x_continuous(limits=c(1.68, 1.78),
breaks=seq(1.68, 1.78, 0.02),
expand=c(0.001,0.001)) +
labs(y='Ratio of maximum quantities') +
facet_grid(DNA ~ .) +
theme_bw() +
theme(
text = element_text(size=16)
)
In [110]:
%%R
# simulation data
tmp1 = df.all %>%
dplyr::select(taxon, BD_min, count) %>%
mutate(DNA = 'Total DNA')
tmp2 = df.16S %>%
dplyr::select(taxon, BD_min, count) %>%
mutate(DNA = '16S rRNA genes')
df.j = rbind(tmp1, tmp2) %>%
group_by(taxon, DNA) %>%
mutate(rel_abund = count / max(count)) %>%
ungroup() %>%
mutate(taxon = gsub('Methanosarcina_barkeri_MS', 'M. barkeri MS', taxon),
taxon = gsub('Methylobacterium_extorquens_AM1', 'M. extorquens AM1', taxon)) %>%
dplyr::select(-count) %>%
mutate(dataset='simulation')
colnames(df.j) = c('Taxon', 'BD', 'DNA', 'conc', 'dataset')
tmp1 = tmp2 = NULL
# status
df.j %>% tail(n=3)
In [116]:
%%R
df.lueders.f = df.lueders %>%
mutate(DNA = gsub('total', 'Total DNA', DNA),
DNA = gsub('qPCR', '16S rRNA genes', DNA),
Taxon = gsub('M. barkeri', 'M. barkeri MS', Taxon),
Taxon = gsub('M. extorquens', 'M. extorquens AM1', Taxon)) %>%
mutate(dataset='Lueders et al. 2004')
df.j.j = rbind(df.j, df.lueders.f)
df.j.j %>% head(n=3)
In [213]:
%%R -w 550 -h 400
df.j.j$DNA = factor(df.j.j$DNA, levels=c('Total DNA', '16S rRNA genes'))
x.lab = expression(paste('Buoyant density (g ml' ^ '-1', ')'))
# plotting
p = ggplot(df.j.j, aes(BD, conc, color=dataset, shape=Taxon)) +
geom_point(size=3) +
geom_line() +
scale_color_discrete('Dataset') +
scale_x_continuous(limits=c(1.68, 1.78),
breaks=seq(1.68, 1.78, 0.02),
expand=c(0.001,0.001)) +
labs(x=x.lab, y='Ratio of maximum quantities') +
facet_grid(DNA ~ .) +
theme_bw() +
theme(
text = element_text(size=16)
)
p
In [214]:
%%R -i workDir
# saving
F = file.path(workDir, 'Lueders2004_validate.pdf')
ggsave(F, p, width=8, height=5.8)
cat('File written:', F, '\n')
In [136]:
%%R -w 550 -h 400
# linear interpolation of conc for an evenly distributed set of BDs
#approxfun
BDs = seq(1.68, 1.78, (1.78-1.68)/19)
interp = function(df, BDs){
F = approxfun(df$BD, df$conc)
x = data.frame('BD' = BDs,
'conc_interp' = F(BDs))
return(x)
}
df.j.j.int = df.j.j %>%
group_by(dataset, Taxon, DNA) %>%
nest() %>%
mutate(conc_interp_df = lapply(data, interp, BDs=BDs)) %>%
unnest(conc_interp = conc_interp_df %>% purrr::map(function(x) x)) %>%
ungroup()
# plotting check
ggplot(df.j.j.int, aes(BD, conc_interp, color=dataset, shape=Taxon)) +
geom_point(size=3) +
geom_line() +
scale_x_continuous(limits=c(1.68, 1.78),
breaks=seq(1.68, 1.78, 0.02),
expand=c(0.001,0.001)) +
labs(y='Ratio of maximum quantities') +
facet_grid(DNA ~ .) +
theme_bw() +
theme(
text = element_text(size=16)
)
In [155]:
%%R
# reformat
df.sim = df.j.j.int %>%
filter(!is.na(conc_interp),
dataset=='simulation')
df.L = df.j.j.int %>%
filter(!is.na(conc_interp),
dataset=='Lueders et al. 2004')
df.c = inner_join(df.sim, df.L, c('Taxon'='Taxon',
'BD'='BD',
'DNA'='DNA'))
# status
df.c %>% head(n=3)
In [182]:
%%R
calc.pearson = function(x){
xx = x[,'conc_interp.x'] %>% as.matrix %>% as.vector
xy = x[,'conc_interp.y'] %>% as.matrix %>% as.vector
res = cor.test(xx, xy, method='pearson')
df = data.frame('estimate' = c(res$estimate),
'p.value' = c(res$p.value))
return(df)
}
df.c.res = df.c %>%
group_by(Taxon, DNA) %>%
nest() %>%
mutate(model = purrr::map(data, calc.pearson)) %>%
unnest(pearson = model %>% purrr::map(function(x) x)) %>%
dplyr::select(-data, -model)
# stats
df.c.res
In [203]:
%%R -w 550 -h 400
# linear interpolation of conc for an evenly distributed set of BDs
#approxfun
BDs = seq(1.68, 1.78, (1.78-1.68)/99)
interp = function(df, BDs){
F = approxfun(df$BD, df$conc)
x = data.frame('BD' = BDs,
'conc_interp' = F(BDs))
return(x)
}
df.j.j.int = df.j.j %>%
group_by(dataset, Taxon, DNA) %>%
nest() %>%
mutate(conc_interp_df = lapply(data, interp, BDs=BDs)) %>%
unnest(conc_interp = conc_interp_df %>% purrr::map(function(x) x)) %>%
ungroup()
# plotting check
ggplot(df.j.j.int, aes(BD, conc_interp, color=dataset, shape=Taxon)) +
geom_point(size=2) +
geom_line() +
scale_x_continuous(limits=c(1.68, 1.78),
breaks=seq(1.68, 1.78, 0.02),
expand=c(0.001,0.001)) +
labs(y='Ratio of maximum quantities') +
facet_grid(DNA ~ .) +
theme_bw() +
theme(
text = element_text(size=16)
)
In [206]:
%%R
# determining locations of the modes
df.j.j.int.f = df.j.j.int %>%
group_by(dataset, Taxon, DNA) %>%
mutate(max_conc = max(conc_interp, na.rm=TRUE)) %>%
ungroup() %>%
filter(conc_interp == max_conc) %>%
dplyr::select(dataset, Taxon, BD, DNA) %>%
mutate(DNA = gsub(' ', '_', DNA),
DNA = gsub('16', 'X16', DNA)) %>%
spread(DNA, BD) %>%
mutate(BD_shift = round(abs(Total_DNA - X16S_rRNA_genes), 3))
# BD shift of mode between Total DNA * 16S amplcons
df.j.j.int.f
In [ ]: