In [19]:
#workDir = '/home/nick/notebook/SIPSim/dev/Leuders2004/'
workDir = '/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/Lueders2004/'
lueders_fig1 = file.path(workDir,'Lueders2004_Fig1.xls')
In [7]:
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [10]:
# loading file
F = file.path(workDir, 'shotFrag_OTU_n2_abs1e9.txt')
df = read.delim(F)
df1 = df %>%
filter(library == 1,
taxon == 'Methanosarcina_barkeri_MS')
df2 = df %>%
filter(library == 2,
taxon == 'Methylobacterium_extorquens_AM1')
df = rbind(df1, df2)
df.all = df %>%
group_by(library) %>%
mutate(rel_abund = count / max(count)) %>%
ungroup()
df.all %>% head(n=3) %>% as.data.frame
In [16]:
# max possible shift for M barkeri (using mean G+C)
max_shift = 1.727326 + 0.036
# plotting
p.all = ggplot(df.all, aes(BD_min, rel_abund, color=taxon)) +
geom_point() +
geom_line() +
#geom_vline(xintercept=c(1.698416, 1.727326), linetype='dashed', alpha=0.5) +
#geom_vline(xintercept=c(max_shift), linetype='dashed', alpha=0.5, color='blue') +
scale_x_continuous(limits=c(1.68, 1.78), breaks=seq(1.68, 1.78, 0.02)) +
scale_color_discrete('') +
labs(x='CsCl buoyant density [g ml^-1]', y='Ratio of maximum quantities',
title='All DNA fragments') +
theme_bw() +
theme(
#text = element_text(size=16),
legend.position = 'bottom'
)
options(repr.plot.width=5, repr.plot.height=3.5)
plot(p.all)
In [17]:
# loading file
F = file.path(workDir, 'ampFrag_OTU_n2_abs1e9.txt')
df = read.delim(F)
df1 = df %>%
filter(library == 1,
taxon == 'Methanosarcina_barkeri_MS')
df2 = df %>%
filter(library == 2,
taxon == 'Methylobacterium_extorquens_AM1')
df = rbind(df1, df2)
df.16S = df %>%
group_by(library) %>%
mutate(rel_abund = count / max(count)) %>%
ungroup()
df.16S %>% head(n=3) %>% as.data.frame
In [18]:
# max possible shift for M barkeri (using mean G+C)
max_shift = 1.727326 + 0.036
# plotting
p.16S = ggplot(df.16S, aes(BD_min, rel_abund, color=taxon)) +
geom_point() +
geom_line() +
#geom_vline(xintercept=c(1.698416, 1.727326), linetype='dashed', alpha=0.5) +
#geom_vline(xintercept=c(max_shift), linetype='dashed', alpha=0.5, color='blue') +
scale_x_continuous(limits=c(1.68, 1.78), breaks=seq(1.68, 1.78, 0.02)) +
scale_color_discrete('') +
labs(x='CsCl buoyant density [g ml^-1]', y='Ratio of maximum quantities',
title='DNA fragments containing 16S rRNA genes') +
theme_bw() +
theme(
#text = element_text(size=16),
legend.position = 'bottom'
)
options(repr.plot.width=5, repr.plot.height=3.5)
plot(p.16S)
In [23]:
# loading data
df.lueders = readxl::read_excel(lueders_fig1)
df.lueders$DNA = factor(df.lueders$DNA, levels=c('total', 'qPCR'))
# plotting
p.f1 = ggplot(df.lueders, aes(BD, conc, color=Taxon)) +
geom_point() +
geom_line() +
scale_x_continuous(limits=c(1.68, 1.78),
breaks=seq(1.68, 1.78, 0.02),
expand=c(0.001,0.001)) +
labs(y='Ratio of maximum quantities') +
facet_grid(DNA ~ .) +
theme_bw()
options(repr.plot.width=5, repr.plot.height=3.5)
plot(p.f1)
In [24]:
# simulation data
tmp1 = df.all %>%
dplyr::select(taxon, BD_min, count) %>%
mutate(DNA = 'Total DNA')
tmp2 = df.16S %>%
dplyr::select(taxon, BD_min, count) %>%
mutate(DNA = '16S rRNA genes')
df.j = rbind(tmp1, tmp2) %>%
group_by(taxon, DNA) %>%
mutate(rel_abund = count / max(count)) %>%
ungroup() %>%
mutate(taxon = gsub('Methanosarcina_barkeri_MS', 'M. barkeri MS', taxon),
taxon = gsub('Methylobacterium_extorquens_AM1', 'M. extorquens AM1', taxon)) %>%
dplyr::select(-count) %>%
mutate(dataset='simulation')
colnames(df.j) = c('Taxon', 'BD', 'DNA', 'conc', 'dataset')
tmp1 = tmp2 = NULL
# status
df.j %>% tail(n=3)
In [25]:
df.lueders.f = df.lueders %>%
mutate(DNA = gsub('total', 'Total DNA', DNA),
DNA = gsub('qPCR', '16S rRNA genes', DNA),
Taxon = gsub('M. barkeri', 'M. barkeri MS', Taxon),
Taxon = gsub('M. extorquens', 'M. extorquens AM1', Taxon)) %>%
mutate(dataset='Lueders et al. 2004')
df.j.j = rbind(df.j, df.lueders.f)
df.j.j %>% head(n=3)
In [31]:
df.j.j$DNA = factor(df.j.j$DNA, levels=c('Total DNA', '16S rRNA genes'))
x.lab = expression(paste('Buoyant density (g ml' ^ '-1', ')'))
# plotting
p = ggplot(df.j.j, aes(BD, conc, color=dataset, shape=Taxon)) +
geom_vline(xintercept=c(1.7, 1.752), linetype='dashed', alpha=0.3) +
geom_point() +
geom_line() +
scale_color_discrete('Dataset') +
scale_x_continuous(limits=c(1.68, 1.78),
breaks=seq(1.68, 1.78, 0.02),
expand=c(0.001,0.001)) +
labs(x=x.lab, y='Ratio of maximum quantities') +
facet_grid(DNA ~ .) +
theme_bw()
options(repr.plot.width=5, repr.plot.height=3.5)
plot(p)
In [35]:
# saving
F = file.path(workDir, 'Lueders2004_validate.pdf')
ggsave(F, p, width=6, height=4.5)
cat('File written:', F, '\n')
Figure Lueders: dashed vertical lines at 1.7 and 1.752 g ml-1, which represent max 16S rRNA gene copy numbers for M. barkeri MS quantified in Lueders et al., 2004.
In [ ]: