Lueders et al., 2004 (barkeri & extorquens)

  • Re-plotting figure for paper

Setting variables


In [19]:
#workDir = '/home/nick/notebook/SIPSim/dev/Leuders2004/'
workDir = '/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/Lueders2004/'

lueders_fig1 = file.path(workDir,'Lueders2004_Fig1.xls')

In [7]:
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)

Plotting OTU abundances


In [10]:
# loading file
F = file.path(workDir, 'shotFrag_OTU_n2_abs1e9.txt')
df =  read.delim(F)

df1 = df %>%
    filter(library == 1,
           taxon == 'Methanosarcina_barkeri_MS')
df2 = df %>%
    filter(library == 2,
           taxon == 'Methylobacterium_extorquens_AM1')
df = rbind(df1, df2)

df.all = df %>% 
    group_by(library) %>%
    mutate(rel_abund = count / max(count)) %>%
    ungroup()
df.all %>% head(n=3) %>% as.data.frame


librarytaxonfractionBD_minBD_midBD_maxcountrel_abund
1 Methanosarcina_barkeri_MS-inf-1.660 -Inf 1.659 1.659 277962 5.777657e-04
1 Methanosarcina_barkeri_MS1.660-1.662 1.660 1.661 1.662 16101 3.346718e-05
1 Methanosarcina_barkeri_MS1.662-1.668 1.662 1.665 1.668 31705 6.590131e-05

In [16]:
# max possible shift for M barkeri (using mean G+C)
max_shift = 1.727326 + 0.036

# plotting
p.all = ggplot(df.all, aes(BD_min, rel_abund, color=taxon)) +
    geom_point() +
    geom_line() +
    #geom_vline(xintercept=c(1.698416, 1.727326), linetype='dashed', alpha=0.5) +
    #geom_vline(xintercept=c(max_shift), linetype='dashed', alpha=0.5, color='blue') +
    scale_x_continuous(limits=c(1.68, 1.78), breaks=seq(1.68, 1.78, 0.02)) +
    scale_color_discrete('') +
    labs(x='CsCl buoyant density [g ml^-1]', y='Ratio of maximum quantities', 
         title='All DNA fragments') +
    theme_bw() +
    theme( 
        #text = element_text(size=16),
        legend.position = 'bottom'
    )

options(repr.plot.width=5, repr.plot.height=3.5)
plot(p.all)


Warning message:
“Removed 13 rows containing missing values (geom_point).”Warning message:
“Removed 13 rows containing missing values (geom_path).”

SIPSim of amplicon fragments

  • for comparing to qPCR results

Plotting OTU abundances


In [17]:
# loading file
F = file.path(workDir, 'ampFrag_OTU_n2_abs1e9.txt')
df =  read.delim(F)

df1 = df %>%
    filter(library == 1,
           taxon == 'Methanosarcina_barkeri_MS')
df2 = df %>%
    filter(library == 2,
           taxon == 'Methylobacterium_extorquens_AM1')
df = rbind(df1, df2)

df.16S = df %>% 
    group_by(library) %>%
    mutate(rel_abund = count / max(count)) %>%
    ungroup()
df.16S %>% head(n=3) %>% as.data.frame


librarytaxonfractionBD_minBD_midBD_maxcountrel_abund
1 Methanosarcina_barkeri_MS-inf-1.660 -Inf 1.659 1.659 241572 5.054602e-04
1 Methanosarcina_barkeri_MS1.660-1.662 1.660 1.661 1.662 16581 3.469374e-05
1 Methanosarcina_barkeri_MS1.662-1.668 1.662 1.665 1.668 23103 4.834024e-05

In [18]:
# max possible shift for M barkeri (using mean G+C)
max_shift = 1.727326 + 0.036

# plotting
p.16S = ggplot(df.16S, aes(BD_min, rel_abund, color=taxon)) +
    geom_point() +
    geom_line() +
    #geom_vline(xintercept=c(1.698416, 1.727326), linetype='dashed', alpha=0.5) +
    #geom_vline(xintercept=c(max_shift), linetype='dashed', alpha=0.5, color='blue') +
    scale_x_continuous(limits=c(1.68, 1.78), breaks=seq(1.68, 1.78, 0.02)) +
    scale_color_discrete('') +
    labs(x='CsCl buoyant density [g ml^-1]', y='Ratio of maximum quantities', 
         title='DNA fragments containing 16S rRNA genes') +
    theme_bw() +
    theme( 
        #text = element_text(size=16),
        legend.position = 'bottom'
    )

options(repr.plot.width=5, repr.plot.height=3.5)
plot(p.16S)


Warning message:
“Removed 13 rows containing missing values (geom_point).”Warning message:
“Removed 13 rows containing missing values (geom_path).”

Plotting with Lueders

Lueders 2004, figure 1


In [23]:
# loading data
df.lueders = readxl::read_excel(lueders_fig1)
df.lueders$DNA = factor(df.lueders$DNA, levels=c('total', 'qPCR'))

# plotting
p.f1 = ggplot(df.lueders, aes(BD, conc, color=Taxon)) +
    geom_point() +
    geom_line() +
    scale_x_continuous(limits=c(1.68, 1.78), 
                       breaks=seq(1.68, 1.78, 0.02), 
                       expand=c(0.001,0.001)) +
    labs(y='Ratio of maximum quantities') +
    facet_grid(DNA ~ .) +
    theme_bw() 

options(repr.plot.width=5, repr.plot.height=3.5)
plot(p.f1)


Warning message:
“Removed 2 rows containing missing values (geom_point).”

Combining data


In [24]:
# simulation data
tmp1 = df.all %>% 
    dplyr::select(taxon, BD_min, count) %>%
    mutate(DNA = 'Total DNA')

tmp2 = df.16S %>% 
    dplyr::select(taxon, BD_min, count) %>%
    mutate(DNA = '16S rRNA genes')

df.j = rbind(tmp1, tmp2) %>%
    group_by(taxon, DNA) %>%
    mutate(rel_abund = count / max(count)) %>%
    ungroup() %>%
    mutate(taxon = gsub('Methanosarcina_barkeri_MS', 'M. barkeri MS', taxon),
           taxon = gsub('Methylobacterium_extorquens_AM1', 'M. extorquens AM1', taxon)) %>%
    dplyr::select(-count) %>%
    mutate(dataset='simulation')
colnames(df.j) = c('Taxon', 'BD', 'DNA', 'conc', 'dataset')
tmp1 = tmp2 = NULL

# status
df.j %>% tail(n=3)


TaxonBDDNAconcdataset
M. extorquens AM11.785 16S rRNA genes 0.0000601645 simulation
M. extorquens AM11.792 16S rRNA genes 0.0000726349 simulation
M. extorquens AM11.801 16S rRNA genes 0.0004804151 simulation

In [25]:
df.lueders.f = df.lueders %>%
    mutate(DNA = gsub('total', 'Total DNA', DNA),
           DNA = gsub('qPCR', '16S rRNA genes', DNA),
           Taxon = gsub('M. barkeri', 'M. barkeri MS', Taxon),
           Taxon = gsub('M. extorquens', 'M. extorquens AM1', Taxon)) %>%
    mutate(dataset='Lueders et al. 2004')


df.j.j = rbind(df.j, df.lueders.f)
df.j.j %>% head(n=3)


TaxonBDDNAconcdataset
M. barkeri MS -Inf Total DNA 5.777657e-04 simulation
M. barkeri MS1.660 Total DNA 3.346718e-05 simulation
M. barkeri MS1.662 Total DNA 6.590131e-05 simulation

Paper figure


In [31]:
df.j.j$DNA = factor(df.j.j$DNA, levels=c('Total DNA', '16S rRNA genes'))

x.lab = expression(paste('Buoyant density (g ml' ^ '-1', ')'))

# plotting
p = ggplot(df.j.j, aes(BD, conc, color=dataset, shape=Taxon)) +
    geom_vline(xintercept=c(1.7, 1.752), linetype='dashed', alpha=0.3) +
    geom_point() +
    geom_line() +
    scale_color_discrete('Dataset') +
    scale_x_continuous(limits=c(1.68, 1.78), 
                       breaks=seq(1.68, 1.78, 0.02), 
                       expand=c(0.001,0.001)) +
    labs(x=x.lab, y='Ratio of maximum quantities') +
    facet_grid(DNA ~ .) +
    theme_bw() 

options(repr.plot.width=5, repr.plot.height=3.5)
plot(p)


Warning message:
“Removed 28 rows containing missing values (geom_point).”Warning message:
“Removed 13 rows containing missing values (geom_path).”

In [35]:
# saving
F = file.path(workDir, 'Lueders2004_validate.pdf')
ggsave(F, p, width=6, height=4.5)
cat('File written:', F, '\n')


Warning message:
“Removed 28 rows containing missing values (geom_point).”Warning message:
“Removed 13 rows containing missing values (geom_path).”
File written: /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/Lueders2004//Lueders2004_validate.pdf 

Figure Lueders: dashed vertical lines at 1.7 and 1.752 g ml-1, which represent max 16S rRNA gene copy numbers for M. barkeri MS quantified in Lueders et al., 2004.


In [ ]: