Goal

  • Simulating a fullCyc control gradient
    • Not simulating incorporation (all 0% isotope incorp.)
      • Don't know how much true incorporatation for emperical data
  • Using parameters inferred from TRIMMED emperical data (fullCyc seq data), or if not available, default SIPSim parameters
  • Determining whether simulated taxa show similar distribution to the emperical data

Input parameters

  • phyloseq.bulk file
  • taxon mapping file
  • list of genomes
  • fragments simulated for all genomes
  • bulk community richness

workflow

  • Creating a community file from OTU abundances in bulk soil samples
    • phyloseq.bulk --> OTU table --> filter to sample --> community table format
  • Fragment simulation
    • simulated_fragments --> parse out fragments for target OTUs
    • simulated_fragments --> parse out fragments from random genomes to obtain richness of interest
    • combine fragment python objects
  • Convert fragment lists to kde object
  • Add diffusion
  • Make incorp config file
  • Add isotope incorporation
  • Calculating BD shift from isotope incorp
  • Simulating gradient fractions
  • Simulating OTU table
  • Simulating PCR
  • Subsampling from the OTU table

Init


In [259]:
import os
import glob
import re
import nestly

In [260]:
%load_ext rpy2.ipython
%load_ext pushnote


The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython
The pushnote extension is already loaded. To reload it, use:
  %reload_ext pushnote

In [261]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
library(phyloseq)

BD min/max


In [262]:
%%R
## min G+C cutoff
min_GC = 13.5
## max G+C cutoff
max_GC = 80
## max G+C shift
max_13C_shift_in_BD = 0.036


min_BD = min_GC/100.0 * 0.098 + 1.66    
max_BD = max_GC/100.0 * 0.098 + 1.66    

max_BD = max_BD + max_13C_shift_in_BD

cat('Min BD:', min_BD, '\n')
cat('Max BD:', max_BD, '\n')


Min BD: 1.67323 
Max BD: 1.7744 

Nestly

  • assuming fragments already simulated

In [302]:
workDir = '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/'
buildDir = os.path.join(workDir, 'rep3_DBL-comm')
R_dir = '/home/nick/notebook/SIPSim/lib/R/'

fragFile = '/home/nick/notebook/SIPSim/dev/bac_genome1147/validation/ampFrags_kde_parsed.pkl'
commFile = '/home/nick/notebook/SIPSim/dev/fullCyc//fullCyc_12C-Con_trm_comm.txt'

# emperical data for validation
emp_shan_file = '/home/nick/notebook/SIPSim/dev/fullCyc_trim/SIP-core_unk_shan.txt'
emp_BDspan_file = '/home/nick/notebook/SIPSim/dev/fullCyc_trim/SIP-core_unk_trm_BD-span.txt'
emp_corr_file = '/home/nick/notebook/SIPSim/dev/fullCyc_trim/SIP-core_unk_trm_corr.txt'

# variable params
nreps = 10

In [171]:
# building tree structure
nest = nestly.Nest()

# varying params
nest.add('rep', [x + 1 for x in xrange(nreps)])


## set params
nest.add('abs', ['1e9'], create_dir=False)
nest.add('percIncorp', [0], create_dir=False)
nest.add('percTaxa', [0], create_dir=False)
nest.add('np', [10], create_dir=False)
nest.add('subsample_dist', ['lognormal'], create_dir=False)
nest.add('subsample_mean', [9.432], create_dir=False)
nest.add('subsample_scale', [0.5], create_dir=False)
nest.add('subsample_min', [10000], create_dir=False)
nest.add('subsample_max', [30000], create_dir=False)
nest.add('bandwidth', [0.08], create_dir=False)
nest.add('DBL_scaling', [0.4], create_dir=False)

### input/output files
nest.add('buildDir', [buildDir], create_dir=False)
nest.add('R_dir', [R_dir], create_dir=False)
nest.add('fragFile', [fragFile], create_dir=False)
nest.add('commFile', [commFile], create_dir=False)


# building directory tree
nest.build(buildDir)

# bash file to run
bashFile = os.path.join(buildDir, 'SIPSimRun.sh')

In [172]:
%%writefile $bashFile
#!/bin/bash

export PATH={R_dir}:$PATH

echo '#-- SIPSim pipeline --#'

echo '# shuffling taxa in comm file'
comm_shuffle_taxa.r {commFile} > comm.txt

echo '# adding diffusion'    
SIPSim diffusion \
    {fragFile} \
    --bw {bandwidth} \
    --np {np} \
    > ampFrags_KDE_dif.pkl    

echo '# adding DBL contamination; abundance-weighted smearing'
SIPSim DBL \
    ampFrags_KDE_dif.pkl \
    --comm comm.txt \
    --commx {DBL_scaling} \
    --bw {bandwidth} \
    --np {np} \
    > ampFrags_KDE_dif_DBL.pkl
    
echo '# making incorp file'
SIPSim incorpConfigExample \
  --percTaxa {percTaxa} \
  --percIncorpUnif {percIncorp} \
  > {percTaxa}_{percIncorp}.config

echo '# adding isotope incorporation to BD distribution'
SIPSim isotope_incorp \
    ampFrags_KDE_dif_DBL.pkl \
    {percTaxa}_{percIncorp}.config \
    --comm comm.txt \
    --bw {bandwidth} \
    --np {np} \
    > ampFrags_KDE_dif_DBL_inc.pkl

echo '# simulating gradient fractions'
SIPSim gradient_fractions \
    comm.txt \
    > fracs.txt 

echo '# simulating an OTU table'
SIPSim OTU_table \
    ampFrags_KDE_dif_DBL_inc.pkl \
    comm.txt \
    fracs.txt \
    --abs {abs} \
    --np {np} \
    > OTU_abs{abs}.txt
    
#-- w/ PCR simulation --#
echo '# simulating PCR'
SIPSim OTU_PCR \
    OTU_abs{abs}.txt \
    > OTU_abs{abs}_PCR.txt    
    
echo '# subsampling from the OTU table (simulating sequencing of the DNA pool)'
SIPSim OTU_subsample \
    --dist {subsample_dist} \
    --dist_params mean:{subsample_mean},sigma:{subsample_scale} \
    --min_size {subsample_min} \
    --max_size {subsample_max} \
    OTU_abs{abs}_PCR.txt \
    > OTU_abs{abs}_PCR_sub.txt
        
echo '# making a wide-formatted table'
SIPSim OTU_wideLong -w \
    OTU_abs{abs}_PCR_sub.txt \
    > OTU_abs{abs}_PCR_sub_w.txt
    
echo '# making metadata (phyloseq: sample_data)'
SIPSim OTU_sampleData \
    OTU_abs{abs}_PCR_sub.txt \
    > OTU_abs{abs}_PCR_sub_meta.txt
    

#-- w/out PCR simulation --#    
echo '# subsampling from the OTU table (simulating sequencing of the DNA pool)'
SIPSim OTU_subsample \
    --dist {subsample_dist} \
    --dist_params mean:{subsample_mean},sigma:{subsample_scale} \
    --min_size {subsample_min} \
    --max_size {subsample_max} \
    OTU_abs{abs}.txt \
    > OTU_abs{abs}_sub.txt
        
echo '# making a wide-formatted table'
SIPSim OTU_wideLong -w \
    OTU_abs{abs}_sub.txt \
    > OTU_abs{abs}_sub_w.txt
    
echo '# making metadata (phyloseq: sample_data)'
SIPSim OTU_sampleData \
    OTU_abs{abs}_sub.txt \
    > OTU_abs{abs}_sub_meta.txt    
    
    
#-- making summary tables --#
# no PCR
shannon_calc.r OTU_abs1e9_sub.txt > OTU_abs1e9_sub_shan.txt
BD_span_calc.r OTU_abs1e9_sub.txt comm.txt > OTU_abs1e9_sub_BD-span.txt
correlogram_make.r OTU_abs1e9_sub.txt > OTU_abs1e9_sub_corr.txt
# PCR
shannon_calc.r OTU_abs1e9_PCR_sub.txt > OTU_abs1e9_PCR_sub_shan.txt
BD_span_calc.r OTU_abs1e9_PCR_sub.txt comm.txt > OTU_abs1e9_PCR_sub_BD-span.txt
correlogram_make.r OTU_abs1e9_PCR_sub.txt > OTU_abs1e9_PCR_sub_corr.txt


Overwriting /home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/SIPSimRun.sh

In [ ]:
!chmod 777 $bashFile
!cd $workDir; \
    nestrun  --template-file $bashFile -d rep3_DBL-comm --log-file log.txt -j 2


2016-02-20 12:43:05,905 * INFO * Template: ./SIPSimRun.sh
2016-02-20 12:43:05,907 * INFO * [124224] Started ./SIPSimRun.sh in rep3_DBL-comm/7
2016-02-20 12:43:05,909 * INFO * [124225] Started ./SIPSimRun.sh in rep3_DBL-comm/9

In [ ]:
%pushnote rep3_DBL-comm complete

BD min/max

  • what is the min/max BD that we care about?

In [175]:
%%R
## min G+C cutoff
min_GC = 13.5
## max G+C cutoff
max_GC = 80
## max G+C shift
max_13C_shift_in_BD = 0.036


min_BD = min_GC/100.0 * 0.098 + 1.66    
max_BD = max_GC/100.0 * 0.098 + 1.66    

max_BD = max_BD + max_13C_shift_in_BD

cat('Min BD:', min_BD, '\n')
cat('Max BD:', max_BD, '\n')


Min BD: 1.67323 
Max BD: 1.7744 

Comparing to emperical data

  • correlation/regression analyses of metrics on community composition

In [305]:
%%R

# function for loading dataset files
load.data.files = function(sim.files, emp.file){
    # loading
    ## simulations
    df = list()
    for(x in sim.files){
        tmp = read.delim(x, sep='\t')
        xx = strsplit(x, '/')[[1]]
        tmp$SIM_rep = xx[10] %>% as.numeric
        df[[x]] = tmp
    }
    df = do.call(rbind, df) %>% as.data.frame 
    rownames(df) = 1:nrow(df)
    df$dataset = 'Simulation'

    # emperical
    tmp = read.delim(emp.file, sep='\t')
    tmp$dataset = 'Emperical'
    tmp$SIM_rep = NA

    # return
    rbind(df, tmp)
    }

Shannon index


In [318]:
sim_shan_files = !find $buildDir -name "*_shan.txt"
for i in sim_shan_files:
    print(i)
print emp_shan_file


/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/7/OTU_abs1e9_sub_shan.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/9/OTU_abs1e9_sub_shan.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/8/OTU_abs1e9_sub_shan.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/3/OTU_abs1e9_sub_shan.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/10/OTU_abs1e9_sub_shan.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/2/OTU_abs1e9_sub_shan.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/1/OTU_abs1e9_sub_shan.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/4/OTU_abs1e9_sub_shan.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/5/OTU_abs1e9_sub_shan.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/6/OTU_abs1e9_sub_shan.txt
/home/nick/notebook/SIPSim/dev/fullCyc_trim/SIP-core_unk_shan.txt

In [320]:
%%R -i sim_shan_files -i emp_shan_file

df.shan = load.data.files(sim_shan_files, emp_shan_file) 
df.shan %>% tail(n=3)


    library             sample      OTU Buoyant_density  shannon SIM_rep
390      14 12C-Con.D14.R1_F23 OTU.1130        1.692527 5.034561      NA
391      14 12C-Con.D14.R1_F25 OTU.1130        1.684878 4.736203      NA
392      14 12C-Con.D14.R1_F27 OTU.1130        1.677228 4.893596      NA
      dataset
390 Emperical
391 Emperical
392 Emperical

In [327]:
%%R -w 800 -h 300
# summarizing
df.shan.s = df.shan %>%
    group_by(dataset, BD_bin = ntile(Buoyant_density, 24)) %>%
    summarize(mean_shannon = mean(shannon), 
              sd_shannon = sd(shannon))

ggplot(df.shan.s, aes(BD_bin, mean_shannon, color=dataset,
                      ymin=mean_shannon-sd_shannon, ymax=mean_shannon+sd_shannon)) +
    geom_pointrange() +
    labs(x='Buoyant density (binned; 24 bins)', y='Shannon index') +
    theme_bw() +
    theme(
        text = element_text(size=16)
    )



In [308]:
%%R -h 350
# pairwise correlations for each dataset
df.shan.bin = df.shan %>%
    group_by(BD_bin = ntile(Buoyant_density, 24))

calc.spearman = function(x){
    cor(x[,'shannon.x'], x['shannon.y'], method='spearman')[1,1]
}

df.shan.corr = inner_join(df.shan.bin, df.shan.bin, c('BD_bin' = 'BD_bin')) %>%
    group_by(dataset.x, dataset.y) %>%
    nest() %>%
    mutate(model = purrr::map(data, calc.spearman)) %>%
    unnest(spearman = model %>% purrr::map(function(x) x)) %>%
    ungroup() %>%
    select(-data, -model) 

# table
df.shan.corr %>% print
        
# plotting
ggplot(df.shan.corr, aes(dataset.x, dataset.y, fill=spearman)) +
    geom_tile() +
    scale_fill_gradient(low='black', high='red') +
    labs(title='Shannon index') +
    theme(
        text = element_text(size=16)
    )


Source: local data frame [4 x 3]

   dataset.x  dataset.y  spearman
       (chr)      (chr)     (dbl)
1 Simulation Simulation 0.3939976
2 Simulation  Emperical 0.3162938
3  Emperical Simulation 0.3162938
4  Emperical  Emperical 0.7072283

BD spans


In [328]:
sim_BDspan_files = !find $buildDir -name "*_BD-span.txt"
for i in sim_BDspan_files:
    print(i)
print emp_BDspan_file


/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/7/OTU_abs1e9_sub_BD-span.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/9/OTU_abs1e9_sub_BD-span.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/8/OTU_abs1e9_sub_BD-span.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/3/OTU_abs1e9_sub_BD-span.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/10/OTU_abs1e9_sub_BD-span.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/2/OTU_abs1e9_sub_BD-span.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/1/OTU_abs1e9_sub_BD-span.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/4/OTU_abs1e9_sub_BD-span.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/5/OTU_abs1e9_sub_BD-span.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/6/OTU_abs1e9_sub_BD-span.txt
/home/nick/notebook/SIPSim/dev/fullCyc_trim/SIP-core_unk_trm_BD-span.txt

In [334]:
%%R -i sim_BDspan_files -i emp_BDspan_file

df.BDspan = load.data.files(sim_BDspan_files, emp_BDspan_file) 

# binning by pre-fractionation abundances
n.tile = 20
df.BDspan = df.BDspan %>%
    group_by(dataset, library, preFrac_abund_bin = ntile(mean_preFrac_abund, n.tile)) %>%
    summarize(mean_preFrac_abund = mean(mean_preFrac_abund),
              var_BD_range = var(BD_range),
              sd_BD_range = sd(BD_range))

df.BDspan %>% tail(n=3)


Source: local data frame [3 x 6]
Groups: dataset, library [1]

     dataset library preFrac_abund_bin mean_preFrac_abund var_BD_range
       (chr)   (int)             (int)              (dbl)        (dbl)
1 Simulation       1                18        0.001519561 8.214416e-07
2 Simulation       1                19        0.002263931 6.069248e-07
3 Simulation       1                20        0.008907262 1.934923e-07
Variables not shown: sd_BD_range (dbl)

In [338]:
%%R -w 700 -h 400
# plotting
ggplot(df.BDspan, aes(mean_preFrac_abund, var_BD_range, color=dataset)) +
    geom_point() +
    geom_smooth() +
    scale_y_log10() +
    scale_x_log10()



In [311]:
%%R -h 350
calc.spearman = function(x){
    cor(x[,'var_BD_range.x'], x['var_BD_range.y'], method='spearman')[1,1]
}

df.BDspan.corr = inner_join(df.BDspan, df.BDspan, c('preFrac_abund_bin' = 'preFrac_abund_bin')) %>%
    group_by(dataset.x, dataset.y) %>%
    nest() %>%
    mutate(model = purrr::map(data, calc.spearman)) %>%
    unnest(spearman = model %>% purrr::map(function(x) x)) %>%
    ungroup() %>%
    select(-data, -model) 

# table
df.BDspan.corr %>% print
        
# plotting
ggplot(df.BDspan.corr, aes(dataset.x, dataset.y, fill=spearman)) +
    geom_tile() +
    scale_fill_gradient(low='black', high='red') +
    labs(title='BD span') +
    theme(
        text = element_text(size=16)
    )


Source: local data frame [4 x 3]

   dataset.x  dataset.y  spearman
       (chr)      (chr)     (dbl)
1  Emperical  Emperical 0.7225239
2  Emperical Simulation 0.8472205
3 Simulation  Emperical 0.8472205
4 Simulation Simulation 1.0000000

correlograms (jaccard ~ BD)


In [339]:
sim_corr_files = !find $buildDir -name "*_corr.txt"
for i in sim_corr_files:
    print(i)
print emp_corr_file


/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/7/OTU_abs1e9_sub_corr.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/9/OTU_abs1e9_sub_corr.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/8/OTU_abs1e9_sub_corr.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/3/OTU_abs1e9_sub_corr.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/10/OTU_abs1e9_sub_corr.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/2/OTU_abs1e9_sub_corr.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/1/OTU_abs1e9_sub_corr.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/4/OTU_abs1e9_sub_corr.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/5/OTU_abs1e9_sub_corr.txt
/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/6/OTU_abs1e9_sub_corr.txt
/home/nick/notebook/SIPSim/dev/fullCyc_trim/SIP-core_unk_trm_corr.txt

In [341]:
%%R -i sim_corr_files -i emp_corr_file

df.corr = load.data.files(sim_corr_files, emp_corr_file) 

# binning
df.corr = df.corr %>%
    filter(!is.na(Mantel.corr)) %>%
    group_by(dataset, library, class.index.bin = ntile(class.index, 12)) 

df.corr %>% tail(n=3) %>% as.data.frame


  library class.index n.dist Mantel.corr    Pr Pr.corr SIM_rep   dataset
1      14  0.03918000     24 -0.11120248 0.007   0.042      NA Emperical
2      14  0.04295913     26 -0.08293387 0.051   0.153      NA Emperical
3      14  0.04673826     16 -0.10856411 0.021   0.105      NA Emperical
  class.index.bin
1              10
2              11
3              12

In [352]:
%%R -w 700 -h 300
# plotting
df.corr.s = df.corr %>%
    group_by(dataset, class.index.bin) %>%
    summarize(mean_Mantel.corr = mean(Mantel.corr),
              sd_Mantel.corr = sd(Mantel.corr))

ggplot(df.corr.s, aes(class.index.bin, mean_Mantel.corr, color=dataset,
                     ymin=mean_Mantel.corr-sd_Mantel.corr,
                     ymax=mean_Mantel.corr+sd_Mantel.corr)) +
    geom_pointrange() +
    labs(x='Class index (binned; 12 bins)', y='Mantel correlation coef.') +
    theme_bw() +
    theme(
        text = element_text(size=16)
    )



In [317]:
%%R -h 350

df.corr.lm = inner_join(df.corr, df.corr, c('class.index.bin' = 'class.index.bin')) %>%
    group_by(dataset.x, dataset.y) %>%
    nest() %>%
    mutate(model = purrr::map(data, ~lm(Mantel.corr.y ~ Mantel.corr.x, data=.))) %>%
    unnest(model %>% purrr::map(broom::glance)) %>%
    ungroup() %>%
    select(-data, -model) 
        
        
# table
df.corr.lm %>% dplyr::select(dataset.x, dataset.y, r.squared) %>% print
        
# plotting
ggplot(df.corr.lm, aes(dataset.x, dataset.y, fill=r.squared)) +
    geom_tile() +
    scale_fill_gradient(low='black', high='red') +
    labs(title='BD span') +
    theme(
        text = element_text(size=16)
    )


Source: local data frame [4 x 3]

   dataset.x  dataset.y r.squared
       (chr)      (chr)     (dbl)
1 Simulation Simulation 0.9008990
2 Simulation  Emperical 0.8168361
3  Emperical Simulation 0.8168361
4  Emperical  Emperical 0.9036932

Loading non-PCR subsampled OTU tables


In [176]:
OTU_files = !find $buildDir -name "OTU_abs1e9_sub.txt"
OTU_files


Out[176]:
['/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/7/OTU_abs1e9_sub.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/9/OTU_abs1e9_sub.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/8/OTU_abs1e9_sub.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/3/OTU_abs1e9_sub.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/10/OTU_abs1e9_sub.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/2/OTU_abs1e9_sub.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/1/OTU_abs1e9_sub.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/4/OTU_abs1e9_sub.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/5/OTU_abs1e9_sub.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/6/OTU_abs1e9_sub.txt']

In [177]:
%%R -i OTU_files
# loading files

df.SIM = list()
for (x in OTU_files){
    df = read.delim(x, sep='\t')
    xx = strsplit(x, '/')[[1]]
    df$SIM_rep = xx[10] %>% as.numeric
    df.SIM[[x]] = df
    }
df.SIM = do.call('rbind', df.SIM)
rownames(df.SIM) = 1:nrow(df.SIM)
df.SIM %>% head(n=3)


  library    fraction                          taxon BD_min BD_mid BD_max count
1       1  -inf-1.660 Acaryochloris_marina_MBIC11017   -Inf  1.659  1.659     0
2       1 1.660-1.664 Acaryochloris_marina_MBIC11017  1.660  1.662  1.664     0
3       1 1.664-1.667 Acaryochloris_marina_MBIC11017  1.664  1.665  1.667     1
     rel_abund SIM_rep
1 0.000000e+00       7
2 0.000000e+00       7
3 4.868786e-05       7

BD range where an OTU is detected

  • Do the simulated OTU BD distributions span the same BD range of the emperical data?

In [178]:
comm_files = !find $buildDir -name "comm.txt"
comm_files


Out[178]:
['/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/7/comm.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/9/comm.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/8/comm.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/3/comm.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/10/comm.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/2/comm.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/1/comm.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/4/comm.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/5/comm.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/6/comm.txt']

In [179]:
%%R -i comm_files

df.SIM.comm = list()
for (x in comm_files){
    df = read.delim(x, sep='\t')
    xx = strsplit(x, '/')[[1]]
    df$SIM_rep = xx[10] %>% as.numeric
    df.SIM.comm[[x]] = df
    }

df.SIM.comm = do.call(rbind, df.SIM.comm)
rownames(df.SIM.comm) = 1:nrow(df.SIM.comm)
df.SIM.comm = df.SIM.comm %>%
    rename('bulk_abund' = rel_abund_perc) %>%
    mutate(bulk_abund = bulk_abund / 100)
df.SIM.comm %>% head(n=3)


  library                   taxon_name bulk_abund rank SIM_rep
1       1 Halobacteriovorax_marinus_SJ 0.05728702    1       7
2       1 Zunongwangia_profunda_SM-A87 0.04295700    2       7
3       1   Staphylococcus_warneri_SG1 0.02965853    3       7

In [180]:
%%R

## joining SIP & comm (pre-fractionation)
df.SIM.j = inner_join(df.SIM, df.SIM.comm, c('library' = 'library',
                                             'taxon' = 'taxon_name',
                                             'SIM_rep' = 'SIM_rep')) %>%
    filter(BD_mid >= min_BD, 
           BD_mid <= max_BD) %>%
    mutate(SIM_rep = SIM_rep %>% as.character)
    
    
df.SIM.j %>% head(n=3)


  library    fraction                          taxon BD_min BD_mid BD_max count
1       1 1.675-1.678 Acaryochloris_marina_MBIC11017  1.675  1.676  1.678     1
2       1 1.678-1.680 Acaryochloris_marina_MBIC11017  1.678  1.679  1.680     1
3       1 1.680-1.686 Acaryochloris_marina_MBIC11017  1.680  1.683  1.686     1
     rel_abund SIM_rep bulk_abund rank
1 5.451374e-05       7 0.00016207 1026
2 7.409054e-05       7 0.00016207 1026
3 4.040731e-05       7 0.00016207 1026

In [181]:
%%R
# calculating BD range
df.SIM.j.f = df.SIM.j %>%
    filter(count > 0) %>%
    group_by(SIM_rep) %>%
    mutate(max_BD_range = max(BD_mid) - min(BD_mid)) %>%
    ungroup() %>%
    group_by(SIM_rep, taxon) %>%
    summarize(mean_bulk_abund = mean(bulk_abund),
              min_BD = min(BD_mid),
              max_BD = max(BD_mid),
              BD_range = max_BD - min_BD,
              BD_range_perc = BD_range / first(max_BD_range) * 100) %>%
    ungroup() 
    
df.SIM.j.f %>% head(n=3) %>% as.data.frame


  SIM_rep                                taxon mean_bulk_abund min_BD max_BD
1       1       Acaryochloris_marina_MBIC11017    0.0073490003  1.677  1.771
2       1       Acetobacterium_woodii_DSM_1030    0.0002365695  1.677  1.771
3       1 Acetobacter_pasteurianus_IFO_3283-03    0.0009053839  1.677  1.771
  BD_range BD_range_perc
1    0.094           100
2    0.094           100
3    0.094           100

In [182]:
%%R -h 300 -w 550
## plotting
ggplot(df.SIM.j.f, aes(mean_bulk_abund, BD_range_perc, color=SIM_rep)) +
    geom_point(alpha=0.5, shape='O') +
    scale_x_log10() +
    scale_y_continuous() +
    labs(x='Pre-fractionation abundance', y='% of total BD range') +
    #geom_vline(xintercept=0.001, linetype='dashed', alpha=0.5) +
    theme_bw() +
    theme(
        text = element_text(size=16),
        panel.grid = element_blank(),
        legend.position = 'none'
        )



In [185]:
%%R -h 300 -w 550
## plotting
ggplot(df.SIM.j.f, aes(mean_bulk_abund, BD_range_perc)) +
    geom_hex() +
    scale_x_log10(limits=c(0.0001, 0.1)) +
    scale_y_continuous() +
    labs(x='Pre-fractionation abundance', y='% of total BD range') +
    #geom_vline(xintercept=0.001, linetype='dashed', alpha=0.5) +
    theme_bw() +
    theme(
        text = element_text(size=16),
        panel.grid = element_blank(),
        legend.position = 'none'
        )


Assessing diversity

Asigning zeros


In [186]:
%%R
# giving value to missing abundances
min.pos.val = df.SIM.j %>%
    filter(rel_abund > 0) %>%
    group_by() %>%
    mutate(min_abund = min(rel_abund)) %>%
    ungroup() %>%
    filter(rel_abund == min_abund)

min.pos.val = min.pos.val[1,'rel_abund'] %>% as.numeric
imp.val = min.pos.val / 10


# convert numbers
df.SIM.j[df.SIM.j$rel_abund == 0, 'abundance'] = imp.val

# another closure operation
df.SIM.j = df.SIM.j %>%
    group_by(SIM_rep, fraction) %>%
    mutate(rel_abund = rel_abund / sum(rel_abund))


# status
cat('Below detection level abundances converted to: ', imp.val, '\n')


Below detection level abundances converted to:  3.408316e-06 

Plotting Shannon diversity for each


In [187]:
%%R
shannon_index_long = function(df, abundance_col, ...){
    # calculating shannon diversity index from a 'long' formated table
    ## community_col = name of column defining communities
    ## abundance_col = name of column defining taxon abundances
    df = df %>% as.data.frame
    cmd = paste0(abundance_col, '/sum(', abundance_col, ')')
    df.s = df %>%
        group_by_(...) %>%
        mutate_(REL_abundance = cmd) %>%
        mutate(pi__ln_pi = REL_abundance * log(REL_abundance),
               shannon = -sum(pi__ln_pi, na.rm=TRUE)) %>%
        ungroup() %>% 
        dplyr::select(-REL_abundance, -pi__ln_pi) %>%
        distinct_(...) 
    return(df.s)
}

In [188]:
%%R -w 800 -h 300
# calculating shannon
df.SIM.shan = shannon_index_long(df.SIM.j, 'count', 'library', 'fraction') %>%
    filter(BD_mid >= min_BD, 
           BD_mid <= max_BD) 

df.SIM.shan.s = df.SIM.shan %>%
    group_by(BD_bin = ntile(BD_mid, 24)) %>%
    summarize(mean_BD = mean(BD_mid),
              mean_shannon = mean(shannon),
              sd_shannon = sd(shannon))

# plotting
p = ggplot(df.SIM.shan.s, aes(mean_BD, mean_shannon, 
                             ymin=mean_shannon-sd_shannon,
                             ymax=mean_shannon+sd_shannon)) +
    geom_pointrange() +
    labs(x='Buoyant density', y='Shannon index') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p


Plotting variance


In [189]:
%%R -w 800 -h 350
df.SIM.j.var = df.SIM.j %>%
    group_by(SIM_rep, fraction) %>%
    mutate(variance = var(rel_abund)) %>%
    ungroup() %>%
    distinct(SIM_rep, fraction) %>%
    select(SIM_rep, fraction, variance, BD_mid)

ggplot(df.SIM.j.var, aes(BD_mid, variance, color=SIM_rep)) +
    geom_point() +
    geom_line() +
    theme_bw() +
    theme(
        text = element_text(size=16)
    )



In [194]:
%%R -w 800 -h 350
df.SIM.j.var.s = df.SIM.j.var %>%
    group_by(BD_bin = ntile(BD_mid, 24)) %>%
    summarize(mean_BD = mean(BD_mid),
              mean_var = mean(variance),
              sd_var = sd(variance)) %>%
    ungroup()

ggplot(df.SIM.j.var.s, aes(mean_BD, mean_var, 
                         ymin=mean_var-sd_var,
                         ymax=mean_var+sd_var)) +
    geom_pointrange() +
    geom_line() +
    theme_bw() +
    theme(
        text = element_text(size=16)
    )


Notes

  • spike at high G+C
    • absence of taxa or presence of taxa at those locations?

Correlograms


In [202]:
%%R
BD.diffs = function(BDs){
    df.BD = expand.grid(BDs, BDs)
    df.BD$diff = df.BD %>% apply(1, diff) %>% abs %>% as.vector    
    df.BD = df.BD %>%
        spread(Var1, diff) 
    rownames(df.BD) = df.BD$Var2
    df.BD$Var2 = NULL
    dist.BD = df.BD %>% as.matrix
    dist.BD[upper.tri(dist.BD, diag=TRUE)] = 0
    dist.BD %>% as.dist
    }


SIM_reps = df.SIM.j$SIM_rep %>% unique
BDs.l = list()
for(sr in SIM_reps){
    tmp = df.SIM.j %>% filter(SIM_rep == sr) 
    BDs.l[[sr]] = tmp$BD_mid %>% unique
}

BD.dist.l = lapply(BDs.l, BD.diffs)
BD.dist.l %>% names


 [1] "7"  "9"  "8"  "3"  "10" "2"  "1"  "4"  "5"  "6" 

Bray-curtis ~ BD_diff


In [251]:
%%R
# calculating fraction-fraction community dissimilarity
vegdist.by = function(sr, df, ...){
    df.w = df %>%
        filter(SIM_rep == sr) %>%
        select(taxon, fraction, abundance) %>%
        spread(taxon, abundance) %>%
        as.data.frame()
    
    rownames(df.w) = df.w$fraction
    df.w$fraction = NULL
    df.w$SIM_rep = NULL
    df.w[is.na(df.w)] = 0
    
    vegan::vegdist(df.w, ...)
}


SIM_reps = df.SIM.j$SIM_rep %>% unique
bray.l = list()
for (sr in SIM_reps){
    bray.l[[sr]] = vegdist.by(sr, df.SIM.j, method='bray')    
}
bray.l %>% names


 [1] "7"  "9"  "8"  "3"  "10" "2"  "1"  "4"  "5"  "6" 

In [252]:
%%R -w 800 -h 750

cal.corr = function(d, BDs, nclass=24, ...){
    d[is.nan(d)] = 0
    stopifnot((d %>% length) == (BDs %>% length)) 
    vegan::mantel.correlog(d, BDs, n.class=nclass)
}

SIM_reps = df.SIM.j$SIM_rep %>% unique
corr.l = list()
for(sr in SIM_reps){
    corr.l[[sr]] = cal.corr(bray.l[[sr]], BD.dist.l[[sr]])
}

par(mfrow=c(4,3))
for (n in names(corr.l) %>% as.numeric %>% sort){
    plot(corr.l[[n]])
    title(n)
}



In [253]:
%%R
# making df of mantel values
corr.l.mantel = list()
for (n in corr.l %>% names){
    corr.l.mantel[[n]] = corr.l[[n]]['mantel.res'][[1]] %>% as.data.frame
    corr.l.mantel[[n]]$Day = n
    colnames(corr.l.mantel[[n]]) = c('class.index', 'n.dist', 'Mantel.corr', 'Pr', 'Pr.corr', 'Day')
}
df.corr = do.call(rbind, corr.l.mantel) %>% as.data.frame
df.corr %>% head


         class.index n.dist Mantel.corr    Pr Pr.corr Day
7.D.cl.1   0.0049375     46  0.37567861 0.001   0.001   7
7.D.cl.2   0.0088125     42  0.20974927 0.001   0.002   7
7.D.cl.3   0.0126875     42  0.02857775 0.295   0.295   7
7.D.cl.4   0.0165625     40 -0.07744649 0.090   0.180   7
7.D.cl.5   0.0204375     36 -0.14364415 0.002   0.006   7
7.D.cl.6   0.0243125     36 -0.10050422 0.025   0.075   7

In [254]:
%%R -w 650 -h 300
# summarizing
df.corr.s = df.corr %>%
    filter(! is.na(Mantel.corr)) %>%
    group_by(bin = ntile(class.index, 12)) %>%
    summarize(n = n(),
              mean.class.index = mean(class.index),
              mean.Mantel.corr = mean(Mantel.corr, na.rm=TRUE),
              sd.Mantel.corr = sd(Mantel.corr, na.rm=TRUE),
              max.Pr.corr = max(Pr.corr, na.rm=TRUE)) %>%
    ungroup() %>%
    mutate(significant = ifelse(max.Pr.corr <= 0.05, TRUE, FALSE))


# plotting
ggplot(df.corr.s, aes(mean.class.index, mean.Mantel.corr, color=significant,
                      ymin=mean.Mantel.corr-sd.Mantel.corr,
                      ymax=mean.Mantel.corr+sd.Mantel.corr)) +
    geom_pointrange() +
    scale_color_discrete('P_corr < 0.05') +
    labs(x='Class index (binned; 12 bins)', y='Mean Mantel correlation\n(+/- stdev)') +
    theme_bw() +
    theme( 
        text = element_text(size=16)
    )


Binary ~ BD_diff


In [255]:
%%R
SIM_reps = df.SIM.j$SIM_rep %>% unique
binary.l = list()
for (sr in SIM_reps){
    binary.l[[sr]] = vegdist.by(sr, df.SIM.j, binary=TRUE)    
}
binary.l %>% names


 [1] "7"  "9"  "8"  "3"  "10" "2"  "1"  "4"  "5"  "6" 

In [256]:
%%R -w 800 -h 750

SIM_reps = df.SIM.j$SIM_rep %>% unique
corr.l = list()
for(sr in SIM_reps){
    corr.l[[sr]] = cal.corr(binary.l[[sr]], BD.dist.l[[sr]])
}

par(mfrow=c(4,3))
for (n in names(corr.l) %>% as.numeric %>% sort){
    plot(corr.l[[n]])
    title(n)
}



In [257]:
%%R
# making df of mantel values
corr.l.mantel = list()
for (n in corr.l %>% names){
    corr.l.mantel[[n]] = corr.l[[n]]['mantel.res'][[1]] %>% as.data.frame
    corr.l.mantel[[n]]$Day = n
    colnames(corr.l.mantel[[n]]) = c('class.index', 'n.dist', 'Mantel.corr', 'Pr', 'Pr.corr', 'Day')
}
df.corr = do.call(rbind, corr.l.mantel) %>% as.data.frame
df.corr %>% head


         class.index n.dist Mantel.corr    Pr Pr.corr Day
7.D.cl.1   0.0049375     46  0.37567861 0.001   0.001   7
7.D.cl.2   0.0088125     42  0.20974927 0.001   0.002   7
7.D.cl.3   0.0126875     42  0.02857775 0.281   0.281   7
7.D.cl.4   0.0165625     40 -0.07744649 0.072   0.144   7
7.D.cl.5   0.0204375     36 -0.14364415 0.003   0.009   7
7.D.cl.6   0.0243125     36 -0.10050422 0.029   0.087   7

In [258]:
%%R -w 650 -h 300
# summarizing
df.corr.s = df.corr %>%
    filter(! is.na(Mantel.corr)) %>%
    group_by(bin = ntile(class.index, 12)) %>%
    summarize(n = n(),
              mean.class.index = mean(class.index),
              mean.Mantel.corr = mean(Mantel.corr, na.rm=TRUE),
              sd.Mantel.corr = sd(Mantel.corr, na.rm=TRUE),
              max.Pr.corr = max(Pr.corr, na.rm=TRUE)) %>%
    ungroup() %>%
    mutate(significant = ifelse(max.Pr.corr <= 0.05, TRUE, FALSE))


# plotting
ggplot(df.corr.s, aes(mean.class.index, mean.Mantel.corr, color=significant,
                      ymin=mean.Mantel.corr-sd.Mantel.corr,
                      ymax=mean.Mantel.corr+sd.Mantel.corr)) +
    geom_pointrange() +
    scale_color_discrete('P_corr < 0.05') +
    labs(x='Class index (binned; 12 bins)', y='Mean Mantel correlation\n(+/- stdev)') +
    theme_bw() +
    theme( 
        text = element_text(size=16)
    )


Plotting absolute abundance distributions


In [195]:
OTU_files = !find $buildDir -name "OTU_abs1e9.txt"
OTU_files


Out[195]:
['/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/7/OTU_abs1e9.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/9/OTU_abs1e9.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/8/OTU_abs1e9.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/3/OTU_abs1e9.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/10/OTU_abs1e9.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/2/OTU_abs1e9.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/1/OTU_abs1e9.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/4/OTU_abs1e9.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/5/OTU_abs1e9.txt',
 '/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/6/OTU_abs1e9.txt']

In [196]:
%%R -i OTU_files
# loading files

df.abs = list()
for (x in OTU_files){
    SIM_rep = gsub('/home/nick/notebook/SIPSim/dev/fullCyc/n1147_frag_norm_9_2.5_n5/rep3_DBL-comm/', '', x)
    SIM_rep = gsub('/OTU_abs1e9.txt', '', SIM_rep)
    df.abs[[SIM_rep]] = read.delim(x, sep='\t') 
    }
df.abs = do.call('rbind', df.abs)
df.abs$SIM_rep = gsub('\\.[0-9]+$', '', rownames(df.abs))
rownames(df.abs) = 1:nrow(df.abs)
df.abs %>% head(n=3)


  library                          taxon    fraction BD_min BD_mid BD_max count
1       1 Acaryochloris_marina_MBIC11017  -inf-1.660   -Inf  1.659  1.659    10
2       1 Acaryochloris_marina_MBIC11017 1.660-1.664  1.660  1.662  1.664     4
3       1 Acaryochloris_marina_MBIC11017 1.664-1.667  1.664  1.665  1.667     4
     rel_abund SIM_rep
1 2.191036e-05       7
2 3.083517e-05       7
3 4.024712e-05       7

In [197]:
%%R -w 800 

ggplot(df.abs, aes(BD_mid, count, fill=taxon)) +
    geom_area(stat='identity', position='dodge', alpha=0.5) +
    labs(x='Buoyant density', y='Subsampled community\n(absolute abundance)') +
    facet_grid(SIM_rep ~ .) +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none',
        axis.title.y = element_text(vjust=1),        
        axis.title.x = element_blank()
        )



In [198]:
%%R -w 800 

p1 = ggplot(df.abs %>% filter(BD_mid < 1.7), aes(BD_mid, count, fill=taxon, color=taxon)) +
    labs(x='Buoyant density', y='Subsampled community\n(absolute abundance)') +
    facet_grid(SIM_rep ~ .) +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none',
        axis.title.y = element_text(vjust=1),        
        axis.title.x = element_blank()
        )

p2 = p1 + geom_line(alpha=0.25) + scale_y_log10()
p1 = p1 + geom_area(stat='identity', position='dodge', alpha=0.5) 

grid.arrange(p1, p2, ncol=2)



In [199]:
%%R -w 800 

p1 = ggplot(df.abs %>% filter(BD_mid > 1.72), aes(BD_mid, count, fill=taxon, color=taxon)) +
    labs(x='Buoyant density', y='Subsampled community\n(absolute abundance)') +
    facet_grid(SIM_rep ~ .) +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none',
        axis.title.y = element_text(vjust=1),        
        axis.title.x = element_blank()
        )


p2 = p1 + geom_line(alpha=0.25) + scale_y_log10()
p1 = p1 + geom_area(stat='identity', position='dodge', alpha=0.5) 

grid.arrange(p1, p2, ncol=2)



In [ ]: