Description:

Assessing how much diffusion plays a role at extreme cases of small fragment proliferation in the fragment pool.

Workflow:

Fragment GC distributions
Community abundance and incorporation
- Simulate communities for each gradient fraction
Simulate isotope incorp
Creating OTU tables
- Simulating a high abundance of small fragments



In [6]:

    
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome10/smallFragTest/'
SIPSimExe = '/home/nick/notebook/SIPSim/SIPSim'



In [2]:

    
import os
import sys
import numpy as np
import pandas as pd



In [3]:

    
%load_ext rpy2.ipython
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib



In [5]:

    
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)









    





Loading required package: grid

Simulating isotope incorporation



In [7]:

    
import os

# making config file
config = """
[library 1]
  # baseline: no incorp
  
  [[intraPopDist 1]]
  distribution = uniform
  weight = 1

    [[[start]]]

      [[[[interPopDist 1]]]]
        distribution = uniform
        start = 0
        end = 0

    [[[end]]]

      [[[[interPopDist 1]]]]
        distribution = uniform
        start = 0
        end = 0

[library 2]
  # split intra-populations
  ## to get some taxa with split; use inter-pop mixture for 2nd intra-pop mu, where mixture is highly uneven
  
  [[intraPopDist 1]]
  distribution = normal
  weight = 0.5

    [[[mu]]]

      [[[[interPopDist 1]]]]
        distribution = normal
        mu = 90
        sigma = 2

    [[[sigma]]]

      [[[[interPopDist 1]]]]
        distribution = normal
        mu = 5
        sigma = 2

  [[intraPopDist 2]]
  distribution = normal
  weight = 0.5

    [[[mu]]]

      [[[[interPopDist 1]]]]
        distribution = normal
        mu = 5
        sigma = 2

    [[[sigma]]]

      [[[[interPopDist 1]]]]
        distribution = normal
        mu = 5
        sigma = 2


[library 3]
  # split inter-pop distribution (some approx. full; others none)
  
  [[intraPopDist 1]]
  distribution = normal
  weight = 1

    [[[mu]]]

      [[[[interPopDist 1]]]]
        distribution = normal
        mu = 90
        sigma = 2

      # these taxa in the community get no incorp
      [[[[interPopDist 2]]]]
        distribution = uniform
        start = 0
        end = 0

    [[[sigma]]]

      [[[[interPopDist 1]]]]
        distribution = normal
        mu = 5
        sigma = 2
        

"""

outfile = os.path.join(workDir, 'genome10_n3.config')

outf = open(outfile, 'wb')
outf.write(config)
outf.close()

!cd $workDir; \ $SIPSimExe isoIncorp ../genome10_comm_n3.txt genome10_n3.config > genome10_comm_n3_incorp.txt

Simulating genome fragments

uniform distribution of fragment lengths



In [57]:

    
!cd $workDir; \
    $SIPSimExe fragGC ../genomes/genomes10.txt \
    --fp ../genomes/ --fr ../515Fm-927Rm.fna \
    --fld uniform,500,10000 \
    --flr 500,None \
    --np 10 > genome10_ampFragGC_unif5-100.txt









    



Processing: "Roseobacter_litoralis_Och_149"
Processing: "Idiomarina_loihiensis_GSL_199"
Processing: "Bacillus_cereus_F837_76"
Processing: "Micrococcus_luteus_NCTC_2665"
Processing: "Geobacter_lovleyi_SZ"
Processing: "Cyanobium_gracile_PCC_6307"
Processing: "Leuconostoc_citreum_KM20"
Processing: "Escherichia_coli_APEC_O78"
Processing: "Xanthomonas_axonopodis_Xac29-1"
Processing: "Nitrosomonas_europaea_ATCC_19718"

Plotting fragment sizes



In [58]:

    
%%R -i workDir

setwd(workDir)

tbl = read.delim('genome10_ampFragGC_unif5-100.txt', sep='\t')
tbl$fragLength = abs(tbl$fragEnd - tbl$fragStart) + 1



In [59]:

    
%%R -i workDir -h 300 -w 550

ggplot(tbl, aes(fragLength)) +
    geom_histogram(binwidth=100) +
    theme(
        text = element_text(size=16)
    )



In [60]:

    
%%R -i workDir -w 900 

ggplot(tbl, aes(taxon_name, fragLength)) +
    geom_violin() +
    theme(
        text = element_text(size=16),
        axis.text.x = element_text(angle=90, hjust=1)
    )

Creating OTU table & plotting simulated fragment info



In [69]:

    
!cd $workDir; \
    $SIPSimExe OTU_table \
    genome10_ampFragGC_unif5-100.txt \
    ../genome10_comm_n3.txt \
    genome10_comm_n3_incorp.txt \
    ../genome10_comm_n3_fracs.txt \
    --abs_abund 1e8 \
    --log genome10_OTU_abnd1e8_log.txt \
    > genome10_unif5-100_OTU_abnd1e8.txt









    



Processing library: "1"
  Processing taxon: "Bacillus_cereus_F837_76"
    N-fragments:   11300354
    Time elapsed:  207.6 sec
  Processing taxon: "Roseobacter_litoralis_Och_149"
    N-fragments:   10989669
    Time elapsed:  201.4 sec
  Processing taxon: "Xanthomonas_axonopodis_Xac29-1"
    N-fragments:   10687525
    Time elapsed:  197.9 sec
  Processing taxon: "Escherichia_coli_APEC_O78"
    N-fragments:   10393689
    Time elapsed:  189.8 sec
  Processing taxon: "Leuconostoc_citreum_KM20"
    N-fragments:   10107931
    Time elapsed:  183.7 sec
  Processing taxon: "Cyanobium_gracile_PCC_6307"
    N-fragments:   9830030
    Time elapsed:  180.0 sec
  Processing taxon: "Geobacter_lovleyi_SZ"
    N-fragments:   9559769
    Time elapsed:  175.7 sec
  Processing taxon: "Nitrosomonas_europaea_ATCC_19718"
    N-fragments:   9296938
    Time elapsed:  170.2 sec
  Processing taxon: "Micrococcus_luteus_NCTC_2665"
    N-fragments:   9041334
    Time elapsed:  165.6 sec
  Processing taxon: "Idiomarina_loihiensis_GSL_199"
    N-fragments:   8792757
    Time elapsed:  161.6 sec
Processing library: "2"
  Processing taxon: "Bacillus_cereus_F837_76"
    N-fragments:   5188078
    Time elapsed:  103.1 sec
  Processing taxon: "Roseobacter_litoralis_Och_149"
    N-fragments:   17134534
    Time elapsed:  344.7 sec
  Processing taxon: "Xanthomonas_axonopodis_Xac29-1"
    N-fragments:   11505806
    Time elapsed:  228.5 sec
  Processing taxon: "Escherichia_coli_APEC_O78"
    N-fragments:   4251370
    Time elapsed:  84.2 sec
  Processing taxon: "Leuconostoc_citreum_KM20"
    N-fragments:   14040891
    Time elapsed:  286.8 sec
  Processing taxon: "Cyanobium_gracile_PCC_6307"
    N-fragments:   6331173
    Time elapsed:  128.3 sec
  Processing taxon: "Geobacter_lovleyi_SZ"
    N-fragments:   20909802
    Time elapsed:  436.8 sec
  Processing taxon: "Nitrosomonas_europaea_ATCC_19718"
    N-fragments:   7726126
    Time elapsed:  157.5 sec
  Processing taxon: "Micrococcus_luteus_NCTC_2665"
    N-fragments:   9428431
    Time elapsed:  192.8 sec
  Processing taxon: "Idiomarina_loihiensis_GSL_199"
    N-fragments:   3483784
    Time elapsed:  71.1 sec
Processing library: "3"
  Processing taxon: "Bacillus_cereus_F837_76"
    N-fragments:   6571355
    Time elapsed:  133.4 sec
  Processing taxon: "Roseobacter_litoralis_Och_149"
    N-fragments:   16714389
    Time elapsed:  338.2 sec
  Processing taxon: "Xanthomonas_axonopodis_Xac29-1"
    N-fragments:   4523563
    Time elapsed:  90.7 sec
  Processing taxon: "Escherichia_coli_APEC_O78"
    N-fragments:   5452150
    Time elapsed:  110.8 sec
  Processing taxon: "Leuconostoc_citreum_KM20"
    N-fragments:   9546170
    Time elapsed:  197.9 sec
  Processing taxon: "Cyanobium_gracile_PCC_6307"
    N-fragments:   7920307
    Time elapsed:  164.2 sec
  Processing taxon: "Geobacter_lovleyi_SZ"
    N-fragments:   3753130
    Time elapsed:  74.4 sec
  Processing taxon: "Nitrosomonas_europaea_ATCC_19718"
    N-fragments:   11505785
    Time elapsed:  236.1 sec
  Processing taxon: "Micrococcus_luteus_NCTC_2665"
    N-fragments:   13867666
    Time elapsed:  284.8 sec
  Processing taxon: "Idiomarina_loihiensis_GSL_199"
    N-fragments:   20145479
    Time elapsed:  413.0 sec
Fragment info written to: genome10_OTU_abnd1e8_log.txt

Plotting out fragment info



In [62]:

    
%%R -i workDir

# loading file
infile = paste(c(workDir, 'genome10_OTU_abnd1e8_log.txt'), collapse='/')
tbl = read.delim(infile, sep='\t')

# reformat
tbl$taxon = gsub("(.*?_.*?)_(.+)", "\\1\n\\2", tbl$taxon)
tbl$fragment_length = tbl$fragment_length / 1000



In [63]:

    
%%R -w 1000 -h 700
# plot

tbl.l1 = tbl %>% filter(library == 1)

ggplot(tbl.l1, aes(fragment_length, fragment_GC)) +
    geom_density2d() +
    facet_wrap( ~ taxon) +
    labs(x="fragment length [kb]", y="fragment G+C") +
    theme(
        text = element_text(size=18),
        axis.text.x = element_text(angle=90)
    )

Plotting OTU table



In [75]:

    
%%R -i workDir

# loading file
infile = paste(c(workDir, 'genome10_unif5-100_OTU_abnd1e8.txt'), collapse='/')
tbl = read.delim(infile, sep='\t')



In [76]:

    
%%R

# formatting table
tbl$BD_min = gsub('-.+', '', tbl$fractions)
tbl$BD_min = as.numeric(tbl$BD_min)
tbl$BD_max = gsub('.+-', '', tbl$fractions)
tbl$BD_max = as.numeric(tbl$BD_max)



In [77]:

    
%%R

# summarizing counts (should be approx. total abundance)
tbl %>%
    group_by(library) %>%
    summarize(sum(count))









    





Source: local data frame [3 x 2]

  library sum(count)
1       1   99999996
2       2   99999995
3       3   99999994



In [88]:

    
%%R -w 800
# plotting absolute abundances

## BD for G+C of 0 or 100
BD.GCp0 = 0 * 0.098 + 1.66
BD.GCp100 = 1 * 0.098 + 1.66

## plot
ggplot(tbl, aes(BD_min, count, fill=taxon, group=taxon)) +
    geom_area(stat='identity', alpha=0.5, position='dodge') +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    facet_grid(library ~ .) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( text = element_text(size=16) )



In [89]:

    
%%R -w 800

# plotting relative abundances
ggplot(tbl, aes(BD_min, count, fill=taxon, group=taxon)) +
    geom_area(stat='identity', alpha=0.8, position='fill') +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    facet_grid(library ~ .) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( text = element_text(size=16) )



In [90]:

    
%%R -w 800
# plotting absolute abundances on log scale

## BD for G+C of 0 or 100
BD.GCp0 = 0 * 0.098 + 1.66
BD.GCp100 = 1 * 0.098 + 1.66

## plot
ggplot(tbl, aes(BD_min, count, color=taxon, group=taxon)) +
    geom_point(alpha=0.5) +
    geom_line() +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    scale_y_log10() +
    facet_grid(library ~ .) +
    labs(x='Buoyant density') +
    theme( text = element_text(size=16) )

Notes:



In [ ]: