Running SIPSim pipeline to simulate priming_exp gradient dataset

  • Basing simulation params off of priming_exp dataset
    • Basing starting community diversity on mean percent abundances in all fraction samples for the gradient
    • Other parameters are 'default'

Setting variables


In [7]:
workDir = '/home/nick/notebook/SIPSim/dev/priming_exp/validation_sample/X12C.700.45_fracRichness/'
genomeDir = '/home/nick/notebook/SIPSim/dev/priming_exp/genomes/'
allAmpFrags = '/home/nick/notebook/SIPSim/dev/bac_genome1210/validation/ampFrags.pkl'
otuTableFile = '/var/seq_data/priming_exp/data/otu_table.txt'
metaDataFile = '/var/seq_data/priming_exp/data/allsample_metadata_nomock.txt'
primerFile = '/home/nick/notebook/SIPSim/dev/515F-806R.fna'

cdhit_dir = '/home/nick/notebook/SIPSim/dev/priming_exp/CD-HIT/'
R_dir = '/home/nick/notebook/SIPSim/lib/R/'
figureDir = '/home/nick/notebook/SIPSim/figures/'

# simulation params
comm_richness =  6901
seq_per_fraction = ['lognormal', 10.096, 1.116]

# for making genome_map file for genome fragment simulation
taxonMapFile = os.path.join(cdhit_dir, 'target_taxa.txt')
genomeFilterFile = os.path.join(cdhit_dir, 'genomeFile_seqID_filt.txt')
abundFile = os.path.join('/home/nick/notebook/SIPSim/dev/priming_exp/exp_info', 'X12C.700.45_frac_OTU.txt')

# misc
nprocs = 20

Init


In [8]:
import glob
import cPickle as pickle
import copy
from IPython.display import Image

In [9]:
%load_ext rpy2.ipython

In [10]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)


/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: 
Attaching package: ‘dplyr’


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from ‘package:stats’:

    filter, lag


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: grid

  res = super(Function, self).__call__(*new_args, **new_kwargs)

In [11]:
if not os.path.isdir(workDir):
    os.makedirs(workDir)

Creating a community file from the fraction relative abundances


In [6]:
%%R -i abundFile
# reading priming experiment OTU table
tbl.abund = read.delim(abundFile, sep='\t')
tbl.abund %>% head


      OTUId mean_perc_abund median_perc_abund max_perc_abund      sample
1     OTU.1     2.191008528       2.109151822    3.455925277 X12C.700.45
2    OTU.10     0.044722200       0.035513830    0.125434932 X12C.700.45
3   OTU.100     0.234795756       0.201043020    0.469029886 X12C.700.45
4  OTU.1000     0.003686293       0.002395669    0.014643969 X12C.700.45
5 OTU.10000     0.002177551       0.002282115    0.002335085 X12C.700.45
6  OTU.1001     0.023347018       0.021015762    0.057093919 X12C.700.45

In [7]:
%%R
tbl.comm = tbl.abund %>%
    rename('taxon_name' = OTUId,
           'rel_abund_perc' = mean_perc_abund) %>%
    select(taxon_name, rel_abund_perc) %>%
    mutate(library = '1',
           rank = row_number(-rel_abund_perc)) %>%
    arrange(rank)
    
tbl.comm %>% head


  taxon_name rel_abund_perc library rank
1      OTU.1      2.1910085       1    1
2      OTU.3      1.3806520       1    2
3      OTU.8      1.3443226       1    3
4      OTU.2      1.2433854       1    4
5     OTU.13      0.9717986       1    5
6     OTU.22      0.9351165       1    6

In [8]:
%%R
# rescaling rel_abund_perc so sum(rel_abund_perc) = 100
tbl.comm = tbl.comm %>%
    group_by(library) %>%
    mutate(total = sum(rel_abund_perc)) %>% 
    ungroup() %>%
    mutate(rel_abund_perc = rel_abund_perc * 100 / total) %>%
    select(library, taxon_name, rel_abund_perc, rank)
    
tbl.comm %>% head


Source: local data frame [6 x 4]

  library taxon_name rel_abund_perc rank
1       1      OTU.1      1.8857213    1
2       1      OTU.3      1.1882769    2
3       1      OTU.8      1.1570095    3
4       1      OTU.2      1.0701365    4
5       1     OTU.13      0.8363917    5
6       1     OTU.22      0.8048207    6

In [9]:
%%R -i comm_richness
# number of OTUs
n.OTUs = tbl.comm$taxon_name %>% unique %>% length
cat('Number of OTUs:', n.OTUs, '\n')

# assertion
cat('Community richness = number of OTUs?  ', comm_richness == n.OTUs, '\n')


Number of OTUs: 6901 
Community richness = number of OTUs?   TRUE 

In [10]:
%%R -i workDir

commFile = paste(c(workDir, 'comm.txt'), collapse='/')
write.table(tbl.comm, commFile, sep='\t', quote=F, row.names=F)

Plotting community distribution


In [11]:
%%R -i workDir

commFile = paste(c(workDir, 'comm.txt'), collapse='/')
comm = read.delim(commFile, sep='\t')
comm %>% head


  library taxon_name rel_abund_perc rank
1       1      OTU.1      1.8857213    1
2       1      OTU.3      1.1882769    2
3       1      OTU.8      1.1570095    3
4       1      OTU.2      1.0701365    4
5       1     OTU.13      0.8363917    5
6       1     OTU.22      0.8048207    6

In [12]:
%%R -w 900 -h 350

ggplot(comm, aes(rank, rel_abund_perc)) +
    geom_point() +
    labs(x='Rank', y='% relative abundance', title='Priming experiment community abundance distribution') +
    theme_bw() +
    theme(
        text = element_text(size=16)
        )


Simulating fragments

Making a genome index file to map genome fasta files to OTUs

  • Will be used for community simulation
  • Just OTUs with association to genomes

In [13]:
%%R -i taxonMapFile -i genomeFilterFile 

taxonMap = read.delim(taxonMapFile, sep='\t') %>%
    select(target_genome, OTU) %>%
    distinct()
taxonMap %>% nrow %>% print
taxonMap %>% head(n=3) %>% print

breaker = '----------------\n'
cat(breaker)

genomeFilter = read.delim(genomeFilterFile, sep='\t', header=F) 
genomeFilter %>% nrow %>% print
genomeFilter %>% head(n=3) %>% print

cat(breaker)

comm = read.delim(commFile, sep='\t') 
comm %>% nrow %>% print
comm %>% head(n=3) %>% print


[1] 236
                               target_genome      OTU
1 CP001738_Thermomonospora_curvata_DSM_43183 OTU.8540
2 CP001738_Thermomonospora_curvata_DSM_43183 OTU.9267
3 CP001738_Thermomonospora_curvata_DSM_43183 OTU.1457
----------------
[1] 187
                                                         V1
1                  CP003093_Pseudoxanthomonas_spadix_BD_a59
2                  CP000511_Mycobacterium_vanbaalenii_PYR_1
3 CP003344_Desulfitobacterium_dichloroeliminans_LMG_P_21439
                                                                                                            V2
1                  /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Pseudoxanthomonas_spadix_BD-a59.fasta
2                  /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Mycobacterium_vanbaalenii_PYR-1.fasta
3 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Desulfitobacterium_dichloroeliminans_LMG_P-21439.fasta
----------------
[1] 6901
  library taxon_name rel_abund_perc rank
1       1      OTU.1       1.885721    1
2       1      OTU.3       1.188277    2
3       1      OTU.8       1.157009    3

In [14]:
%%R
taxonMap$OTU %>% table %>% sort(decreasing=T) %>% head


.
    OTU.1    OTU.10   OTU.101   OTU.102 OTU.10237  OTU.1035 
        1         1         1         1         1         1 

In [15]:
%%R

tbl.j = inner_join(taxonMap, genomeFilter, c('target_genome' = 'V1')) %>%
     rename('fasta_file' = V2) %>%
     select(OTU, fasta_file, target_genome)

tbl.j %>% head(n=3)


       OTU
1 OTU.8540
2 OTU.9267
3 OTU.1457
                                                                                     fasta_file
1 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
2 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
3 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
                               target_genome
1 CP001738_Thermomonospora_curvata_DSM_43183
2 CP001738_Thermomonospora_curvata_DSM_43183
3 CP001738_Thermomonospora_curvata_DSM_43183

In [16]:
%%R
tbl.j$OTU %>% table %>% sort(decreasing=T) %>% head


.
    OTU.1    OTU.10   OTU.101   OTU.102 OTU.10237  OTU.1035 
        1         1         1         1         1         1 

In [17]:
%%R
tbl.j2 = inner_join(tbl.j, comm, c('OTU' = 'taxon_name')) 

n.target.genomes = tbl.j2$OTU %>% unique %>% length
cat('Number of target OTUs: ', n.target.genomes, '\n')
cat('--------', '\n')
tbl.j2 %>% head(n=3)


Number of target OTUs:  196 
-------- 
       OTU
1 OTU.9267
2 OTU.1457
3   OTU.77
                                                                                     fasta_file
1 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
2 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
3   /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Pseudoxanthomonas_spadix_BD-a59.fasta
                               target_genome library rel_abund_perc rank
1 CP001738_Thermomonospora_curvata_DSM_43183       1    0.003132433 4370
2 CP001738_Thermomonospora_curvata_DSM_43183       1    0.011451281 1176
3   CP003093_Pseudoxanthomonas_spadix_BD_a59       1    0.028904303  569

In [18]:
%%R -i workDir

outFile = paste(c(workDir, 'target_genome_index.txt'), collapse='/')
write.table(tbl.j2, outFile, sep='\t', quote=F, row.names=F, col.names=F)

Plotting community abundance distribution of target genomes


In [19]:
%%R -w 900 -h 350

ggplot(tbl.j2, aes(rank, rel_abund_perc)) +
    geom_point(size=3, shape='O', color='red') +
    labs(x='Rank', y='% relative abundance', title='Priming experiment community abundance distribution') +
    theme_bw() +
    theme(
        text = element_text(size=16)
        )


Simulating fragments of genomes that match priming_exp bulk OTUs


In [20]:
!cd $workDir; \
    SIPSim fragments \
    target_genome_index.txt \
    --fp $genomeDir \
    --fr $primerFile \
    --fld skewed-normal,9000,2500,-5 \
    --flr None,None \
    --nf 10000 \
    --np $nprocs \
    2> ampFrags.log \
    > ampFrags.pkl

Appending fragments from randomly selected genomes of total dataset (n=1210)

  • This is to obtain the richness of the bulk soil community
  • Random OTUs will be named after non-target OTUs in comm file

Making list of non-target OTUs


In [21]:
%%R -i workDir
# loading files

## target genome index (just OTUs with associated genome)
inFile = paste(c(workDir, 'target_genome_index.txt'), collapse='/')
tbl.target = read.delim(inFile, sep='\t', header=F)
colnames(tbl.target) = c('OTUId', 'fasta_file', 'genome_name')

## comm file of total community OTUs 
commFile = paste(c(workDir, 'comm.txt'), collapse='/')
tbl.comm = read.delim(commFile, sep='\t')

In [22]:
%%R
# just OTUs w/out an associated genome
tbl.j = anti_join(tbl.comm, tbl.target, c('taxon_name' = 'OTUId'))
n.nontarget.genomes = tbl.j$taxon_name %>% length
cat('Number of non-target genomes: ', n.nontarget.genomes, '\n')
cat('---------\n')
tbl.j %>% head(n=5)


Number of non-target genomes:  6705 
---------
  library taxon_name rel_abund_perc rank
1       1   OTU.8166   0.0004756572 6901
2       1   OTU.8112   0.0004756572 6900
3       1   OTU.7281   0.0004756572 6899
4       1   OTU.7101   0.0004756572 6898
5       1   OTU.6733   0.0004756572 6897

In [23]:
%%R -i comm_richness
# checking assumptions
cat('Target + nonTarget richness = total community richness?: ',
    n.target.genomes + n.nontarget.genomes == comm_richness, '\n')


Target + nonTarget richness = total community richness?:  TRUE 

In [24]:
%%R -i workDir
# writing out non-target OTU file
outFile = paste(c(workDir, 'comm_nonTarget.txt'), collapse='/')
write.table(tbl.j, outFile, sep='\t', quote=F, row.names=F)

Randomly selecting amplicon fragment length-GC KDEs from total genome pool


In [25]:
# List of non-target OTUs
inFile = os.path.join(workDir, 'comm_nonTarget.txt')
nonTarget = pd.read_csv(inFile, sep='\t')['taxon_name'].tolist()

print 'Number of non-target OTUs: {}'.format(len(nonTarget))
nonTarget[:4]


Number of non-target OTUs: 6705
Out[25]:
['OTU.8166', 'OTU.8112', 'OTU.7281', 'OTU.7101']

In [26]:
# loading amplicon fragments from full genome KDE dataset
inFile = os.path.join(workDir, 'ampFrags.pkl')
ampFrag_target = []
with open(inFile, 'rb') as iFH:
    ampFrag_target = pickle.load(iFH)
print 'Target OTU richness: {}'.format(len(ampFrag_target))


Target OTU richness: 196

In [27]:
# loading amplicon fragments from full genome KDE dataset
ampFrag_all = []
with open(allAmpFrags, 'rb') as iFH:
    ampFrag_all = pickle.load(iFH)
print 'Count of frag-GC KDEs for all genomes: {}'.format(len(ampFrag_all))


Count of frag-GC KDEs for all genomes: 1210

In [28]:
# random selection from list
#target_richness = len(ampFrag_target)

target_richness = len(ampFrag_target)
richness_needed = comm_richness - target_richness
print 'Number of random taxa needed to reach richness: {}'.format(richness_needed)

if richness_needed > 0:
    index = range(target_richness)
    index = np.random.choice(index, richness_needed)
    
    ampFrag_rand = []
    for i in index:
        sys.stderr.write('{},'.format(i))
        ampFrag_rand.append(copy.deepcopy(ampFrag_all[i]))
else:
    ampFrag_rand = []


3,2,154,67,51,152,133,99,67,163,176,95,101,27,61,36,165,49,110,190,50,20,7,49,164,137,79,4,9,195,5,30,107,84,161,143,96,18,174,100,147,161,134,145,100,163,128,94,150,114,168,110,51,60,58,73,89,87,189,49,92,11,4,42,132,30,96,3,135,72,193,151,113,79,186,152,120,56,92,155,186,1,73,7,174,74,67,182,157,12,112,140,67,69,159,29,140,89,183,53,110,136,121,32,86,42,87,67,100,63,179,68,43,178,64,78,149,6,1,43,155,47,142,52,39,55,152,109,24,164,138,56,120,105,113,73,64,126,120,160,92,92,39,26,129,0,16,176,75,146,151,39,12,118,73,45,73,164,75,4,66,128,125,159,79,174,16,147,6,109,88,162,61,85,192,191,79,68,105,174,87,175,99,138,141,171,27,90,152,34,110,7,29,179,60,46,97,120,192,14,33,103,165,122,183,103,103,75,76,164,188,139,1,119,32,187,149,30,89,167,56,89,31,162,154,41,137,123,59,110,195,100,104,163,158,138,72,142,150,143,12,135,40,114,168,27,10,63,28,173,94,105,122,67,168,93,63,22,35,193,51,77,98,174,128,72,145,114,144,139,0,161,156,75,111,111,172,64,16,38,0,55,11,66,65,103,91,52,22,72,143,23,12,114,96,97,172,132,120,9,110,99,79,71,91,174,54,120,185,124,137,163,2,47,163,58,153,4,190,153,88,57,52,189,4,28,134,50,173,35,88,60,136,13,31,45,17,98,195,24,92,95,171,156,135,35,28,70,74,84,101,118,35,78,137,84,192,19,182,177,187,134,158,36,31,116,27,73,15,174,83,172,51,193,9,48,55,131,78,92,75,106,148,67,57,17,69,44,113,16,187,49,124,149,28,2,175,90,101,76,161,188,17,135,108,83,184,1,107,64,51,102,100,130,45,139,181,171,160,108,142,29,29,110,176,154,152,97,62,71,145,30,165,116,58,25,35,132,186,4,115,12,155,115,53,23,163,138,118,73,102,63,166,5,29,61,106,135,30,169,76,121,175,4,54,22,101,48,85,146,56,128,51,123,88,64,49,188,147,169,111,29,51,13,100,168,33,195,142,40,157,48,142,37,140,132,71,47,134,182,67,3,10,97,2,139,116,92,171,111,47,24,25,122,123,49,186,91,18,130,57,64,141,119,29,15,89,16,177,125,165,5,154,189,109,0,148,173,175,1,23,122,81,43,50,145,54,186,50,176,17,108,130,66,189,54,173,18,27,39,179,61,85,95,41,18,31,148,37,40,61,13,155,84,36,2,101,191,102,123,58,50,165,163,32,79,125,23,50,114,161,167,89,19,36,194,148,111,77,164,57,94,37,124,72,132,134,85,173,140,60,139,139,49,86,117,30,155,12,98,110,131,30,67,179,116,57,90,157,182,41,87,156,147,117,186,61,171,48,194,181,153,78,153,116,67,154,39,11,26,47,16,33,107,162,68,81,4,130,38,3,34,143,65,176,151,123,73,124,187,114,154,185,43,40,28,168,29,139,95,192,136,41,1,162,38,58,170,177,151,183,135,194,16,185,77,69,130,40,179,29,35,34,80,56,159,169,146,141,49,148,142,142,192,3,123,6,182,170,160,4,54,46,157,136,144,142,26,53,160,33,33,136,43,52,45,100,169,140,118,6,193,4,135,10,40,82,192,161,3,156,182,115,13,116,151,127,15,126,159,122,191,130,173,33,69,181,87,6,9,28,160,187,23,82,96,190,40,2,163,120,184,150,180,78,176,16,13,136,62,60,138,32,185,5,20,118,23,39,4,124,91,90,105,44,37,65,52,10,140,150,153,146,77,100,78,129,51,98,134,102,95,170,14,163,89,67,25,110,143,59,59,167,143,130,134,40,190,77,56,1,189,3,181,87,175,29,60,27,139,146,111,109,111,58,159,118,166,182,170,54,101,104,6,66,10,21,133,35,18,30,125,44,136,193,133,46,69,88,102,168,85,63,55,13,8,48,31,133,179,168,132,147,174,50,54,163,159,192,18,135,146,142,132,71,10,86,16,127,5,179,23,20,36,104,141,78,41,87,1,162,73,86,40,178,191,156,160,181,30,64,171,105,127,188,169,155,91,7,140,47,0,173,104,153,182,42,101,9,179,41,121,78,166,170,142,120,140,181,154,88,158,14,120,94,111,147,34,124,15,168,20,125,127,34,48,108,52,23,129,143,40,73,39,32,106,136,158,23,46,120,80,104,154,56,115,79,87,90,26,139,129,121,82,150,122,119,190,30,115,189,84,69,75,1,168,186,28,176,144,66,66,111,12,121,52,184,85,47,195,149,65,138,61,153,98,58,186,154,125,31,136,138,13,59,101,97,139,139,143,191,179,192,68,1,26,38,180,187,21,87,177,195,75,129,94,136,30,17,111,72,27,79,115,30,106,79,5,162,102,53,39,121,86,5,191,177,153,158,107,43,22,136,181,180,187,167,142,115,178,140,35,110,179,17,42,116,173,161,141,44,88,19,11,103,175,42,77,139,149,17,157,22,57,134,23,99,153,54,136,24,48,182,106,168,20,127,20,173,104,193,172,107,6,26,26,91,116,76,92,195,125,85,37,5,94,12,5,105,122,123,142,182,27,77,159,79,161,186,179,117,8,129,120,108,159,47,183,179,84,90,115,93,133,63,193,1,151,63,130,132,117,149,23,33,100,37,18,91,189,127,76,194,90,166,93,183,154,76,120,131,17,26,105,163,153,146,95,170,95,51,156,170,4,5,43,109,170,173,87,15,14,59,140,153,159,2,5,72,141,113,14,54,152,17,162,58,68,90,31,0,138,41,33,120,153,187,165,169,160,79,48,12,6,8,94,160,72,143,159,75,141,190,143,45,174,134,78,141,36,177,155,47,131,78,73,137,41,140,68,121,166,142,84,31,16,3,22,94,32,172,68,137,161,188,1,93,130,124,135,191,100,192,153,2,29,165,53,90,173,83,23,80,97,30,71,28,107,115,165,109,47,93,43,53,4,42,33,32,50,151,45,60,91,54,6,2,23,65,24,154,11,75,169,40,48,126,84,182,74,173,93,17,195,5,87,162,145,24,83,56,175,89,133,5,79,48,138,189,23,68,159,130,88,68,134,17,36,63,153,36,154,183,65,17,25,57,55,144,13,25,27,26,87,94,38,81,32,106,44,117,146,34,53,107,170,8,184,147,77,87,49,93,163,86,7,138,11,53,0,20,42,184,21,138,45,20,172,132,101,58,56,113,169,87,29,34,139,152,147,24,100,134,77,90,74,64,50,0,177,39,15,151,66,166,151,174,18,129,16,7,47,50,111,193,95,159,185,131,112,184,23,13,162,49,124,10,131,133,108,113,176,121,70,145,184,108,60,159,172,142,73,11,141,156,30,63,170,79,42,69,19,83,192,114,27,9,61,152,161,0,26,161,7,11,96,97,24,169,175,92,43,3,3,179,126,113,27,47,29,29,55,47,161,12,176,141,61,99,177,88,143,58,39,125,41,98,75,140,177,66,163,50,104,155,31,27,174,113,57,110,71,184,150,166,97,125,148,195,136,83,143,64,41,162,14,109,101,75,59,55,40,3,19,134,11,90,143,143,66,166,126,129,112,132,189,182,62,153,175,60,153,192,157,112,82,156,171,189,178,139,89,171,150,125,122,71,82,68,11,177,125,80,130,104,29,129,46,57,72,13,6,162,131,155,50,96,24,148,185,192,33,167,78,112,176,98,176,48,105,15,193,167,33,92,181,140,128,50,139,115,189,182,65,20,77,112,36,184,90,69,153,22,180,75,54,98,118,137,136,126,91,37,190,146,143,111,25,186,37,181,16,22,130,61,36,92,13,146,78,16,153,46,10,33,3,170,56,193,82,98,24,39,178,30,45,11,133,51,5,142,154,11,190,106,175,49,79,84,14,130,26,118,31,121,61,128,134,106,133,147,157,121,25,111,68,4,4,64,168,40,110,105,30,17,63,120,146,33,76,177,188,48,168,164,121,108,65,102,148,62,168,79,67,95,3,132,122,38,6,105,173,142,28,69,154,24,61,13,41,115,188,158,40,106,106,75,178,171,173,189,82,18,116,50,74,155,129,72,99,12,181,28,89,176,191,115,137,48,80,4,66,35,92,45,154,25,76,167,123,151,141,91,41,174,133,112,21,51,62,41,65,72,24,112,121,80,137,39,67,141,45,23,31,146,195,148,90,137,60,71,159,178,34,18,19,46,73,144,67,20,70,119,131,108,147,7,152,172,39,195,31,70,80,166,88,34,15,153,170,99,104,39,109,92,43,114,55,189,50,152,125,54,50,106,42,178,28,17,38,134,63,7,133,176,149,128,22,151,133,90,169,49,178,105,125,168,134,49,130,54,173,86,14,26,53,168,95,57,100,122,111,195,124,121,114,173,109,84,136,41,102,131,63,73,70,159,190,181,134,24,45,30,174,146,104,133,164,94,181,79,24,42,150,44,178,126,62,69,99,54,94,25,102,142,109,186,151,95,41,122,166,94,182,169,147,177,40,134,31,2,123,74,27,119,126,76,26,45,145,141,98,160,193,169,148,112,142,44,46,116,21,113,135,6,181,13,122,81,110,162,48,98,118,121,153,156,170,146,47,88,75,6,32,179,58,188,97,32,90,76,62,81,116,120,87,32,63,171,145,39,134,191,111,135,56,69,90,59,136,53,26,158,102,61,21,154,1,50,62,10,98,182,85,56,178,125,32,3,112,112,84,100,125,67,104,49,163,189,61,2,125,135,71,73,112,66,21,95,98,145,143,103,161,133,104,192,153,79,58,174,78,69,150,18,39,102,83,82,174,28,153,130,135,43,62,107,10,165,170,70,121,133,153,80,107,180,130,165,33,166,50,38,124,39,53,39,10,69,140,145,167,133,12,146,41,172,14,45,125,167,113,157,148,146,126,178,14,138,113,58,87,47,44,55,104,50,50,29,135,150,109,170,153,155,37,159,58,131,78,154,165,163,103,166,113,25,143,17,91,84,56,191,73,12,135,2,32,175,18,101,185,179,60,104,98,121,190,45,97,189,38,23,110,10,140,22,180,71,63,194,94,43,146,90,82,181,5,51,120,94,16,103,114,5,160,92,41,117,40,140,93,83,160,48,67,132,61,5,128,148,31,62,101,71,58,169,30,2,145,101,149,87,138,90,88,184,125,125,155,62,80,182,58,140,26,21,76,178,135,61,178,3,83,68,16,173,102,11,132,1,103,164,96,56,130,76,72,192,133,64,16,75,25,64,56,16,121,29,142,88,139,121,92,191,137,175,168,32,70,32,187,139,109,144,10,150,23,171,171,56,154,38,175,14,146,14,58,44,21,99,20,82,65,130,177,181,141,160,37,120,145,186,18,21,77,172,95,55,179,114,150,186,35,4,19,117,137,41,36,31,130,59,151,59,123,81,75,181,172,84,166,47,81,162,27,176,43,100,52,178,22,194,184,159,19,65,158,159,91,48,51,96,16,5,170,101,7,168,76,164,20,179,135,60,168,172,52,53,19,159,70,193,51,2,84,44,46,71,5,41,75,43,68,158,183,92,145,97,158,121,170,118,93,71,185,184,92,21,162,166,8,101,162,157,8,107,184,87,181,135,135,17,149,94,156,191,90,170,161,144,156,182,170,36,76,55,192,88,23,115,39,70,139,74,139,24,34,149,52,31,20,121,58,175,61,69,51,89,178,2,105,89,146,131,23,59,19,90,54,146,54,35,189,20,69,151,20,104,82,169,74,72,113,105,110,55,84,32,34,88,99,117,7,80,22,108,87,28,145,184,157,127,37,84,118,44,124,147,155,67,157,111,94,100,71,62,133,15,190,195,190,167,35,147,159,106,36,158,25,125,38,138,8,55,52,140,153,139,164,140,120,90,193,148,170,165,180,68,108,147,43,167,70,18,94,149,159,192,25,83,97,111,194,20,86,49,50,25,16,67,91,74,37,44,12,121,104,98,46,7,142,99,42,107,47,46,34,110,185,80,46,96,101,24,100,88,79,45,121,68,97,164,8,92,131,195,72,102,94,70,58,59,193,49,51,9,195,145,195,126,121,183,195,3,137,11,113,109,77,191,106,61,54,50,169,39,157,129,10,22,76,71,11,145,74,176,170,73,33,150,28,32,171,154,101,142,161,35,86,157,94,168,18,154,81,154,22,56,59,156,71,176,24,153,129,27,107,141,118,179,66,26,155,104,155,115,88,63,181,194,47,117,31,171,170,133,114,119,82,60,137,149,28,181,161,25,142,134,77,159,106,133,18,2,49,59,24,45,21,105,192,56,126,89,54,56,58,3,172,121,26,119,43,112,78,162,148,20,46,152,161,118,3,96,37,72,49,45,166,70,66,153,110,24,0,110,187,117,105,141,185,14,22,107,128,64,1,83,109,67,11,27,43,88,158,143,121,102,34,102,127,83,179,155,113,11,74,136,117,46,123,44,67,165,65,63,122,119,170,48,119,168,142,75,98,142,93,123,77,153,131,53,78,169,138,136,139,7,101,48,161,93,68,121,31,121,89,112,131,64,112,176,121,99,151,187,46,47,124,71,119,80,29,85,36,148,170,147,104,77,137,108,182,117,151,94,63,184,181,45,144,63,46,145,70,37,29,165,39,97,183,184,51,10,65,122,13,47,73,13,31,6,43,22,142,96,33,159,171,159,181,43,82,30,13,57,99,119,139,127,32,136,44,60,2,80,54,53,159,109,94,161,115,173,110,14,193,141,179,141,77,136,51,82,25,1,127,73,157,137,82,76,126,65,36,13,92,173,167,162,176,194,41,103,89,107,74,162,186,143,144,176,128,52,25,194,122,100,8,38,165,22,184,29,85,113,114,90,186,36,122,136,31,58,80,58,118,52,103,19,104,145,188,192,34,72,62,142,175,42,14,182,105,41,21,137,124,2,71,166,101,170,105,183,129,190,129,191,148,15,43,101,176,154,176,23,65,104,106,100,25,113,23,47,92,191,182,25,66,50,107,11,16,123,96,15,38,180,156,55,27,116,161,132,23,152,4,9,170,125,173,63,143,85,132,94,117,139,189,67,3,86,119,42,79,111,156,193,85,137,92,5,189,1,186,146,191,92,95,59,178,29,162,48,146,38,184,71,172,44,76,166,134,180,127,109,81,23,90,106,15,3,173,70,173,183,50,119,109,81,141,116,49,107,63,142,134,5,143,46,106,42,47,129,175,118,157,63,68,123,14,99,156,92,100,102,180,53,188,9,26,62,135,115,98,52,164,162,76,41,160,93,121,110,94,119,181,186,18,127,52,131,97,5,115,31,156,4,97,182,123,43,28,124,138,67,123,53,177,132,60,21,124,74,26,30,71,9,112,186,37,97,139,144,92,107,192,99,182,58,66,96,150,110,103,112,147,4,194,182,64,137,48,145,136,80,145,126,65,82,67,14,56,33,172,68,195,12,50,92,76,156,47,41,124,99,95,32,26,108,107,147,128,142,110,95,108,193,48,140,19,122,147,86,59,151,104,82,89,77,50,59,31,53,5,180,23,188,89,112,165,78,65,51,97,106,5,128,134,108,124,1,149,185,29,102,88,190,38,73,31,32,96,90,105,104,111,49,181,53,116,44,172,22,169,155,140,84,91,136,9,65,55,106,165,38,109,74,187,193,2,50,102,195,158,105,121,89,135,129,44,32,167,102,96,129,157,132,84,74,4,140,73,55,182,187,87,91,177,115,12,174,51,130,42,68,78,35,28,165,39,95,110,119,96,35,128,88,186,181,13,47,20,143,100,9,184,42,104,95,139,70,148,135,39,105,94,81,66,120,128,108,186,1,161,116,103,92,16,168,178,83,191,50,115,138,192,27,142,138,130,142,1,34,26,59,93,122,55,134,64,102,147,127,194,195,69,129,178,112,107,128,150,67,2,68,59,175,139,95,181,193,36,183,66,39,194,39,116,51,64,3,175,21,188,78,54,20,46,155,122,143,106,74,153,178,189,153,114,192,130,54,88,104,77,195,168,131,176,32,30,110,166,78,114,103,139,191,108,184,86,50,0,13,73,76,154,182,60,140,153,179,54,68,5,111,73,165,65,60,51,44,156,182,157,183,7,174,4,77,26,186,125,122,159,107,93,51,147,192,173,109,92,44,32,156,121,25,101,170,110,127,110,159,66,58,107,0,122,24,165,37,138,162,43,24,167,81,46,16,164,40,78,163,4,119,174,140,105,99,10,6,170,11,130,92,6,46,63,177,55,73,116,80,135,122,5,65,176,85,53,150,110,175,143,29,165,36,21,50,109,126,70,46,1,66,119,28,10,188,170,16,181,77,55,131,127,47,95,173,112,37,47,100,79,169,57,34,34,18,79,40,121,106,193,140,153,47,87,45,107,99,156,115,149,174,189,136,42,187,121,90,185,11,116,176,74,17,25,120,172,165,8,152,4,173,100,177,73,125,132,66,135,41,96,9,51,184,131,159,74,101,70,81,133,187,129,57,172,186,7,4,27,84,119,192,23,144,36,92,27,179,27,134,21,176,47,0,137,164,134,46,90,186,57,17,11,16,59,54,178,190,53,59,172,189,195,88,138,86,62,127,85,141,185,106,123,43,57,184,146,97,142,21,151,44,124,156,58,111,37,107,92,12,37,34,129,141,192,63,34,81,63,149,19,162,179,168,155,28,35,150,44,150,68,41,121,122,157,16,105,46,131,59,112,41,146,148,172,45,132,167,183,139,57,172,104,7,96,0,15,194,130,143,2,40,93,76,175,97,5,76,156,104,124,58,96,32,31,75,64,41,143,19,80,192,46,94,92,107,175,90,135,82,102,99,15,130,70,65,41,88,84,88,102,21,93,22,8,76,1,65,131,22,190,134,145,127,12,71,89,100,24,16,178,135,87,37,107,62,181,184,38,11,194,127,58,40,90,114,23,58,84,77,48,11,119,91,146,33,106,190,41,124,181,156,86,183,98,132,149,141,142,29,97,194,15,151,77,168,59,28,76,166,33,190,98,53,3,78,178,82,129,67,114,3,156,116,63,117,75,13,144,75,3,100,59,75,170,76,120,44,135,123,148,87,110,34,118,86,142,114,82,174,67,131,192,52,150,132,106,91,98,37,113,183,36,166,88,165,125,178,97,149,85,111,194,17,13,47,162,161,44,126,58,29,157,32,160,27,42,65,140,132,54,155,132,73,63,119,24,110,76,175,72,65,16,137,49,119,26,111,171,171,106,124,24,62,81,20,21,97,30,91,125,76,40,65,15,17,77,182,37,172,1,123,72,84,12,129,149,57,177,189,77,33,172,99,107,193,143,168,170,60,62,123,141,132,99,95,56,46,88,126,56,51,16,3,77,171,172,106,87,81,162,56,171,168,147,134,82,7,90,51,62,106,26,169,107,89,107,111,74,42,119,74,158,8,160,132,78,127,81,65,192,92,74,29,177,132,186,103,39,55,186,141,68,149,194,88,181,121,189,46,107,104,165,159,104,191,127,3,58,100,164,71,187,173,160,193,128,194,119,144,170,186,103,64,47,114,193,65,38,86,60,69,34,30,175,20,46,176,84,37,188,29,189,83,173,74,98,119,72,50,43,39,151,140,3,77,104,146,31,4,47,78,101,182,89,159,135,127,27,31,173,30,65,7,157,81,104,64,107,37,3,85,1,120,133,77,42,33,67,54,172,40,183,128,134,72,136,82,150,6,74,166,171,153,60,4,104,170,34,192,75,168,40,5,172,131,132,121,71,178,60,68,17,53,32,175,88,191,109,128,158,96,24,129,45,134,144,13,141,14,54,114,15,154,132,130,132,79,187,3,124,124,33,42,94,187,51,44,28,89,34,104,103,129,175,130,69,34,106,56,141,175,136,112,184,114,39,118,87,4,23,36,136,134,8,154,155,25,74,108,37,155,35,82,188,84,7,158,22,131,41,113,113,186,100,144,35,59,72,175,113,10,106,26,2,78,119,151,181,103,193,45,146,58,72,158,34,59,143,101,29,98,33,49,159,126,78,63,195,105,1,195,21,186,174,13,147,162,66,6,180,8,15,43,45,174,58,73,88,60,102,45,105,103,6,193,73,148,156,13,179,42,63,26,158,194,11,134,168,37,141,132,29,32,179,97,156,100,174,30,53,183,167,180,18,148,117,168,119,188,167,4,68,124,127,137,157,32,35,67,187,152,113,127,172,45,158,82,13,128,1,58,40,35,126,168,76,32,116,28,126,162,118,47,17,34,148,124,0,167,72,9,69,29,194,95,35,33,22,92,145,168,25,117,33,80,121,84,177,69,192,193,156,39,42,141,192,97,16,98,178,140,152,109,82,189,39,120,35,148,84,143,183,38,164,79,193,189,50,171,195,167,24,14,148,0,55,161,150,84,179,158,32,88,4,14,115,32,71,142,168,191,49,147,174,128,162,73,26,147,148,156,39,59,38,14,177,64,161,190,11,131,87,139,109,20,78,139,63,75,177,23,98,79,48,109,7,179,7,135,20,185,119,58,3,3,79,159,119,150,129,45,147,188,168,9,139,161,23,96,94,22,189,179,147,89,83,77,20,59,96,84,51,55,160,58,19,64,26,133,171,129,42,139,85,115,170,7,137,102,1,142,93,19,139,31,44,170,193,170,105,135,107,151,128,116,26,57,26,153,50,51,109,153,183,73,158,123,146,184,72,83,135,177,21,120,115,111,56,64,176,130,17,94,9,103,178,31,113,60,25,30,173,118,156,143,83,114,73,114,142,170,194,103,48,55,195,58,2,63,0,124,160,77,176,38,71,141,9,31,153,79,13,5,30,179,146,83,138,33,102,23,166,138,3,5,180,151,16,55,176,144,169,126,181,93,124,30,170,14,40,5,93,130,36,43,128,166,68,153,159,47,20,27,180,195,123,165,154,10,72,159,48,22,168,95,33,101,152,60,136,76,185,167,140,154,45,130,188,136,85,69,129,15,56,10,95,98,59,74,76,12,175,165,99,95,165,174,117,26,126,35,63,147,6,32,87,135,18,85,142,60,186,128,87,144,142,7,160,145,40,44,80,53,187,119,100,12,9,42,119,5,60,58,180,104,172,42,135,70,185,52,107,72,83,25,42,113,87,34,50,191,144,28,81,145,126,35,166,156,161,57,189,183,39,126,123,66,145,45,149,89,83,118,23,150,102,173,136,142,7,160,104,36,29,37,110,131,147,47,47,153,116,129,145,115,150,186,143,150,26,191,119,137,170,171,121,76,142,50,30,4,192,186,91,60,195,183,147,117,149,169,104,37,129,139,6,134,121,3,179,126,91,64,133,108,189,3,128,89,90,162,86,181,60,68,23,129,150,177,177,63,110,22,29,51,73,156,102,37,63,78,144,42,42,192,51,31,151,5,183,86,33,173,155,9,103,65,187,192,44,177,25,33,17,21,97,63,60,4,18,115,126,165,115,12,165,84,89,171,147,75,53,21,171,112,63,33,75,192,96,106,53,106,59,76,191,180,34,5,72,134,122,80,124,121,179,162,26,58,77,169,185,175,195,138,65,31,148,20,83,111,26,34,108,62,76,45,123,5,109,88,72,25,54,63,143,59,185,80,14,74,85,118,128,86,84,55,35,73,69,61,80,101,27,6,37,188,154,61,117,143,56,139,133,130,67,30,54,188,183,50,103,186,145,121,158,179,24,108,35,29,131,94,45,60,25,75,150,53,112,156,67,28,103,54,58,27,139,35,1,178,139,45,2,137,151,192,1,63,50,11,118,67,173,38,190,23,68,78,69,38,73,99,67,94,119,162,115,131,94,67,117,39,56,41,16,12,175,33,63,99,11,5,131,47,119,136,73,81,91,143,93,85,1,109,81,191,110,164,106,74,27,170,41,26,193,168,104,150,192,114,99,137,111,52,19,123,139,12,153,67,100,74,187,93,16,138,52,193,121,44,143,68,147,100,189,174,124,181,37,148,133,74,167,60,179,125,110,150,58,65,103,92,183,126,161,53,82,115,105,99,98,100,113,39,11,58,70,160,153,88,44,19,168,164,48,103,186,89,64,117,91,162,137,186,146,2,6,139,142,140,64,189,190,184,190,75,39,37,144,141,89,13,18,100,15,107,95,20,158,57,123,191,44,11,74,32,93,129,93,129,132,21,179,59,150,14,75,92,73,140,167,64,87,120,26,122,125,153,74,184,66,142,190,126,170,148,88,31,80,150,36,195,86,125,25,52,104,165,85,50,99,6,189,28,155,100,102,80,137,173,128,62,47,63,101,118,3,192,66,79,31,81,50,20,50,13,84,51,75,111,102,130,84,17,92,27,190,60,142,99,139,89,171,151,79,50,184,46,19,3,31,72,52,6,190,152,192,82,70,152,22,150,189,52,85,175,92,28,73,159,21,37,145,101,169,57,44,60,87,153,165,14,29,4,153,93,110,29,167,84,195,51,179,176,89,50,162,112,155,78,58,121,13,115,87,9,114,169,7,104,108,111,40,166,108,158,23,48,164,151,145,91,33,6,127,183,48,17,107,150,57,36,101,111,51,108,0,50,64,189,163,193,128,144,30,20,69,194,156,12,171,182,152,52,124,119,90,6,151,82,147,68,159,9,173,52,166,29,98,96,105,135,19,194,93,55,111,62,80,128,47,163,48,118,43,177,161,112,103,29,168,183,165,151,79,106,70,114,166,89,160,71,135,42,23,181,139,18,142,136,35,38,143,13,157,187,96,90,74,162,107,109,117,125,53,113,104,71,126,138,148,105,112,103,42,33,48,192,147,28,29,39,160,97,24,146,150,8,24,157,131,83,7,25,101,155,91,24,56,35,128,45,114,98,139,116,185,132,156,103,18,176,75,26,92,136,106,155,34,17,41,10,38,0,67,89,157,170,66,11,117,93,147,37,134,136,38,158,183,74,170,40,189,179,61,8,79,100,49,45,30,127,76,22,102,78,164,149,61,180,42,11,52,8,126,50,2,109,112,127,162,30,39,176,6,22,100,46,39,176,135,184,169,119,137,23,166,84,29,185,3,116,29,179,121,191,24,74,94,189,52,97,92,176,13,141,11,74,110,193,4,129,85,148,105,89,29,2,42,0,163,110,72,146,105,194,47,148,58,17,152,153,15,67,55,64,2,57,7,20,108,42,171,146,154,146,70,104,98,122,152,86,161,105,89,70,193,141,2,193,96,1,0,1,2,175,27,49,54,32,54,97,101,12,14,29,89,96,159,85,32,46,169,99,160,184,102,72,123,162,130,148,41,11,65,54,158,12,144,190,139,82,172,12,41,74,174,152,23,100,141,73,105,15,23,26,165,47,86,62,50,99,9,68,4,15,114,26,19,86,87,59,15,126,151,147,22,59,81,82,176,191,51,35,41,153,171,108,161,74,157,59,75,143,73,153,167,146,139,115,83,44,174,94,148,113,168,70,61,163,113,60,128,49,46,107,112,70,128,30,171,147,124,93,179,33,152,56,137,154,33,186,176,12,120,127,154,98,80,35,192,35,58,135,122,86,61,62,29,47,35,9,94,93,117,117,21,7,61,178,57,52,91,55,58,184,20,58,74,143,175,122,82,172,34,92,171,139,118,91,1,195,2,55,30,37,36,144,194,33,48,160,74,180,110,189,66,0,139,14,195,36,184,72,32,36,76,132,132,45,43,150,139,101,139,160,90,39,181,76,65,36,58,63,176,2,15,37,127,7,140,56,166,33,171,142,46,89,78,174,150,140,118,181,88,69,188,155,3,167,175,1,64,152,88,195,128,40,134,82,21,6,37,12,120,85,105,24,1,140,190,183,17,2,1,162,177,81,168,70,35,91,77,98,150,156,115,12,161,62,109,35,18,6,75,75,13,117,61,124,179,87,185,43,189,54,83,53,84,91,72,105,91,33,117,97,169,178,70,24,74,45,135,81,156,62,43,125,116,74,119,88,59,45,57,164,27,45,8,41,37,177,59,86,147,1,88,14,55,29,188,136,148,137,190,185,1,67,4,182,69,81,21,21,156,66,79,185,41,51,94,42,185,66,169,65,54,160,151,70,36,21,149,159,187,97,92,120,170,45,19,93,116,54,145,65,158,178,45,194,35,90,131,111,181,52,117,157,161,25,115,24,88,154,61,46,106,134,107,145,118,146,190,152,121,69,2,180,137,5,135,52,136,9,49,88,89,119,25,112,71,44,22,15,104,187,97,31,49,58,162,72,82,39,10,122,65,91,92,20,42,55,13,5,176,117,122,164,133,175,127,58,193,92,187,148,131,46,79,118,146,106,179,178,195,104,122,147,55,25,156,24,46,86,160,70,143,88,65,12,140,129,172,73,63,179,103,187,44,85,55,30,48,60,9,47,160,149,16,158,35,96,10,93,54,116,156,28,92,107,49,63,74,175,80,105,106,64,63,128,42,107,167,72,123,99,125,60,155,31,186,105,35,128,93,170,119,32,30,41,60,175,170,62,109,96,51,97,103,97,121,76,164,128,31,150,10,78,39,109,122,159,84,108,71,99,43,60,175,56,179,108,120,120,64,95,123,3,31,49,8,114,147,44,145,20,176,124,153,143,86,119,58,53,130,73,87,152,25,158,128,134,138,155,46,69,48,117,121,151,60,57,138,6,107,188,0,7,28,161,60,56,169,65,22,178,148,112,166,52,68,46,57,51,14,59,172,114,168,112,79,133,195,58,113,183,67,76,87,100,118,34,117,75,161,63,46,15,117,132,154,39,44,97,159,56,2,111,82,162,195,160,66,17,87,156,85,50,146,167,146,57,143,182,51,51,154,194,122,135,175,192,194,40,73,19,17,20,127,126,137,90,148,106,156,83,124,133,41,137,6,72,24,193,113,51,70,8,119,33,114,143,152,167,22,143,30,155,81,176,121,149,90,63,45,40,152,160,116,24,100,147,132,84,83,62,186,28,138,144,163,190,152,47,143,163,148,169,184,67,104,15,3,186,159,18,165,74,86,56,39,150,54,161,138,189,122,176,18,180,74,153,153,130,159,143,70,179,56,29,66,84,133,61,99,22,46,107,47,53,173,168,195,100,188,142,124,99,31,103,162,184,55,172,10,84,169,88,105,17,190,147,118,12,156,120,65,67,79,107,34,73,95,173,131,191,0,82,170,116,55,130,188,18,178,68,8,12,193,67,103,84,14,45,143,85,60,59,
Number of random taxa needed to reach richness: 6705

In [ ]:
# renaming randomly selected KDEs by non-target OTU-ID
for i in range(len(ampFrag_rand)):
    ampFrag_rand[i][0] = nonTarget[i]

In [ ]:
# appending random taxa to target taxa and writing
outFile = os.path.join(workDir, 'ampFrags_wRand.pkl')

with open(outFile, 'wb') as oFH:
    x = ampFrag_target + ampFrag_rand
    print 'Number of taxa in output: {}'.format(len(x))
    pickle.dump(x, oFH)

Converting fragments to kde object


In [ ]:
!cd $workDir; \
    SIPSim fragment_kde \
    ampFrags_wRand.pkl \
    > ampFrags_wRand_kde.pkl

Adding diffusion


In [ ]:
!cd $workDir; \
    SIPSim diffusion \
    ampFrags_wRand_kde.pkl \
    --np $nprocs \
    > ampFrags_wRand_kde_dif.pkl

Making an incorp config file


In [ ]:
!cd $workDir; \
    SIPSim incorpConfigExample \
    --percTaxa 0 \
    --percIncorpUnif 100 \
    > PT0_PI100.config

Adding isotope incorporation to BD distribution


In [ ]:
!cd $workDir; \
    SIPSim isotope_incorp \
    ampFrags_wRand_kde_dif.pkl \
    PT0_PI100.config \
    --comm comm.txt \
    --np $nprocs \
    > ampFrags_wRand_kde_dif_incorp.pkl

Calculating BD shift from isotope incorporation


In [ ]:
!cd $workDir; \
    SIPSim BD_shift \
    ampFrags_wRand_kde_dif.pkl \
    ampFrags_wRand_kde_dif_incorp.pkl \
    --np $nprocs \
    > ampFrags_wRand_kde_dif_incorp_BD-shift.txt

Simulating gradient fractions


In [ ]:
!cd $workDir; \
    SIPSim gradient_fractions \
    comm.txt \
    > fracs.txt

Simulating an OTU table


In [ ]:
!cd $workDir; \
    SIPSim OTU_table \
    ampFrags_wRand_kde_dif_incorp.pkl \
    comm.txt \
    fracs.txt \
    --abs 1e9 \
    --np $nprocs \
    > OTU_abs1e9.txt

Plotting taxon abundances


In [12]:
%%R -i workDir
setwd(workDir)

# loading file
tbl = read.delim('OTU_abs1e9.txt', sep='\t')

In [13]:
%%R
## BD for G+C of 0 or 100
BD.GCp0 = 0 * 0.098 + 1.66
BD.GCp100 = 1 * 0.098 + 1.66

In [14]:
%%R -w 800 -h 300
# plotting absolute abundances

tbl.s = tbl %>%
    group_by(library, BD_mid) %>%
    summarize(total_count = sum(count))

## plot
p = ggplot(tbl.s, aes(BD_mid, total_count)) +
    geom_area(stat='identity', alpha=0.3, position='dodge') +
    geom_histogram(stat='identity') +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16) 
    )
p



In [15]:
%%R -w 800 -h 300
# plotting number of taxa at each BD

tbl.nt = tbl %>%
    filter(count > 0) %>%
    group_by(library, BD_mid) %>%
    summarize(n_taxa = n())

## plot
p = ggplot(tbl.nt, aes(BD_mid, n_taxa)) +
    geom_area(stat='identity', alpha=0.3, position='dodge') +
    geom_histogram(stat='identity') +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density', y='Number of taxa') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p



In [16]:
%%R -w 800 -h 250
# plotting relative abundances

## plot
p = ggplot(tbl, aes(BD_mid, count, fill=taxon)) +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p + geom_area(stat='identity', position='dodge', alpha=0.5)



In [17]:
%%R -w 800 -h 250

p + geom_area(stat='identity', position='fill')


Subsampling from the OTU table


In [ ]:
dist,loc,scale = seq_per_fraction

!cd $workDir; \
    SIPSim OTU_subsample \
    --dist $dist \
    --dist_params mean:$loc,sigma:$scale \
    --walk 2 \
    --min_size 10000 \
    --max_size 200000 \
    OTU_abs1e9.txt \
    > OTU_abs1e9_sub.txt

Testing/Plotting seq count distribution of subsampled fraction samples


In [18]:
%%R -h 300 -i workDir
setwd(workDir)

tbl = read.csv('OTU_abs1e9_sub.txt', sep='\t') 

tbl.s = tbl %>% 
    group_by(library, fraction) %>%
    summarize(total_count = sum(count)) %>%
    ungroup() %>%
    mutate(library = as.character(library))

ggplot(tbl.s, aes(total_count)) +
    geom_density(fill='blue')



In [19]:
%%R -h 300 -w 600
setwd(workDir)

tbl.s = tbl %>%
    group_by(fraction, BD_min, BD_mid, BD_max) %>%
    summarize(total_count = sum(count)) 

ggplot(tbl.s, aes(BD_mid, total_count)) +
    geom_point() +
    geom_line() +
    labs(x='Buoyant density', y='Total sequences') +
    theme_bw() +
    theme(
        text = element_text(size=16)
        )


Getting list of target taxa


In [20]:
%%R -i workDir

inFile = paste(c(workDir, 'target_genome_index.txt'), collapse='/')

tbl.target = read.delim(inFile, sep='\t', header=F)
colnames(tbl.target) = c('OTUId', 'genome_file', 'genome_ID', 'X', 'Y', 'Z')
tbl.target = tbl.target %>% distinct(OTUId)


cat('Number of target OTUs: ', tbl.target$OTUId %>% unique %>% length, '\n')
cat('----------\n')
tbl.target %>% head(n=3)


Number of target OTUs:  196 
----------
     OTUId
1 OTU.9267
2 OTU.1457
3   OTU.77
                                                                                    genome_file
1 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
2 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
3   /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Pseudoxanthomonas_spadix_BD-a59.fasta
                                   genome_ID X           Y    Z
1 CP001738_Thermomonospora_curvata_DSM_43183 1 0.003132433 4370
2 CP001738_Thermomonospora_curvata_DSM_43183 1 0.011451281 1176
3   CP003093_Pseudoxanthomonas_spadix_BD_a59 1 0.028904303  569

Plotting abundance distributions


In [21]:
%%R -w 800 -h 250
# plotting relative abundances

tbl = tbl %>% 
    group_by(fraction) %>%
    mutate(rel_abund = count / sum(count))


## plot
p = ggplot(tbl, aes(BD_mid, count, fill=taxon)) +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p + geom_area(stat='identity', position='dodge', alpha=0.5)



In [22]:
%%R -w 800 -h 250

p = ggplot(tbl, aes(BD_mid, rel_abund, fill=taxon)) +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p + geom_area(stat='identity')


Abundance distribution of just target taxa


In [23]:
%%R

targets = tbl.target$OTUId %>% as.vector %>% unique 

tbl.f = tbl %>%
    filter(taxon %in% targets)

tbl.f %>% head


Source: local data frame [6 x 8]
Groups: fraction

  library    fraction taxon BD_min BD_mid BD_max count rel_abund
1       1  -inf-1.660 OTU.1   -Inf  1.659  1.659     0         0
2       1 1.660-1.666 OTU.1  1.660  1.663  1.666     0         0
3       1 1.666-1.671 OTU.1  1.666  1.668  1.671     0         0
4       1 1.671-1.675 OTU.1  1.671  1.673  1.675     0         0
5       1 1.675-1.678 OTU.1  1.675  1.676  1.678     0         0
6       1 1.678-1.681 OTU.1  1.678  1.679  1.681     0         0

In [24]:
%%R -w 800 -h 250
# plotting absolute abundances

## plot
p = ggplot(tbl.f, aes(BD_mid, count, fill=taxon)) +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p + geom_area(stat='identity', position='dodge', alpha=0.5)



In [25]:
%%R -w 800 -h 250
# plotting relative abundances

p = ggplot(tbl.f, aes(BD_mid, rel_abund, fill=taxon)) +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p + geom_area(stat='identity')


Plotting 'true' taxon abundance distribution (from priming exp dataset)


In [26]:
%%R -i metaDataFile
# loading priming_exp metadata file

meta = read.delim(metaDataFile, sep='\t')
meta %>% head(n=4)


            Sample FractionNum Bulk Control CC X100 X700 H2O Day Density rep
1 12C.000.28.03.07           7    0       1  1    0    0   0  28  1.7646    
2 12C.000.28.03.08           8    0       1  1    0    0   0  28  1.7614    
3 12C.000.28.03.09           9    0       1  1    0    0   0  28  1.7537    
4 12C.000.28.03.10          10    0       1  1    0    0   0  28  1.7483    
  contolVlabel Treatment
1      control    12C000
2      control    12C000
3      control    12C000
4      control    12C000

In [27]:
%%R -i otuTableFile
# loading priming_exp OTU table 

tbl.otu.true = read.delim(otuTableFile, sep='\t') %>%
    select(OTUId, starts_with('X12C.700.28')) 
tbl.otu.true %>% head(n=3)


     OTUId X12C.700.28.04.05 X12C.700.28.04.04 X12C.700.28.04.21
1 OTU.4776                62               179                87
2 OTU.2864                 1                 0                 0
3 OTU.8170                 0                 0                 0
  X12C.700.28.04.09 X12C.700.28.04.11 X12C.700.28.04.14 X12C.700.28.04.NA
1                63                31                 9                54
2                 0                 0                 0                 0
3                 0                 0                 0                 0
  X12C.700.28.04.08 X12C.700.28.04.12 X12C.700.28.04.20 X12C.700.28.04.02
1                69                29               294               338
2                 0                 2                 0                 1
3                 0                 0                 0                 0
  X12C.700.28.04.10 X12C.700.28.04.13 X12C.700.28.04.06 X12C.700.28.04.03
1                75                11                66                74
2                 0                 0                 0                 0
3                 0                 0                 0                 0
  X12C.700.28.03.NA X12C.700.28.04.15 X12C.700.28.04.18 X12C.700.28.04.07
1                82                 3               642                13
2                 0                 0                 1                 0
3                 0                 0                 0                 0
  X12C.700.28.04.19 X12C.700.28.04.16 X12C.700.28.04.17
1               132               142               292
2                 0                 1                 0
3                 0                 0                 0

In [28]:
%%R
# editing table
tbl.otu.true.w = tbl.otu.true %>%
    gather('sample', 'count', 2:ncol(tbl.otu.true)) %>%
    mutate(sample = gsub('^X', '', sample)) %>%
    group_by(sample) %>%
    mutate(rel_abund = count / sum(count)) %>%
    ungroup() %>%
    filter(count > 0)
tbl.otu.true.w %>% head(n=5)


Source: local data frame [5 x 4]

     OTUId           sample count    rel_abund
1 OTU.4776 12C.700.28.04.05    62 2.228532e-03
2 OTU.2864 12C.700.28.04.05     1 3.594407e-05
3 OTU.6001 12C.700.28.04.05     1 3.594407e-05
4  OTU.611 12C.700.28.04.05     7 2.516085e-04
5  OTU.961 12C.700.28.04.05     4 1.437763e-04

In [29]:
%%R
tbl.true.j = inner_join(tbl.otu.true.w, meta, c('sample' = 'Sample'))
tbl.true.j %>% as.data.frame %>% head(n=3)


     OTUId           sample count    rel_abund FractionNum Bulk Control CC X100
1 OTU.4776 12C.700.28.04.05    62 2.228532e-03           5    0       1  0    0
2 OTU.2864 12C.700.28.04.05     1 3.594407e-05           5    0       1  0    0
3 OTU.6001 12C.700.28.04.05     1 3.594407e-05           5    0       1  0    0
  X700 H2O Day Density rep contolVlabel Treatment
1    1   0  28  1.7515          control    12C700
2    1   0  28  1.7515          control    12C700
3    1   0  28  1.7515          control    12C700

In [30]:
%%R -w 800 -h 300  -i workDir
# plotting number of taxa at each BD

tbl = read.csv('OTU_abs1e9_sub.txt', sep='\t') 

tbl.nt = tbl %>%
    filter(count > 0) %>%
    group_by(library, BD_mid) %>%
    summarize(n_taxa = n())

## plot
p = ggplot(tbl.nt, aes(BD_mid, n_taxa)) +
    geom_area(stat='identity', alpha=0.5) +
    geom_point() +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density', y='Number of taxa') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p



In [31]:
%%R -w 700 -h 350

tbl.true.j.s = tbl.true.j %>%
    filter(count > 0) %>%
    group_by(sample, Density) %>%
    summarize(n_taxa = sum(count > 0))

ggplot(tbl.true.j.s, aes(Density, n_taxa)) +
    geom_area(stat='identity', alpha=0.5) +
    geom_point() +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density', y='Number of taxa') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )


Plotting total counts for each sample


In [32]:
%%R -h 300 -w 600
tbl.true.j.s = tbl.true.j %>%
    group_by(sample, Density) %>%
    summarize(total_count = sum(count)) 

ggplot(tbl.true.j.s, aes(Density, total_count)) +
    geom_point() +
    geom_line() +
    labs(x='Buoyant density', y='Total sequences') +
    theme_bw() +
    theme(
        text = element_text(size=16)
        )


Plotting abundance distribution of target OTUs


In [33]:
%%R
tbl.true.j.f = tbl.true.j %>%
    filter(OTUId %in% targets) %>%
    arrange(OTUId, Density) %>%
    group_by(sample)
tbl.true.j.f %>% head(n=3) %>% as.data.frame


  OTUId           sample count   rel_abund FractionNum Bulk Control CC X100
1 OTU.1 12C.700.28.04.21   251 0.011817882          21    0       1  0    0
2 OTU.1 12C.700.28.04.20   639 0.009510061          20    0       1  0    0
3 OTU.1 12C.700.28.04.19   132 0.007396201          19    0       1  0    0
  X700 H2O Day Density rep contolVlabel Treatment
1    1   0  28  1.6794          control    12C700
2    1   0  28  1.6827          control    12C700
3    1   0  28  1.6871          control    12C700

In [34]:
%%R -w 800 -h 250
# plotting relative abundances

## plot
ggplot(tbl.true.j.f, aes(Density, rel_abund, fill=OTUId)) +
    geom_area(stat='identity') +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )


Combining true and simulated OTU tables for target taxa


In [35]:
%%R
tbl.f.e = tbl.f %>%
    mutate(library = 'simulation') %>%
    rename('density' = BD_mid) %>%
    select(-BD_min, -BD_max)

tbl.true.e = tbl.true.j.f %>% 
    select('taxon' = OTUId,
           'fraction' = sample,
           'density' = Density,
           count, rel_abund) %>%
    mutate(library = 'true') 
           
           
tbl.sim.true = rbind(tbl.f.e, tbl.true.e) %>% as.data.frame
tbl.f.e = data.frame()
tbl.true.e = data.frame()

tbl.sim.true %>% head(n=3)


     library    fraction taxon density count rel_abund
1 simulation  -inf-1.660 OTU.1   1.659     0         0
2 simulation 1.660-1.666 OTU.1   1.663     0         0
3 simulation 1.666-1.671 OTU.1   1.668     0         0

In [36]:
%%R
# check
cat('Number of target taxa: ', tbl.sim.true$taxon %>% unique %>% length, '\n')


Number of target taxa:  196 

Abundance distributions of each target taxon


In [37]:
%%R -w 900 -h 3500

tbl.sim.true.f = tbl.sim.true %>%
    ungroup() %>%
    filter(density >= 1.677) %>%
    filter(density <= 1.761) %>%
    group_by(taxon) %>%
    mutate(mean_rel_abund = mean(rel_abund))  %>%
    ungroup()

tbl.sim.true.f$taxon = reorder(tbl.sim.true.f$taxon, -tbl.sim.true.f$mean_rel_abund)

ggplot(tbl.sim.true.f, aes(density, rel_abund, color=library)) +
    geom_point() +
    geom_line() +
    theme_bw() +
    facet_wrap(~ taxon, ncol=4, scales='free_y')



In [38]:
%%R
tbl.otu.true.w %>% 
    filter(OTUId == 'OTU.1') %>%
    as.data.frame()


   OTUId           sample count   rel_abund
1  OTU.1 12C.700.28.04.05   507 0.018223644
2  OTU.1 12C.700.28.04.04  1210 0.017891997
3  OTU.1 12C.700.28.04.21   251 0.011817882
4  OTU.1 12C.700.28.04.09   811 0.018365036
5  OTU.1 12C.700.28.04.11   377 0.016576529
6  OTU.1 12C.700.28.04.14   565 0.018848412
7  OTU.1 12C.700.28.04.NA   309 0.012253153
8  OTU.1 12C.700.28.04.08   721 0.018693285
9  OTU.1 12C.700.28.04.12   620 0.012148049
10 OTU.1 12C.700.28.04.20   639 0.009510061
11 OTU.1 12C.700.28.04.02  1373 0.014942591
12 OTU.1 12C.700.28.04.10  1244 0.019733190
13 OTU.1 12C.700.28.04.13   349 0.014465122
14 OTU.1 12C.700.28.04.06   547 0.017980409
15 OTU.1 12C.700.28.04.03   466 0.015967106
16 OTU.1 12C.700.28.03.NA   165 0.010305415
17 OTU.1 12C.700.28.04.15    77 0.017994859
18 OTU.1 12C.700.28.04.18   348 0.006770560
19 OTU.1 12C.700.28.04.07   139 0.018735679
20 OTU.1 12C.700.28.04.19   132 0.007396201
21 OTU.1 12C.700.28.04.16   473 0.016767104
22 OTU.1 12C.700.28.04.17   221 0.007651560

In [40]:
%%R
tbl.sim.true.f %>%
    filter(taxon == 'OTU.1') %>%
    as.data.frame()


      library         fraction taxon density count    rel_abund mean_rel_abund
1  simulation      1.678-1.681 OTU.1  1.6790     0 0.000000e+00      0.0143209
2  simulation      1.681-1.683 OTU.1  1.6820     0 0.000000e+00      0.0143209
3  simulation      1.683-1.688 OTU.1  1.6850     0 0.000000e+00      0.0143209
4  simulation      1.688-1.694 OTU.1  1.6910     0 0.000000e+00      0.0143209
5  simulation      1.694-1.698 OTU.1  1.6960     0 0.000000e+00      0.0143209
6  simulation      1.698-1.702 OTU.1  1.7000     1 2.822148e-05      0.0143209
7  simulation      1.702-1.706 OTU.1  1.7040    14 4.411950e-04      0.0143209
8  simulation      1.706-1.710 OTU.1  1.7080   362 1.180730e-02      0.0143209
9  simulation      1.710-1.714 OTU.1  1.7120  2745 9.679808e-02      0.0143209
10 simulation      1.714-1.719 OTU.1  1.7160  3343 1.210793e-01      0.0143209
11 simulation      1.719-1.720 OTU.1  1.7200  1133 4.149271e-02      0.0143209
12 simulation      1.720-1.723 OTU.1  1.7220   329 1.263538e-02      0.0143209
13 simulation      1.723-1.727 OTU.1  1.7250    18 7.496876e-04      0.0143209
14 simulation      1.727-1.730 OTU.1  1.7280     2 8.379771e-05      0.0143209
15 simulation      1.730-1.734 OTU.1  1.7320     0 0.000000e+00      0.0143209
16 simulation      1.734-1.740 OTU.1  1.7370     0 0.000000e+00      0.0143209
17 simulation      1.740-1.748 OTU.1  1.7440    44 2.183731e-03      0.0143209
18 simulation      1.748-1.752 OTU.1  1.7500     0 0.000000e+00      0.0143209
19 simulation      1.752-1.755 OTU.1  1.7530     0 0.000000e+00      0.0143209
20 simulation      1.755-1.757 OTU.1  1.7560     0 0.000000e+00      0.0143209
21 simulation      1.757-1.762 OTU.1  1.7600     0 0.000000e+00      0.0143209
22       true 12C.700.28.04.21 OTU.1  1.6794   251 1.181788e-02      0.0143209
23       true 12C.700.28.04.20 OTU.1  1.6827   639 9.510061e-03      0.0143209
24       true 12C.700.28.04.19 OTU.1  1.6871   132 7.396201e-03      0.0143209
25       true 12C.700.28.04.18 OTU.1  1.6914   348 6.770560e-03      0.0143209
26       true 12C.700.28.04.17 OTU.1  1.6958   221 7.651560e-03      0.0143209
27       true 12C.700.28.04.16 OTU.1  1.7013   473 1.676710e-02      0.0143209
28       true 12C.700.28.04.15 OTU.1  1.7045    77 1.799486e-02      0.0143209
29       true 12C.700.28.04.14 OTU.1  1.7089   565 1.884841e-02      0.0143209
30       true 12C.700.28.04.13 OTU.1  1.7133   349 1.446512e-02      0.0143209
31       true 12C.700.28.04.12 OTU.1  1.7177   620 1.214805e-02      0.0143209
32       true 12C.700.28.04.11 OTU.1  1.7220   377 1.657653e-02      0.0143209
33       true 12C.700.28.04.10 OTU.1  1.7264  1244 1.973319e-02      0.0143209
34       true 12C.700.28.04.09 OTU.1  1.7330   811 1.836504e-02      0.0143209
35       true 12C.700.28.04.08 OTU.1  1.7373   721 1.869328e-02      0.0143209
36       true 12C.700.28.04.07 OTU.1  1.7417   139 1.873568e-02      0.0143209
37       true 12C.700.28.04.06 OTU.1  1.7461   547 1.798041e-02      0.0143209
38       true 12C.700.28.04.05 OTU.1  1.7515   507 1.822364e-02      0.0143209
39       true 12C.700.28.04.04 OTU.1  1.7559  1210 1.789200e-02      0.0143209
40       true 12C.700.28.04.03 OTU.1  1.7603   466 1.596711e-02      0.0143209

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: