Running SIPSim pipeline to simulate priming_exp gradient dataset

  • Basing simulation params off of priming_exp dataset
    • Basing starting community diversity on mean percent abundances in all fraction samples for the gradient
    • Other parameters are 'default'

Setting variables


In [7]:
workDir = '/home/nick/notebook/SIPSim/dev/priming_exp/validation_sample/X12C.700.45_fracRichness/'
genomeDir = '/home/nick/notebook/SIPSim/dev/priming_exp/genomes/'
allAmpFrags = '/home/nick/notebook/SIPSim/dev/bac_genome1210/validation/ampFrags.pkl'
otuTableFile = '/var/seq_data/priming_exp/data/otu_table.txt'
metaDataFile = '/var/seq_data/priming_exp/data/allsample_metadata_nomock.txt'
primerFile = '/home/nick/notebook/SIPSim/dev/515F-806R.fna'

cdhit_dir = '/home/nick/notebook/SIPSim/dev/priming_exp/CD-HIT/'
R_dir = '/home/nick/notebook/SIPSim/lib/R/'
figureDir = '/home/nick/notebook/SIPSim/figures/'

# simulation params
comm_richness =  6901
seq_per_fraction = ['lognormal', 10.096, 1.116]

# for making genome_map file for genome fragment simulation
taxonMapFile = os.path.join(cdhit_dir, 'target_taxa.txt')
genomeFilterFile = os.path.join(cdhit_dir, 'genomeFile_seqID_filt.txt')
abundFile = os.path.join('/home/nick/notebook/SIPSim/dev/priming_exp/exp_info', 'X12C.700.45_frac_OTU.txt')

# misc
nprocs = 20

Init


In [8]:
import glob
import cPickle as pickle
import copy
from IPython.display import Image

In [9]:
%load_ext rpy2.ipython

In [10]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)


/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: 
Attaching package: ‘dplyr’


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from ‘package:stats’:

    filter, lag


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: grid

  res = super(Function, self).__call__(*new_args, **new_kwargs)

In [11]:
if not os.path.isdir(workDir):
    os.makedirs(workDir)

Creating a community file from the fraction relative abundances


In [6]:
%%R -i abundFile
# reading priming experiment OTU table
tbl.abund = read.delim(abundFile, sep='\t')
tbl.abund %>% head


      OTUId mean_perc_abund median_perc_abund max_perc_abund      sample
1     OTU.1     2.191008528       2.109151822    3.455925277 X12C.700.45
2    OTU.10     0.044722200       0.035513830    0.125434932 X12C.700.45
3   OTU.100     0.234795756       0.201043020    0.469029886 X12C.700.45
4  OTU.1000     0.003686293       0.002395669    0.014643969 X12C.700.45
5 OTU.10000     0.002177551       0.002282115    0.002335085 X12C.700.45
6  OTU.1001     0.023347018       0.021015762    0.057093919 X12C.700.45

In [7]:
%%R
tbl.comm = tbl.abund %>%
    rename('taxon_name' = OTUId,
           'rel_abund_perc' = mean_perc_abund) %>%
    select(taxon_name, rel_abund_perc) %>%
    mutate(library = '1',
           rank = row_number(-rel_abund_perc)) %>%
    arrange(rank)
    
tbl.comm %>% head


  taxon_name rel_abund_perc library rank
1      OTU.1      2.1910085       1    1
2      OTU.3      1.3806520       1    2
3      OTU.8      1.3443226       1    3
4      OTU.2      1.2433854       1    4
5     OTU.13      0.9717986       1    5
6     OTU.22      0.9351165       1    6

In [8]:
%%R
# rescaling rel_abund_perc so sum(rel_abund_perc) = 100
tbl.comm = tbl.comm %>%
    group_by(library) %>%
    mutate(total = sum(rel_abund_perc)) %>% 
    ungroup() %>%
    mutate(rel_abund_perc = rel_abund_perc * 100 / total) %>%
    select(library, taxon_name, rel_abund_perc, rank)
    
tbl.comm %>% head


Source: local data frame [6 x 4]

  library taxon_name rel_abund_perc rank
1       1      OTU.1      1.8857213    1
2       1      OTU.3      1.1882769    2
3       1      OTU.8      1.1570095    3
4       1      OTU.2      1.0701365    4
5       1     OTU.13      0.8363917    5
6       1     OTU.22      0.8048207    6

In [9]:
%%R -i comm_richness
# number of OTUs
n.OTUs = tbl.comm$taxon_name %>% unique %>% length
cat('Number of OTUs:', n.OTUs, '\n')

# assertion
cat('Community richness = number of OTUs?  ', comm_richness == n.OTUs, '\n')


Number of OTUs: 6901 
Community richness = number of OTUs?   TRUE 

In [10]:
%%R -i workDir

commFile = paste(c(workDir, 'comm.txt'), collapse='/')
write.table(tbl.comm, commFile, sep='\t', quote=F, row.names=F)

Plotting community distribution


In [11]:
%%R -i workDir

commFile = paste(c(workDir, 'comm.txt'), collapse='/')
comm = read.delim(commFile, sep='\t')
comm %>% head


  library taxon_name rel_abund_perc rank
1       1      OTU.1      1.8857213    1
2       1      OTU.3      1.1882769    2
3       1      OTU.8      1.1570095    3
4       1      OTU.2      1.0701365    4
5       1     OTU.13      0.8363917    5
6       1     OTU.22      0.8048207    6

In [12]:
%%R -w 900 -h 350

ggplot(comm, aes(rank, rel_abund_perc)) +
    geom_point() +
    labs(x='Rank', y='% relative abundance', title='Priming experiment community abundance distribution') +
    theme_bw() +
    theme(
        text = element_text(size=16)
        )


Simulating fragments

Making a genome index file to map genome fasta files to OTUs

  • Will be used for community simulation
  • Just OTUs with association to genomes

In [13]:
%%R -i taxonMapFile -i genomeFilterFile 

taxonMap = read.delim(taxonMapFile, sep='\t') %>%
    select(target_genome, OTU) %>%
    distinct()
taxonMap %>% nrow %>% print
taxonMap %>% head(n=3) %>% print

breaker = '----------------\n'
cat(breaker)

genomeFilter = read.delim(genomeFilterFile, sep='\t', header=F) 
genomeFilter %>% nrow %>% print
genomeFilter %>% head(n=3) %>% print

cat(breaker)

comm = read.delim(commFile, sep='\t') 
comm %>% nrow %>% print
comm %>% head(n=3) %>% print


[1] 236
                               target_genome      OTU
1 CP001738_Thermomonospora_curvata_DSM_43183 OTU.8540
2 CP001738_Thermomonospora_curvata_DSM_43183 OTU.9267
3 CP001738_Thermomonospora_curvata_DSM_43183 OTU.1457
----------------
[1] 187
                                                         V1
1                  CP003093_Pseudoxanthomonas_spadix_BD_a59
2                  CP000511_Mycobacterium_vanbaalenii_PYR_1
3 CP003344_Desulfitobacterium_dichloroeliminans_LMG_P_21439
                                                                                                            V2
1                  /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Pseudoxanthomonas_spadix_BD-a59.fasta
2                  /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Mycobacterium_vanbaalenii_PYR-1.fasta
3 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Desulfitobacterium_dichloroeliminans_LMG_P-21439.fasta
----------------
[1] 6901
  library taxon_name rel_abund_perc rank
1       1      OTU.1       1.885721    1
2       1      OTU.3       1.188277    2
3       1      OTU.8       1.157009    3

In [14]:
%%R
taxonMap$OTU %>% table %>% sort(decreasing=T) %>% head


.
    OTU.1    OTU.10   OTU.101   OTU.102 OTU.10237  OTU.1035 
        1         1         1         1         1         1 

In [15]:
%%R

tbl.j = inner_join(taxonMap, genomeFilter, c('target_genome' = 'V1')) %>%
     rename('fasta_file' = V2) %>%
     select(OTU, fasta_file, target_genome)

tbl.j %>% head(n=3)


       OTU
1 OTU.8540
2 OTU.9267
3 OTU.1457
                                                                                     fasta_file
1 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
2 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
3 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
                               target_genome
1 CP001738_Thermomonospora_curvata_DSM_43183
2 CP001738_Thermomonospora_curvata_DSM_43183
3 CP001738_Thermomonospora_curvata_DSM_43183

In [16]:
%%R
tbl.j$OTU %>% table %>% sort(decreasing=T) %>% head


.
    OTU.1    OTU.10   OTU.101   OTU.102 OTU.10237  OTU.1035 
        1         1         1         1         1         1 

In [17]:
%%R
tbl.j2 = inner_join(tbl.j, comm, c('OTU' = 'taxon_name')) 

n.target.genomes = tbl.j2$OTU %>% unique %>% length
cat('Number of target OTUs: ', n.target.genomes, '\n')
cat('--------', '\n')
tbl.j2 %>% head(n=3)


Number of target OTUs:  196 
-------- 
       OTU
1 OTU.9267
2 OTU.1457
3   OTU.77
                                                                                     fasta_file
1 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
2 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
3   /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Pseudoxanthomonas_spadix_BD-a59.fasta
                               target_genome library rel_abund_perc rank
1 CP001738_Thermomonospora_curvata_DSM_43183       1    0.003132433 4370
2 CP001738_Thermomonospora_curvata_DSM_43183       1    0.011451281 1176
3   CP003093_Pseudoxanthomonas_spadix_BD_a59       1    0.028904303  569

In [18]:
%%R -i workDir

outFile = paste(c(workDir, 'target_genome_index.txt'), collapse='/')
write.table(tbl.j2, outFile, sep='\t', quote=F, row.names=F, col.names=F)

Plotting community abundance distribution of target genomes


In [19]:
%%R -w 900 -h 350

ggplot(tbl.j2, aes(rank, rel_abund_perc)) +
    geom_point(size=3, shape='O', color='red') +
    labs(x='Rank', y='% relative abundance', title='Priming experiment community abundance distribution') +
    theme_bw() +
    theme(
        text = element_text(size=16)
        )


Simulating fragments of genomes that match priming_exp bulk OTUs


In [20]:
!cd $workDir; \
    SIPSim fragments \
    target_genome_index.txt \
    --fp $genomeDir \
    --fr $primerFile \
    --fld skewed-normal,9000,2500,-5 \
    --flr None,None \
    --nf 10000 \
    --np $nprocs \
    2> ampFrags.log \
    > ampFrags.pkl

Appending fragments from randomly selected genomes of total dataset (n=1210)

  • This is to obtain the richness of the bulk soil community
  • Random OTUs will be named after non-target OTUs in comm file

Making list of non-target OTUs


In [21]:
%%R -i workDir
# loading files

## target genome index (just OTUs with associated genome)
inFile = paste(c(workDir, 'target_genome_index.txt'), collapse='/')
tbl.target = read.delim(inFile, sep='\t', header=F)
colnames(tbl.target) = c('OTUId', 'fasta_file', 'genome_name')

## comm file of total community OTUs 
commFile = paste(c(workDir, 'comm.txt'), collapse='/')
tbl.comm = read.delim(commFile, sep='\t')

In [22]:
%%R
# just OTUs w/out an associated genome
tbl.j = anti_join(tbl.comm, tbl.target, c('taxon_name' = 'OTUId'))
n.nontarget.genomes = tbl.j$taxon_name %>% length
cat('Number of non-target genomes: ', n.nontarget.genomes, '\n')
cat('---------\n')
tbl.j %>% head(n=5)


Number of non-target genomes:  6705 
---------
  library taxon_name rel_abund_perc rank
1       1   OTU.8166   0.0004756572 6901
2       1   OTU.8112   0.0004756572 6900
3       1   OTU.7281   0.0004756572 6899
4       1   OTU.7101   0.0004756572 6898
5       1   OTU.6733   0.0004756572 6897

In [23]:
%%R -i comm_richness
# checking assumptions
cat('Target + nonTarget richness = total community richness?: ',
    n.target.genomes + n.nontarget.genomes == comm_richness, '\n')


Target + nonTarget richness = total community richness?:  TRUE 

In [24]:
%%R -i workDir
# writing out non-target OTU file
outFile = paste(c(workDir, 'comm_nonTarget.txt'), collapse='/')
write.table(tbl.j, outFile, sep='\t', quote=F, row.names=F)

Randomly selecting amplicon fragment length-GC KDEs from total genome pool


In [25]:
# List of non-target OTUs
inFile = os.path.join(workDir, 'comm_nonTarget.txt')
nonTarget = pd.read_csv(inFile, sep='\t')['taxon_name'].tolist()

print 'Number of non-target OTUs: {}'.format(len(nonTarget))
nonTarget[:4]


Number of non-target OTUs: 6705
Out[25]:
['OTU.8166', 'OTU.8112', 'OTU.7281', 'OTU.7101']

In [26]:
# loading amplicon fragments from full genome KDE dataset
inFile = os.path.join(workDir, 'ampFrags.pkl')
ampFrag_target = []
with open(inFile, 'rb') as iFH:
    ampFrag_target = pickle.load(iFH)
print 'Target OTU richness: {}'.format(len(ampFrag_target))


Target OTU richness: 196

In [27]:
# loading amplicon fragments from full genome KDE dataset
ampFrag_all = []
with open(allAmpFrags, 'rb') as iFH:
    ampFrag_all = pickle.load(iFH)
print 'Count of frag-GC KDEs for all genomes: {}'.format(len(ampFrag_all))


Count of frag-GC KDEs for all genomes: 1210

In [28]:
# random selection from list
#target_richness = len(ampFrag_target)

target_richness = len(ampFrag_target)
richness_needed = comm_richness - target_richness
print 'Number of random taxa needed to reach richness: {}'.format(richness_needed)

if richness_needed > 0:
    index = range(target_richness)
    index = np.random.choice(index, richness_needed)
    
    ampFrag_rand = []
    for i in index:
        sys.stderr.write('{},'.format(i))
        ampFrag_rand.append(copy.deepcopy(ampFrag_all[i]))
else:
    ampFrag_rand = []


3,2,154,67,51,152,133,99,67,163,176,95,101,27,61,36,165,49,110,190,50,20,7,49,164,137,79,4,9,195,5,30,107,84,161,143,96,18,174,100,147,161,134,145,100,163,128,94,150,114,168,110,51,60,58,73,89,87,189,49,92,11,4,42,132,30,96,3,135,72,193,151,113,79,186,152,120,56,92,155,186,1,73,7,174,74,67,182,157,12,112,140,67,69,159,29,140,89,183,53,110,136,121,32,86,42,87,67,100,63,179,68,43,178,64,78,149,6,1,43,155,47,142,52,39,55,152,109,24,164,138,56,120,105,113,73,64,126,120,160,92,92,39,26,129,0,16,176,75,146,151,39,12,118,73,45,73,164,75,4,66,128,125,159,79,174,16,147,6,109,88,162,61,85,192,191,79,68,105,174,87,175,99,138,141,171,27,90,152,34,110,7,29,179,60,46,97,120,192,14,33,103,165,122,183,103,103,75,76,164,188,139,1,119,32,187,149,30,89,167,56,89,31,162,154,41,137,123,59,110,195,100,104,163,158,138,72,142,150,143,12,135,40,114,168,27,10,63,28,173,94,105,122,67,168,93,63,22,35,193,51,77,98,174,128,72,145,114,144,139,0,161,156,75,111,111,172,64,16,38,0,55,11,66,65,103,91,52,22,72,143,23,12,114,96,97,172,132,120,9,110,99,79,71,91,174,54,120,185,124,137,163,2,47,163,58,153,4,190,153,88,57,52,189,4,28,134,50,173,35,88,60,136,13,31,45,17,98,195,24,92,95,171,156,135,35,28,70,74,84,101,118,35,78,137,84,192,19,182,177,187,134,158,36,31,116,27,73,15,174,83,172,51,193,9,48,55,131,78,92,75,106,148,67,57,17,69,44,113,16,187,49,124,149,28,2,175,90,101,76,161,188,17,135,108,83,184,1,107,64,51,102,100,130,45,139,181,171,160,108,142,29,29,110,176,154,152,97,62,71,145,30,165,116,58,25,35,132,186,4,115,12,155,115,53,23,163,138,118,73,102,63,166,5,29,61,106,135,30,169,76,121,175,4,54,22,101,48,85,146,56,128,51,123,88,64,49,188,147,169,111,29,51,13,100,168,33,195,142,40,157,48,142,37,140,132,71,47,134,182,67,3,10,97,2,139,116,92,171,111,47,24,25,122,123,49,186,91,18,130,57,64,141,119,29,15,89,16,177,125,165,5,154,189,109,0,148,173,175,1,23,122,81,43,50,145,54,186,50,176,17,108,130,66,189,54,173,18,27,39,179,61,85,95,41,18,31,148,37,40,61,13,155,84,36,2,101,191,102,123,58,50,165,163,32,79,125,23,50,114,161,167,89,19,36,194,148,111,77,164,57,94,37,124,72,132,134,85,173,140,60,139,139,49,86,117,30,155,12,98,110,131,30,67,179,116,57,90,157,182,41,87,156,147,117,186,61,171,48,194,181,153,78,153,116,67,154,39,11,26,47,16,33,107,162,68,81,4,130,38,3,34,143,65,176,151,123,73,124,187,114,154,185,43,40,28,168,29,139,95,192,136,41,1,162,38,58,170,177,151,183,135,194,16,185,77,69,130,40,179,29,35,34,80,56,159,169,146,141,49,148,142,142,192,3,123,6,182,170,160,4,54,46,157,136,144,142,26,53,160,33,33,136,43,52,45,100,169,140,118,6,193,4,135,10,40,82,192,161,3,156,182,115,13,116,151,127,15,126,159,122,191,130,173,33,69,181,87,6,9,28,160,187,23,82,96,190,40,2,163,120,184,150,180,78,176,16,13,136,62,60,138,32,185,5,20,118,23,39,4,124,91,90,105,44,37,65,52,10,140,150,153,146,77,100,78,129,51,98,134,102,95,170,14,163,89,67,25,110,143,59,59,167,143,130,134,40,190,77,56,1,189,3,181,87,175,29,60,27,139,146,111,109,111,58,159,118,166,182,170,54,101,104,6,66,10,21,133,35,18,30,125,44,136,193,133,46,69,88,102,168,85,63,55,13,8,48,31,133,179,168,132,147,174,50,54,163,159,192,18,135,146,142,132,71,10,86,16,127,5,179,23,20,36,104,141,78,41,87,1,162,73,86,40,178,191,156,160,181,30,64,171,105,127,188,169,155,91,7,140,47,0,173,104,153,182,42,101,9,179,41,121,78,166,170,142,120,140,181,154,88,158,14,120,94,111,147,34,124,15,168,20,125,127,34,48,108,52,23,129,143,40,73,39,32,106,136,158,23,46,120,80,104,154,56,115,79,87,90,26,139,129,121,82,150,122,119,190,30,115,189,84,69,75,1,168,186,28,176,144,66,66,111,12,121,52,184,85,47,195,149,65,138,61,153,98,58,186,154,125,31,136,138,13,59,101,97,139,139,143,191,179,192,68,1,26,38,180,187,21,87,177,195,75,129,94,136,30,17,111,72,27,79,115,30,106,79,5,162,102,53,39,121,86,5,191,177,153,158,107,43,22,136,181,180,187,167,142,115,178,140,35,110,179,17,42,116,173,161,141,44,88,19,11,103,175,42,77,139,149,17,157,22,57,134,23,99,153,54,136,24,48,182,106,168,20,127,20,173,104,193,172,107,6,26,26,91,116,76,92,195,125,85,37,5,94,12,5,105,122,123,142,182,27,77,159,79,161,186,179,117,8,129,120,108,159,47,183,179,84,90,115,93,133,63,193,1,151,63,130,132,117,149,23,33,100,37,18,91,189,127,76,194,90,166,93,183,154,76,120,131,17,26,105,163,153,146,95,170,95,51,156,170,4,5,43,109,170,173,87,15,14,59,140,153,159,2,5,72,141,113,14,54,152,17,162,58,68,90,31,0,138,41,33,120,153,187,165,169,160,79,48,12,6,8,94,160,72,143,159,75,141,190,143,45,174,134,78,141,36,177,155,47,131,78,73,137,41,140,68,121,166,142,84,31,16,3,22,94,32,172,68,137,161,188,1,93,130,124,135,191,100,192,153,2,29,165,53,90,173,83,23,80,97,30,71,28,107,115,165,109,47,93,43,53,4,42,33,32,50,151,45,60,91,54,6,2,23,65,24,154,11,75,169,40,48,126,84,182,74,173,93,17,195,5,87,162,145,24,83,56,175,89,133,5,79,48,138,189,23,68,159,130,88,68,134,17,36,63,153,36,154,183,65,17,25,57,55,144,13,25,27,26,87,94,38,81,32,106,44,117,146,34,53,107,170,8,184,147,77,87,49,93,163,86,7,138,11,53,0,20,42,184,21,138,45,20,172,132,101,58,56,113,169,87,29,34,139,152,147,24,100,134,77,90,74,64,50,0,177,39,15,151,66,166,151,174,18,129,16,7,47,50,111,193,95,159,185,131,112,184,23,13,162,49,124,10,131,133,108,113,176,121,70,145,184,108,60,159,172,142,73,11,141,156,30,63,170,79,42,69,19,83,192,114,27,9,61,152,161,0,26,161,7,11,96,97,24,169,175,92,43,3,3,179,126,113,27,47,29,29,55,47,161,12,176,141,61,99,177,88,143,58,39,125,41,98,75,140,177,66,163,50,104,155,31,27,174,113,57,110,71,184,150,166,97,125,148,195,136,83,143,64,41,162,14,109,101,75,59,55,40,3,19,134,11,90,143,143,66,166,126,129,112,132,189,182,62,153,175,60,153,192,157,112,82,156,171,189,178,139,89,171,150,125,122,71,82,68,11,177,125,80,130,104,29,129,46,57,72,13,6,162,131,155,50,96,24,148,185,192,33,167,78,112,176,98,176,48,105,15,193,167,33,92,181,140,128,50,139,115,189,182,65,20,77,112,36,184,90,69,153,22,180,75,54,98,118,137,136,126,91,37,190,146,143,111,25,186,37,181,16,22,130,61,36,92,13,146,78,16,153,46,10,33,3,170,56,193,82,98,24,39,178,30,45,11,133,51,5,142,154,11,190,106,175,49,79,84,14,130,26,118,31,121,61,128,134,106,133,147,157,121,25,111,68,4,4,64,168,40,110,105,30,17,63,120,146,33,76,177,188,48,168,164,121,108,65,102,148,62,168,79,67,95,3,132,122,38,6,105,173,142,28,69,154,24,61,13,41,115,188,158,40,106,106,75,178,171,173,189,82,18,116,50,74,155,129,72,99,12,181,28,89,176,191,115,137,48,80,4,66,35,92,45,154,25,76,167,123,151,141,91,41,174,133,112,21,51,62,41,65,72,24,112,121,80,137,39,67,141,45,23,31,146,195,148,90,137,60,71,159,178,34,18,19,46,73,144,67,20,70,119,131,108,147,7,152,172,39,195,31,70,80,166,88,34,15,153,170,99,104,39,109,92,43,114,55,189,50,152,125,54,50,106,42,178,28,17,38,134,63,7,133,176,149,128,22,151,133,90,169,49,178,105,125,168,134,49,130,54,173,86,14,26,53,168,95,57,100,122,111,195,124,121,114,173,109,84,136,41,102,131,63,73,70,159,190,181,134,24,45,30,174,146,104,133,164,94,181,79,24,42,150,44,178,126,62,69,99,54,94,25,102,142,109,186,151,95,41,122,166,94,182,169,147,177,40,134,31,2,123,74,27,119,126,76,26,45,145,141,98,160,193,169,148,112,142,44,46,116,21,113,135,6,181,13,122,81,110,162,48,98,118,121,153,156,170,146,47,88,75,6,32,179,58,188,97,32,90,76,62,81,116,120,87,32,63,171,145,39,134,191,111,135,56,69,90,59,136,53,26,158,102,61,21,154,1,50,62,10,98,182,85,56,178,125,32,3,112,112,84,100,125,67,104,49,163,189,61,2,125,135,71,73,112,66,21,95,98,145,143,103,161,133,104,192,153,79,58,174,78,69,150,18,39,102,83,82,174,28,153,130,135,43,62,107,10,165,170,70,121,133,153,80,107,180,130,165,33,166,50,38,124,39,53,39,10,69,140,145,167,133,12,146,41,172,14,45,125,167,113,157,148,146,126,178,14,138,113,58,87,47,44,55,104,50,50,29,135,150,109,170,153,155,37,159,58,131,78,154,165,163,103,166,113,25,143,17,91,84,56,191,73,12,135,2,32,175,18,101,185,179,60,104,98,121,190,45,97,189,38,23,110,10,140,22,180,71,63,194,94,43,146,90,82,181,5,51,120,94,16,103,114,5,160,92,41,117,40,140,93,83,160,48,67,132,61,5,128,148,31,62,101,71,58,169,30,2,145,101,149,87,138,90,88,184,125,125,155,62,80,182,58,140,26,21,76,178,135,61,178,3,83,68,16,173,102,11,132,1,103,164,96,56,130,76,72,192,133,64,16,75,25,64,56,16,121,29,142,88,139,121,92,191,137,175,168,32,70,32,187,139,109,144,10,150,23,171,171,56,154,38,175,14,146,14,58,44,21,99,20,82,65,130,177,181,141,160,37,120,145,186,18,21,77,172,95,55,179,114,150,186,35,4,19,117,137,41,36,31,130,59,151,59,123,81,75,181,172,84,166,47,81,162,27,176,43,100,52,178,22,194,184,159,19,65,158,159,91,48,51,96,16,5,170,101,7,168,76,164,20,179,135,60,168,172,52,53,19,159,70,193,51,2,84,44,46,71,5,41,75,43,68,158,183,92,145,97,158,121,170,118,93,71,185,184,92,21,162,166,8,101,162,157,8,107,184,87,181,135,135,17,149,94,156,191,90,170,161,144,156,182,170,36,76,55,192,88,23,115,39,70,139,74,139,24,34,149,52,31,20,121,58,175,61,69,51,89,178,2,105,89,146,131,23,59,19,90,54,146,54,35,189,20,69,151,20,104,82,169,74,72,113,105,110,55,84,32,34,88,99,117,7,80,22,108,87,28,145,184,157,127,37,84,118,44,124,147,155,67,157,111,94,100,71,62,133,15,190,195,190,167,35,147,159,106,36,158,25,125,38,138,8,55,52,140,153,139,164,140,120,90,193,148,170,165,180,68,108,147,43,167,70,18,94,149,159,192,25,83,97,111,194,20,86,49,50,25,16,67,91,74,37,44,12,121,104,98,46,7,142,99,42,107,47,46,34,110,185,80,46,96,101,24,100,88,79,45,121,68,97,164,8,92,131,195,72,102,94,70,58,59,193,49,51,9,195,145,195,126,121,183,195,3,137,11,113,109,77,191,106,61,54,50,169,39,157,129,10,22,76,71,11,145,74,176,170,73,33,150,28,32,171,154,101,142,161,35,86,157,94,168,18,154,81,154,22,56,59,156,71,176,24,153,129,27,107,141,118,179,66,26,155,104,155,115,88,63,181,194,47,117,31,171,170,133,114,119,82,60,137,149,28,181,161,25,142,134,77,159,106,133,18,2,49,59,24,45,21,105,192,56,126,89,54,56,58,3,172,121,26,119,43,112,78,162,148,20,46,152,161,118,3,96,37,72,49,45,166,70,66,153,110,24,0,110,187,117,105,141,185,14,22,107,128,64,1,83,109,67,11,27,43,88,158,143,121,102,34,102,127,83,179,155,113,11,74,136,117,46,123,44,67,165,65,63,122,119,170,48,119,168,142,75,98,142,93,123,77,153,131,53,78,169,138,136,139,7,101,48,161,93,68,121,31,121,89,112,131,64,112,176,121,99,151,187,46,47,124,71,119,80,29,85,36,148,170,147,104,77,137,108,182,117,151,94,63,184,181,45,144,63,46,145,70,37,29,165,39,97,183,184,51,10,65,122,13,47,73,13,31,6,43,22,142,96,33,159,171,159,181,43,82,30,13,57,99,119,139,127,32,136,44,60,2,80,54,53,159,109,94,161,115,173,110,14,193,141,179,141,77,136,51,82,25,1,127,73,157,137,82,76,126,65,36,13,92,173,167,162,176,194,41,103,89,107,74,162,186,143,144,176,128,52,25,194,122,100,8,38,165,22,184,29,85,113,114,90,186,36,122,136,31,58,80,58,118,52,103,19,104,145,188,192,34,72,62,142,175,42,14,182,105,41,21,137,124,2,71,166,101,170,105,183,129,190,129,191,148,15,43,101,176,154,176,23,65,104,106,100,25,113,23,47,92,191,182,25,66,50,107,11,16,123,96,15,38,180,156,55,27,116,161,132,23,152,4,9,170,125,173,63,143,85,132,94,117,139,189,67,3,86,119,42,79,111,156,193,85,137,92,5,189,1,186,146,191,92,95,59,178,29,162,48,146,38,184,71,172,44,76,166,134,180,127,109,81,23,90,106,15,3,173,70,173,183,50,119,109,81,141,116,49,107,63,142,134,5,143,46,106,42,47,129,175,118,157,63,68,123,14,99,156,92,100,102,180,53,188,9,26,62,135,115,98,52,164,162,76,41,160,93,121,110,94,119,181,186,18,127,52,131,97,5,115,31,156,4,97,182,123,43,28,124,138,67,123,53,177,132,60,21,124,74,26,30,71,9,112,186,37,97,139,144,92,107,192,99,182,58,66,96,150,110,103,112,147,4,194,182,64,137,48,145,136,80,145,126,65,82,67,14,56,33,172,68,195,12,50,92,76,156,47,41,124,99,95,32,26,108,107,147,128,142,110,95,108,193,48,140,19,122,147,86,59,151,104,82,89,77,50,59,31,53,5,180,23,188,89,112,165,78,65,51,97,106,5,128,134,108,124,1,149,185,29,102,88,190,38,73,31,32,96,90,105,104,111,49,181,53,116,44,172,22,169,155,140,84,91,136,9,65,55,106,165,38,109,74,187,193,2,50,102,195,158,105,121,89,135,129,44,32,167,102,96,129,157,132,84,74,4,140,73,55,182,187,87,91,177,115,12,174,51,130,42,68,78,35,28,165,39,95,110,119,96,35,128,88,186,181,13,47,20,143,100,9,184,42,104,95,139,70,148,135,39,105,94,81,66,120,128,108,186,1,161,116,103,92,16,168,178,83,191,50,115,138,192,27,142,138,130,142,1,34,26,59,93,122,55,134,64,102,147,127,194,195,69,129,178,112,107,128,150,67,2,68,59,175,139,95,181,193,36,183,66,39,194,39,116,51,64,3,175,21,188,78,54,20,46,155,122,143,106,74,153,178,189,153,114,192,130,54,88,104,77,195,168,131,176,32,30,110,166,78,114,103,139,191,108,184,86,50,0,13,73,76,154,182,60,140,153,179,54,68,5,111,73,165,65,60,51,44,156,182,157,183,7,174,4,77,26,186,125,122,159,107,93,51,147,192,173,109,92,44,32,156,121,25,101,170,110,127,110,159,66,58,107,0,122,24,165,37,138,162,43,24,167,81,46,16,164,40,78,163,4,119,174,140,105,99,10,6,170,11,130,92,6,46,63,177,55,73,116,80,135,122,5,65,176,85,53,150,110,175,143,29,165,36,21,50,109,126,70,46,1,66,119,28,10,188,170,16,181,77,55,131,127,47,95,173,112,37,47,100,79,169,57,34,34,18,79,40,121,106,193,140,153,47,87,45,107,99,156,115,149,174,189,136,42,187,121,90,185,11,116,176,74,17,25,120,172,165,8,152,4,173,100,177,73,125,132,66,135,41,96,9,51,184,131,159,74,101,70,81,133,187,129,57,172,186,7,4,27,84,119,192,23,144,36,92,27,179,27,134,21,176,47,0,137,164,134,46,90,186,57,17,11,16,59,54,178,190,53,59,172,189,195,88,138,86,62,127,85,141,185,106,123,43,57,184,146,97,142,21,151,44,124,156,58,111,37,107,92,12,37,34,129,141,192,63,34,81,63,149,19,162,179,168,155,28,35,150,44,150,68,41,121,122,157,16,105,46,131,59,112,41,146,148,172,45,132,167,183,139,57,172,104,7,96,0,15,194,130,143,2,40,93,76,175,97,5,76,156,104,124,58,96,32,31,75,64,41,143,19,80,192,46,94,92,107,175,90,135,82,102,99,15,130,70,65,41,88,84,88,102,21,93,22,8,76,1,65,131,22,190,134,145,127,12,71,89,100,24,16,178,135,87,37,107,62,181,184,38,11,194,127,58,40,90,114,23,58,84,77,48,11,119,91,146,33,106,190,41,124,181,156,86,183,98,132,149,141,142,29,97,194,15,151,77,168,59,28,76,166,33,190,98,53,3,78,178,82,129,67,114,3,156,116,63,117,75,13,144,75,3,100,59,75,170,76,120,44,135,123,148,87,110,34,118,86,142,114,82,174,67,131,192,52,150,132,106,91,98,37,113,183,36,166,88,165,125,178,97,149,85,111,194,17,13,47,162,161,44,126,58,29,157,32,160,27,42,65,140,132,54,155,132,73,63,119,24,110,76,175,72,65,16,137,49,119,26,111,171,171,106,124,24,62,81,20,21,97,30,91,125,76,40,65,15,17,77,182,37,172,1,123,72,84,12,129,149,57,177,189,77,33,172,99,107,193,143,168,170,60,62,123,141,132,99,95,56,46,88,126,56,51,16,3,77,171,172,106,87,81,162,56,171,168,147,134,82,7,90,51,62,106,26,169,107,89,107,111,74,42,119,74,158,8,160,132,78,127,81,65,192,92,74,29,177,132,186,103,39,55,186,141,68,149,194,88,181,121,189,46,107,104,165,159,104,191,127,3,58,100,164,71,187,173,160,193,128,194,119,144,170,186,103,64,47,114,193,65,38,86,60,69,34,30,175,20,46,176,84,37,188,29,189,83,173,74,98,119,72,50,43,39,151,140,3,77,104,146,31,4,47,78,101,182,89,159,135,127,27,31,173,30,65,7,157,81,104,64,107,37,3,85,1,120,133,77,42,33,67,54,172,40,183,128,134,72,136,82,150,6,74,166,171,153,60,4,104,170,34,192,75,168,40,5,172,131,132,121,71,178,60,68,17,53,32,175,88,191,109,128,158,96,24,129,45,134,144,13,141,14,54,114,15,154,132,130,132,79,187,3,124,124,33,42,94,187,51,44,28,89,34,104,103,129,175,130,69,34,106,56,141,175,136,112,184,114,39,118,87,4,23,36,136,134,8,154,155,25,74,108,37,155,35,82,188,84,7,158,22,131,41,113,113,186,100,144,35,59,72,175,113,10,106,26,2,78,119,151,181,103,193,45,146,58,72,158,34,59,143,101,29,98,33,49,159,126,78,63,195,105,1,195,21,186,174,13,147,162,66,6,180,8,15,43,45,174,58,73,88,60,102,45,105,103,6,193,73,148,156,13,179,42,63,26,158,194,11,134,168,37,141,132,29,32,179,97,156,100,174,30,53,183,167,180,18,148,117,168,119,188,167,4,68,124,127,137,157,32,35,67,187,152,113,127,172,45,158,82,13,128,1,58,40,35,126,168,76,32,116,28,126,162,118,47,17,34,148,124,0,167,72,9,69,29,194,95,35,33,22,92,145,168,25,117,33,80,121,84,177,69,192,193,156,39,42,141,192,97,16,98,178,140,152,109,82,189,39,120,35,148,84,143,183,38,164,79,193,189,50,171,195,167,24,14,148,0,55,161,150,84,179,158,32,88,4,14,115,32,71,142,168,191,49,147,174,128,162,73,26,147,148,156,39,59,38,14,177,64,161,190,11,131,87,139,109,20,78,139,63,75,177,23,98,79,48,109,7,179,7,135,20,185,119,58,3,3,79,159,119,150,129,45,147,188,168,9,139,161,23,96,94,22,189,179,147,89,83,77,20,59,96,84,51,55,160,58,19,64,26,133,171,129,42,139,85,115,170,7,137,102,1,142,93,19,139,31,44,170,193,170,105,135,107,151,128,116,26,57,26,153,50,51,109,153,183,73,158,123,146,184,72,83,135,177,21,120,115,111,56,64,176,130,17,94,9,103,178,31,113,60,25,30,173,118,156,143,83,114,73,114,142,170,194,103,48,55,195,58,2,63,0,124,160,77,176,38,71,141,9,31,153,79,13,5,30,179,146,83,138,33,102,23,166,138,3,5,180,151,16,55,176,144,169,126,181,93,124,30,170,14,40,5,93,130,36,43,128,166,68,153,159,47,20,27,180,195,123,165,154,10,72,159,48,22,168,95,33,101,152,60,136,76,185,167,140,154,45,130,188,136,85,69,129,15,56,10,95,98,59,74,76,12,175,165,99,95,165,174,117,26,126,35,63,147,6,32,87,135,18,85,142,60,186,128,87,144,142,7,160,145,40,44,80,53,187,119,100,12,9,42,119,5,60,58,180,104,172,42,135,70,185,52,107,72,83,25,42,113,87,34,50,191,144,28,81,145,126,35,166,156,161,57,189,183,39,126,123,66,145,45,149,89,83,118,23,150,102,173,136,142,7,160,104,36,29,37,110,131,147,47,47,153,116,129,145,115,150,186,143,150,26,191,119,137,170,171,121,76,142,50,30,4,192,186,91,60,195,183,147,117,149,169,104,37,129,139,6,134,121,3,179,126,91,64,133,108,189,3,128,89,90,162,86,181,60,68,23,129,150,177,177,63,110,22,29,51,73,156,102,37,63,78,144,42,42,192,51,31,151,5,183,86,33,173,155,9,103,65,187,192,44,177,25,33,17,21,97,63,60,4,18,115,126,165,115,12,165,84,89,171,147,75,53,21,171,112,63,33,75,192,96,106,53,106,59,76,191,180,34,5,72,134,122,80,124,121,179,162,26,58,77,169,185,175,195,138,65,31,148,20,83,111,26,34,108,62,76,45,123,5,109,88,72,25,54,63,143,59,185,80,14,74,85,118,128,86,84,55,35,73,69,61,80,101,27,6,37,188,154,61,117,143,56,139,133,130,67,30,54,188,183,50,103,186,145,121,158,179,24,108,35,29,131,94,45,60,25,75,150,53,112,156,67,28,103,54,58,27,139,35,1,178,139,45,2,137,151,192,1,63,50,11,118,67,173,38,190,23,68,78,69,38,73,99,67,94,119,162,115,131,94,67,117,39,56,41,16,12,175,33,63,99,11,5,131,47,119,136,73,81,91,143,93,85,1,109,81,191,110,164,106,74,27,170,41,26,193,168,104,150,192,114,99,137,111,52,19,123,139,12,153,67,100,74,187,93,16,138,52,193,121,44,143,68,147,100,189,174,124,181,37,148,133,74,167,60,179,125,110,150,58,65,103,92,183,126,161,53,82,115,105,99,98,100,113,39,11,58,70,160,153,88,44,19,168,164,48,103,186,89,64,117,91,162,137,186,146,2,6,139,142,140,64,189,190,184,190,75,39,37,144,141,89,13,18,100,15,107,95,20,158,57,123,191,44,11,74,32,93,129,93,129,132,21,179,59,150,14,75,92,73,140,167,64,87,120,26,122,125,153,74,184,66,142,190,126,170,148,88,31,80,150,36,195,86,125,25,52,104,165,85,50,99,6,189,28,155,100,102,80,137,173,128,62,47,63,101,118,3,192,66,79,31,81,50,20,50,13,84,51,75,111,102,130,84,17,92,27,190,60,142,99,139,89,171,151,79,50,184,46,19,3,31,72,52,6,190,152,192,82,70,152,22,150,189,52,85,175,92,28,73,159,21,37,145,101,169,57,44,60,87,153,165,14,29,4,153,93,110,29,167,84,195,51,179,176,89,50,162,112,155,78,58,121,13,115,87,9,114,169,7,104,108,111,40,166,108,158,23,48,164,151,145,91,33,6,127,183,48,17,107,150,57,36,101,111,51,108,0,50,64,189,163,193,128,144,30,20,69,194,156,12,171,182,152,52,124,119,90,6,151,82,147,68,159,9,173,52,166,29,98,96,105,135,19,194,93,55,111,62,80,128,47,163,48,118,43,177,161,112,103,29,168,183,165,151,79,106,70,114,166,89,160,71,135,42,23,181,139,18,142,136,35,38,143,13,157,187,96,90,74,162,107,109,117,125,53,113,104,71,126,138,148,105,112,103,42,33,48,192,147,28,29,39,160,97,24,146,150,8,24,157,131,83,7,25,101,155,91,24,56,35,128,45,114,98,139,116,185,132,156,103,18,176,75,26,92,136,106,155,34,17,41,10,38,0,67,89,157,170,66,11,117,93,147,37,134,136,38,158,183,74,170,40,189,179,61,8,79,100,49,45,30,127,76,22,102,78,164,149,61,180,42,11,52,8,126,50,2,109,112,127,162,30,39,176,6,22,100,46,39,176,135,184,169,119,137,23,166,84,29,185,3,116,29,179,121,191,24,74,94,189,52,97,92,176,13,141,11,74,110,193,4,129,85,148,105,89,29,2,42,0,163,110,72,146,105,194,47,148,58,17,152,153,15,67,55,64,2,57,7,20,108,42,171,146,154,146,70,104,98,122,152,86,161,105,89,70,193,141,2,193,96,1,0,1,2,175,27,49,54,32,54,97,101,12,14,29,89,96,159,85,32,46,169,99,160,184,102,72,123,162,130,148,41,11,65,54,158,12,144,190,139,82,172,12,41,74,174,152,23,100,141,73,105,15,23,26,165,47,86,62,50,99,9,68,4,15,114,26,19,86,87,59,15,126,151,147,22,59,81,82,176,191,51,35,41,153,171,108,161,74,157,59,75,143,73,153,167,146,139,115,83,44,174,94,148,113,168,70,61,163,113,60,128,49,46,107,112,70,128,30,171,147,124,93,179,33,152,56,137,154,33,186,176,12,120,127,154,98,80,35,192,35,58,135,122,86,61,62,29,47,35,9,94,93,117,117,21,7,61,178,57,52,91,55,58,184,20,58,74,143,175,122,82,172,34,92,171,139,118,91,1,195,2,55,30,37,36,144,194,33,48,160,74,180,110,189,66,0,139,14,195,36,184,72,32,36,76,132,132,45,43,150,139,101,139,160,90,39,181,76,65,36,58,63,176,2,15,37,127,7,140,56,166,33,171,142,46,89,78,174,150,140,118,181,88,69,188,155,3,167,175,1,64,152,88,195,128,40,134,82,21,6,37,12,120,85,105,24,1,140,190,183,17,2,1,162,177,81,168,70,35,91,77,98,150,156,115,12,161,62,109,35,18,6,75,75,13,117,61,124,179,87,185,43,189,54,83,53,84,91,72,105,91,33,117,97,169,178,70,24,74,45,135,81,156,62,43,125,116,74,119,88,59,45,57,164,27,45,8,41,37,177,59,86,147,1,88,14,55,29,188,136,148,137,190,185,1,67,4,182,69,81,21,21,156,66,79,185,41,51,94,42,185,66,169,65,54,160,151,70,36,21,149,159,187,97,92,120,170,45,19,93,116,54,145,65,158,178,45,194,35,90,131,111,181,52,117,157,161,25,115,24,88,154,61,46,106,134,107,145,118,146,190,152,121,69,2,180,137,5,135,52,136,9,49,88,89,119,25,112,71,44,22,15,104,187,97,31,49,58,162,72,82,39,10,122,65,91,92,20,42,55,13,5,176,117,122,164,133,175,127,58,193,92,187,148,131,46,79,118,146,106,179,178,195,104,122,147,55,25,156,24,46,86,160,70,143,88,65,12,140,129,172,73,63,179,103,187,44,85,55,30,48,60,9,47,160,149,16,158,35,96,10,93,54,116,156,28,92,107,49,63,74,175,80,105,106,64,63,128,42,107,167,72,123,99,125,60,155,31,186,105,35,128,93,170,119,32,30,41,60,175,170,62,109,96,51,97,103,97,121,76,164,128,31,150,10,78,39,109,122,159,84,108,71,99,43,60,175,56,179,108,120,120,64,95,123,3,31,49,8,114,147,44,145,20,176,124,153,143,86,119,58,53,130,73,87,152,25,158,128,134,138,155,46,69,48,117,121,151,60,57,138,6,107,188,0,7,28,161,60,56,169,65,22,178,148,112,166,52,68,46,57,51,14,59,172,114,168,112,79,133,195,58,113,183,67,76,87,100,118,34,117,75,161,63,46,15,117,132,154,39,44,97,159,56,2,111,82,162,195,160,66,17,87,156,85,50,146,167,146,57,143,182,51,51,154,194,122,135,175,192,194,40,73,19,17,20,127,126,137,90,148,106,156,83,124,133,41,137,6,72,24,193,113,51,70,8,119,33,114,143,152,167,22,143,30,155,81,176,121,149,90,63,45,40,152,160,116,24,100,147,132,84,83,62,186,28,138,144,163,190,152,47,143,163,148,169,184,67,104,15,3,186,159,18,165,74,86,56,39,150,54,161,138,189,122,176,18,180,74,153,153,130,159,143,70,179,56,29,66,84,133,61,99,22,46,107,47,53,173,168,195,100,188,142,124,99,31,103,162,184,55,172,10,84,169,88,105,17,190,147,118,12,156,120,65,67,79,107,34,73,95,173,131,191,0,82,170,116,55,130,188,18,178,68,8,12,193,67,103,84,14,45,143,85,60,59,
Number of random taxa needed to reach richness: 6705

In [ ]:
# renaming randomly selected KDEs by non-target OTU-ID
for i in range(len(ampFrag_rand)):
    ampFrag_rand[i][0] = nonTarget[i]

In [ ]:
# appending random taxa to target taxa and writing
outFile = os.path.join(workDir, 'ampFrags_wRand.pkl')

with open(outFile, 'wb') as oFH:
    x = ampFrag_target + ampFrag_rand
    print 'Number of taxa in output: {}'.format(len(x))
    pickle.dump(x, oFH)

Converting fragments to kde object


In [ ]:
!cd $workDir; \
    SIPSim fragment_kde \
    ampFrags_wRand.pkl \
    > ampFrags_wRand_kde.pkl

Adding diffusion


In [ ]:
!cd $workDir; \
    SIPSim diffusion \
    ampFrags_wRand_kde.pkl \
    --np $nprocs \
    > ampFrags_wRand_kde_dif.pkl

Making an incorp config file


In [ ]:
!cd $workDir; \
    SIPSim incorpConfigExample \
    --percTaxa 0 \
    --percIncorpUnif 100 \
    > PT0_PI100.config

Adding isotope incorporation to BD distribution


In [ ]:
!cd $workDir; \
    SIPSim isotope_incorp \
    ampFrags_wRand_kde_dif.pkl \
    PT0_PI100.config \
    --comm comm.txt \
    --np $nprocs \
    > ampFrags_wRand_kde_dif_incorp.pkl

Calculating BD shift from isotope incorporation


In [ ]:
!cd $workDir; \
    SIPSim BD_shift \
    ampFrags_wRand_kde_dif.pkl \
    ampFrags_wRand_kde_dif_incorp.pkl \
    --np $nprocs \
    > ampFrags_wRand_kde_dif_incorp_BD-shift.txt

Simulating gradient fractions


In [ ]:
!cd $workDir; \
    SIPSim gradient_fractions \
    comm.txt \
    > fracs.txt

Simulating an OTU table


In [ ]:
!cd $workDir; \
    SIPSim OTU_table \
    ampFrags_wRand_kde_dif_incorp.pkl \
    comm.txt \
    fracs.txt \
    --abs 1e9 \
    --np $nprocs \
    > OTU_abs1e9.txt

Plotting taxon abundances


In [12]:
%%R -i workDir
setwd(workDir)

# loading file
tbl = read.delim('OTU_abs1e9.txt', sep='\t')

In [13]:
%%R
## BD for G+C of 0 or 100
BD.GCp0 = 0 * 0.098 + 1.66
BD.GCp100 = 1 * 0.098 + 1.66

In [14]:
%%R -w 800 -h 300
# plotting absolute abundances

tbl.s = tbl %>%
    group_by(library, BD_mid) %>%
    summarize(total_count = sum(count))

## plot
p = ggplot(tbl.s, aes(BD_mid, total_count)) +
    geom_area(stat='identity', alpha=0.3, position='dodge') +
    geom_histogram(stat='identity') +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16) 
    )
p



In [15]:
%%R -w 800 -h 300
# plotting number of taxa at each BD

tbl.nt = tbl %>%
    filter(count > 0) %>%
    group_by(library, BD_mid) %>%
    summarize(n_taxa = n())

## plot
p = ggplot(tbl.nt, aes(BD_mid, n_taxa)) +
    geom_area(stat='identity', alpha=0.3, position='dodge') +
    geom_histogram(stat='identity') +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density', y='Number of taxa') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p



In [16]:
%%R -w 800 -h 250
# plotting relative abundances

## plot
p = ggplot(tbl, aes(BD_mid, count, fill=taxon)) +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p + geom_area(stat='identity', position='dodge', alpha=0.5)



In [17]:
%%R -w 800 -h 250

p + geom_area(stat='identity', position='fill')


Subsampling from the OTU table


In [ ]:
dist,loc,scale = seq_per_fraction

!cd $workDir; \
    SIPSim OTU_subsample \
    --dist $dist \
    --dist_params mean:$loc,sigma:$scale \
    --walk 2 \
    --min_size 10000 \
    --max_size 200000 \
    OTU_abs1e9.txt \
    > OTU_abs1e9_sub.txt

Testing/Plotting seq count distribution of subsampled fraction samples


In [18]:
%%R -h 300 -i workDir
setwd(workDir)

tbl = read.csv('OTU_abs1e9_sub.txt', sep='\t') 

tbl.s = tbl %>% 
    group_by(library, fraction) %>%
    summarize(total_count = sum(count)) %>%
    ungroup() %>%
    mutate(library = as.character(library))

ggplot(tbl.s, aes(total_count)) +
    geom_density(fill='blue')



In [19]:
%%R -h 300 -w 600
setwd(workDir)

tbl.s = tbl %>%
    group_by(fraction, BD_min, BD_mid, BD_max) %>%
    summarize(total_count = sum(count)) 

ggplot(tbl.s, aes(BD_mid, total_count)) +
    geom_point() +
    geom_line() +
    labs(x='Buoyant density', y='Total sequences') +
    theme_bw() +
    theme(
        text = element_text(size=16)
        )


Getting list of target taxa


In [20]:
%%R -i workDir

inFile = paste(c(workDir, 'target_genome_index.txt'), collapse='/')

tbl.target = read.delim(inFile, sep='\t', header=F)
colnames(tbl.target) = c('OTUId', 'genome_file', 'genome_ID', 'X', 'Y', 'Z')
tbl.target = tbl.target %>% distinct(OTUId)


cat('Number of target OTUs: ', tbl.target$OTUId %>% unique %>% length, '\n')
cat('----------\n')
tbl.target %>% head(n=3)


Number of target OTUs:  196 
----------
     OTUId
1 OTU.9267
2 OTU.1457
3   OTU.77
                                                                                    genome_file
1 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
2 /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Thermomonospora_curvata_DSM_43183.fasta
3   /home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/Pseudoxanthomonas_spadix_BD-a59.fasta
                                   genome_ID X           Y    Z
1 CP001738_Thermomonospora_curvata_DSM_43183 1 0.003132433 4370
2 CP001738_Thermomonospora_curvata_DSM_43183 1 0.011451281 1176
3   CP003093_Pseudoxanthomonas_spadix_BD_a59 1 0.028904303  569

Plotting abundance distributions


In [ ]:
%%R -w 800 -h 250
# plotting relative abundances

tbl = tbl %>% 
    group_by(fraction) %>%
    mutate(rel_abund = count / sum(count))


## plot
p = ggplot(tbl, aes(BD_mid, count, fill=taxon)) +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p + geom_area(stat='identity', position='dodge', alpha=0.5)

In [ ]:
%%R -w 800 -h 250

p = ggplot(tbl, aes(BD_mid, rel_abund, fill=taxon)) +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p + geom_area(stat='identity')

Abundance distribution of just target taxa


In [ ]:
%%R

targets = tbl.target$OTUId %>% as.vector %>% unique 

tbl.f = tbl %>%
    filter(taxon %in% targets)

tbl.f %>% head

In [ ]:
%%R -w 800 -h 250
# plotting absolute abundances

## plot
p = ggplot(tbl.f, aes(BD_mid, count, fill=taxon)) +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p + geom_area(stat='identity', position='dodge', alpha=0.5)

In [ ]:
%%R -w 800 -h 250
# plotting relative abundances

p = ggplot(tbl.f, aes(BD_mid, rel_abund, fill=taxon)) +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p + geom_area(stat='identity')

Plotting 'true' taxon abundance distribution (from priming exp dataset)


In [ ]:
%%R -i metaDataFile
# loading priming_exp metadata file

meta = read.delim(metaDataFile, sep='\t')
meta %>% head(n=4)

In [ ]:
%%R -i otuTableFile
# loading priming_exp OTU table 

tbl.otu.true = read.delim(otuTableFile, sep='\t') %>%
    select(OTUId, starts_with('X12C.700.28')) 
tbl.otu.true %>% head(n=3)

In [ ]:
%%R
# editing table
tbl.otu.true.w = tbl.otu.true %>%
    gather('sample', 'count', 2:ncol(tbl.otu.true)) %>%
    mutate(sample = gsub('^X', '', sample)) %>%
    group_by(sample) %>%
    mutate(rel_abund = count / sum(count)) %>%
    ungroup() %>%
    filter(count > 0)
tbl.otu.true.w %>% head(n=5)

In [ ]:
%%R
tbl.true.j = inner_join(tbl.otu.true.w, meta, c('sample' = 'Sample'))
tbl.true.j %>% as.data.frame %>% head(n=3)

In [ ]:
%%R -w 800 -h 300  -i workDir
# plotting number of taxa at each BD

tbl = read.csv('OTU_abs1e9_sub.txt', sep='\t') 

tbl.nt = tbl %>%
    filter(count > 0) %>%
    group_by(library, BD_mid) %>%
    summarize(n_taxa = n())

## plot
p = ggplot(tbl.nt, aes(BD_mid, n_taxa)) +
    geom_area(stat='identity', alpha=0.5) +
    geom_point() +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density', y='Number of taxa') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )
p

In [ ]:
%%R -w 700 -h 350

tbl.true.j.s = tbl.true.j %>%
    filter(count > 0) %>%
    group_by(sample, Density) %>%
    summarize(n_taxa = sum(count > 0))

ggplot(tbl.true.j.s, aes(Density, n_taxa)) +
    geom_area(stat='identity', alpha=0.5) +
    geom_point() +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density', y='Number of taxa') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )

Plotting total counts for each sample


In [ ]:
%%R -h 300 -w 600
tbl.true.j.s = tbl.true.j %>%
    group_by(sample, Density) %>%
    summarize(total_count = sum(count)) 

ggplot(tbl.true.j.s, aes(Density, total_count)) +
    geom_point() +
    geom_line() +
    labs(x='Buoyant density', y='Total sequences') +
    theme_bw() +
    theme(
        text = element_text(size=16)
        )

Plotting abundance distribution of target OTUs


In [ ]:
%%R
tbl.true.j.f = tbl.true.j %>%
    filter(OTUId %in% targets) %>%
    arrange(OTUId, Density) %>%
    group_by(sample)
tbl.true.j.f %>% head(n=3) %>% as.data.frame

In [ ]:
%%R -w 800 -h 250
# plotting relative abundances

## plot
ggplot(tbl.true.j.f, aes(Density, rel_abund, fill=OTUId)) +
    geom_area(stat='identity') +
    geom_vline(xintercept=c(BD.GCp0, BD.GCp100), linetype='dashed', alpha=0.5) +
    labs(x='Buoyant density') +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        legend.position = 'none'
    )

Combining true and simulated OTU tables for target taxa


In [ ]:
%%R
tbl.f.e = tbl.f %>%
    mutate(library = 'simulation') %>%
    rename('density' = BD_mid) %>%
    select(-BD_min, -BD_max)

tbl.true.e = tbl.true.j.f %>% 
    select('taxon' = OTUId,
           'fraction' = sample,
           'density' = Density,
           count, rel_abund) %>%
    mutate(library = 'true') 
           
           
tbl.sim.true = rbind(tbl.f.e, tbl.true.e) %>% as.data.frame
tbl.f.e = data.frame()
tbl.true.e = data.frame()

tbl.sim.true %>% head(n=3)

In [ ]:
%%R
# check
cat('Number of target taxa: ', tbl.sim.true$taxon %>% unique %>% length, '\n')

Abundance distributions of each target taxon


In [ ]:
%%R -w 900 -h 3500

tbl.sim.true.f = tbl.sim.true %>%
    ungroup() %>%
    filter(density >= 1.677) %>%
    filter(density <= 1.761) %>%
    group_by(taxon) %>%
    mutate(mean_rel_abund = mean(rel_abund))  %>%
    ungroup()

tbl.sim.true.f$taxon = reorder(tbl.sim.true.f$taxon, -tbl.sim.true.f$mean_rel_abund)

ggplot(tbl.sim.true.f, aes(density, rel_abund, color=library)) +
    geom_point() +
    geom_line() +
    theme_bw() +
    facet_wrap(~ taxon, ncol=4, scales='free_y')

In [ ]:
%%R
tbl.otu.true.w %>% 
    filter(OTUId == 'OTU.1') %>%
    as.data.frame()

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: