Goal

Accuracy as a function of isotope incorporation & gradient fraction size

acc ~ fraction_size + incorp

Variable parameters:

  • atom % isotope incorporation
    • 0, 15, 25, 50, 75, 100
  • gradient fraction size
    • 0.003, 0.004, 0.006, 0.008
  • n-reps (stochastic: taxon abundances & which taxa incorporate)
    • 10

Setting paths


In [1]:
# paths
import os

# single project root; every location below is derived from it
projDir = '/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/'

# trailing '' keeps the trailing slash on the directory paths
workDir = os.path.join(projDir, 'data', 'bac_genome1147', '')
buildDir = os.path.join(workDir, 'atomIncorp_fracSize')
# R_dir must end with a slash: the templates concatenate it directly with script names
R_dir = os.path.join(projDir, 'SIPSimR', 'scripts', '')

# input files shared by all simulations
fragFile = os.path.join(workDir, 'ampFrags_kde.pkl')
genome_index = os.path.join(workDir, 'genome_index.txt')

Init


In [2]:
import glob
import itertools
import nestly

In [3]:
%load_ext pushmsg

In [4]:
# create the build directory on first run, then make it the notebook's
# working directory (relative paths in later cells resolve against it)
if not os.path.isdir(buildDir):
    os.makedirs(buildDir)
%cd $buildDir


/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize

BD min/max


In [5]:
## G+C cutoffs (percent): lower and upper
min_GC = 13.5
max_GC = 80
## max 13C shift in BD (added to the upper BD bound below)
max_13C_shift_in_BD = 0.036


def GC2BD(GC_perc):
    """Convert a percent G+C value to a BD value: GC/100 * 0.098 + 1.66."""
    return GC_perc/100.0 * 0.098 + 1.66


min_BD = GC2BD(min_GC)
# upper bound = BD at the max G+C cutoff, widened by the max 13C shift
max_BD = GC2BD(max_GC) + max_13C_shift_in_BD

print('Min BD: {}'.format(min_BD))
print('Max BD: {}'.format(max_BD))


Min BD: 1.67323
Max BD: 1.7744

Nestly


In [6]:
# making an experimental design file for qSIP
x = range(1,7)
y = ['control', 'treatment']

expDesignFile = os.path.join(buildDir, 'qSIP_exp_design.txt')
with open(expDesignFile, 'wb') as outFH:
    for i,z in itertools.izip(x,itertools.cycle(y)):
        line = '\t'.join([str(i),z])
        outFH.write(line + '\n')

!head $expDesignFile


1	control
2	treatment
3	control
4	treatment
5	control
6	treatment

Nestly params


In [7]:
# building tree structure
nest = nestly.Nest()

# varying params: test fraction size
#nest.add('percIncorp', [50])
#nest.add('frac_mu', [0.003, 0.004, 0.006, 0.008])
#nest.add('rep', range(1,11))


# varying params: TEST subset
#nest.add('percIncorp', [0, 100])
#nest.add('frac_mu', [0.004, 0.008])
#nest.add('rep', [1])

# varying params
# (each level becomes one directory level: <percIncorp>/<frac_mu>/<rep>)
varyingParams = [
    ('percIncorp', [0, 15, 25, 50, 75, 100]),
    ('frac_mu', [0.003, 0.004, 0.006, 0.008]),
    ('rep', range(1,11)),
]
for paramName,levels in varyingParams:
    nest.add(paramName, levels)


## set params
## (single-valued; added in this order with create_dir=False, so no directory level)
setParams = [
    ('percTaxa', 10),
    ('abs', '1e9'),                 # use '1e7' for TESTING
    ('np', 4),
    ('Monte_rep', 100000),
    ('subsample_dist', 'lognormal'),
    ('subsample_mean', 9.432),
    ('subsample_scale', 0.5),
    ('subsample_min', 10000),
    ('subsample_max', 30000),
    ('min_BD', min_BD),
    ('max_BD', max_BD),
    ('DBL_scaling', 0.5),
    ('bandwidth', 0.8),
    ('heavy_BD_min', 1.71),
    ('heavy_BD_max', 1.75),
    ('topTaxaToPlot', 100),
    ('padj', 0.1),
    ('log2', 0.25),
    ### input/output files
    ('buildDir', buildDir),
    ('R_dir', R_dir),
    ('genome_index', genome_index),
    ('fragFile', fragFile),
    ('exp_design', expDesignFile),
]
for paramName,value in setParams:
    nest.add(paramName, [value], create_dir=False)


# building directory tree
nest.build(buildDir)

# bash file to run
# (base name only; each pipeline step derives its own template name from it)
bashFile = os.path.join(buildDir, 'SIPSimRun.sh')

Experimental design


In [8]:
bashFileTmp = os.path.splitext(bashFile)[0] + '_expDesign.sh'
bashFileTmp


Out[8]:
'/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/SIPSimRun_expDesign.sh'

In [9]:
%%writefile $bashFileTmp
#!/bin/bash
# nestrun template, step 1: experimental design.
# Run once per nest directory; the brace-delimited fields are nest parameters
# (percIncorp, percTaxa, fragFile, genome_index, frac_mu, min_BD, max_BD).
# Writes incorp.config, incorporators.txt, comm.txt and fracs.txt into the
# job directory; the SIPSim pipeline template reads all four.
# NOTE(review): nestrun presumably treats every brace pair as a field, so
#   avoid literal curly braces when editing this script -- confirm.
source activate SIPSim
# OPENBLAS threads 
export OMP_NUM_THREADS=1

echo '#-- Experimental design --#'

echo '# Making an isotope incorporation config file'
echo '## 3 replicate gradients for control & treatment'
SIPSim incorp_config_example \
  --percIncorpUnif {percIncorp} \
  --n_reps 3 \
  > incorp.config

echo '# Selecting incorporator taxa'
echo '## This is to make the gradient replicates consistent (qSIP finds mean among replicates)'
SIPSim KDE_select_taxa \
    -p {percTaxa} \
    {fragFile} \
    > incorporators.txt

echo '# Creating a community file (3 replicate control, 3 replicate treatment)'
SIPSim communities \
    --config incorp.config \
    {genome_index} \
    > comm.txt    

echo '# simulating gradient fractions'
# frac_mu is the varied fraction-size parameter; sigma is fixed at 0.0015
SIPSim gradient_fractions \
    --params mu:{frac_mu},sigma:0.0015 \
    --BD_min {min_BD} \
    --BD_max {max_BD} \
    comm.txt \
    > fracs.txt


Writing /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/SIPSimRun_expDesign.sh

In [10]:
!chmod 755 $bashFileTmp

In [11]:
%%bash -s $workDir $bashFileTmp $buildDir
# positional args: $1 = workDir, $2 = template script, $3 = nest build directory
# offset job start to prevent conda activate errors
# NOTE(review): this delay runs once, here in the driver; the experimental-design
#   template itself has no per-job sleep, unlike the later templates -- confirm intended
sleep $[ ( $RANDOM % 10 )  + 1 ]s
source activate py2_ley0.4
# change to working dir
cd $1
# run job 
# (the template is run in each nest directory under $3, 20 jobs at a time; see log below)
nestrun --template-file $2 -d $3 --log-file exp_design.log -j 20


2017-08-21 20:19:53,780 * INFO * Template: ./SIPSimRun_expDesign.sh
2017-08-21 20:19:53,785 * INFO * [61691] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/1
2017-08-21 20:19:53,789 * INFO * [61694] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/2
2017-08-21 20:19:53,794 * INFO * [61700] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/3
2017-08-21 20:19:53,798 * INFO * [61709] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/4
2017-08-21 20:19:53,803 * INFO * [61718] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/5
2017-08-21 20:19:53,808 * INFO * [61726] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/6
2017-08-21 20:19:53,813 * INFO * [61735] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/7
2017-08-21 20:19:53,817 * INFO * [61742] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/8
2017-08-21 20:19:53,821 * INFO * [61750] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/9
2017-08-21 20:19:53,826 * INFO * [61759] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/10
2017-08-21 20:19:53,830 * INFO * [61766] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/1
2017-08-21 20:19:53,835 * INFO * [61774] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/2
2017-08-21 20:19:53,841 * INFO * [61782] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/3
2017-08-21 20:19:53,846 * INFO * [61790] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/4
2017-08-21 20:19:53,851 * INFO * [61798] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/5
2017-08-21 20:19:53,856 * INFO * [61808] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/6
2017-08-21 20:19:53,862 * INFO * [61815] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/7
2017-08-21 20:19:53,867 * INFO * [61822] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/8
2017-08-21 20:19:53,874 * INFO * [61832] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/9
2017-08-21 20:19:53,880 * INFO * [61839] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/10
2017-08-21 20:20:07,250 * INFO * [61832] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/9 Finished with 0
2017-08-21 20:20:07,273 * INFO * [62484] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/1
2017-08-21 20:20:07,274 * INFO * [61790] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/4 Finished with 0
2017-08-21 20:20:07,279 * INFO * [62487] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/2
2017-08-21 20:20:07,299 * INFO * [61709] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/4 Finished with 0
2017-08-21 20:20:07,305 * INFO * [62500] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/3
2017-08-21 20:20:07,318 * INFO * [61750] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/9 Finished with 0
2017-08-21 20:20:07,326 * INFO * [62508] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/4
2017-08-21 20:20:07,326 * INFO * [61735] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/7 Finished with 0
2017-08-21 20:20:07,335 * INFO * [62512] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/5
2017-08-21 20:20:07,335 * INFO * [61815] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/7 Finished with 0
2017-08-21 20:20:07,341 * INFO * [62519] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/6
2017-08-21 20:20:07,348 * INFO * [61691] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/1 Finished with 0
2017-08-21 20:20:07,353 * INFO * [62532] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/7
2017-08-21 20:20:07,354 * INFO * [61694] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/2 Finished with 0
2017-08-21 20:20:07,363 * INFO * [62537] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/8
2017-08-21 20:20:07,363 * INFO * [61726] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/6 Finished with 0
2017-08-21 20:20:07,369 * INFO * [62543] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/9
2017-08-21 20:20:07,370 * INFO * [61742] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/8 Finished with 0
2017-08-21 20:20:07,379 * INFO * [62553] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/10
2017-08-21 20:20:07,388 * INFO * [61700] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/3 Finished with 0
2017-08-21 20:20:07,401 * INFO * [62564] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/1
2017-08-21 20:20:07,401 * INFO * [61766] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/1 Finished with 0
2017-08-21 20:20:07,408 * INFO * [62567] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/2
2017-08-21 20:20:07,408 * INFO * [61808] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/6 Finished with 0
2017-08-21 20:20:07,415 * INFO * [62575] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/3
2017-08-21 20:20:07,416 * INFO * [61774] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/2 Finished with 0
2017-08-21 20:20:07,425 * INFO * [62583] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/4
2017-08-21 20:20:07,425 * INFO * [61782] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/3 Finished with 0
2017-08-21 20:20:07,430 * INFO * [62592] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/5
2017-08-21 20:20:07,431 * INFO * [61798] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/5 Finished with 0
2017-08-21 20:20:07,435 * INFO * [62599] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/6
2017-08-21 20:20:07,436 * INFO * [61718] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/5 Finished with 0
2017-08-21 20:20:07,443 * INFO * [62608] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/7
2017-08-21 20:20:07,443 * INFO * [61822] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/8 Finished with 0
2017-08-21 20:20:07,448 * INFO * [62615] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/8
2017-08-21 20:20:07,448 * INFO * [61759] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.003/10 Finished with 0
2017-08-21 20:20:07,456 * INFO * [62628] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/9
2017-08-21 20:20:07,456 * INFO * [61839] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.004/10 Finished with 0
2017-08-21 20:20:07,461 * INFO * [62631] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/10
2017-08-21 20:20:17,298 * INFO * [62512] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/5 Finished with 0
2017-08-21 20:20:17,306 * INFO * [63274] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/1
2017-08-21 20:20:17,513 * INFO * [62519] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/6 Finished with 0
2017-08-21 20:20:17,519 * INFO * [63282] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/2
2017-08-21 20:20:17,541 * INFO * [62575] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/3 Finished with 0
2017-08-21 20:20:17,566 * INFO * [63290] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/3
2017-08-21 20:20:17,566 * INFO * [62487] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/2 Finished with 0
2017-08-21 20:20:17,575 * INFO * [63293] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/4
2017-08-21 20:20:17,575 * INFO * [62500] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/3 Finished with 0
2017-08-21 20:20:17,581 * INFO * [63303] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/5
2017-08-21 20:20:17,582 * INFO * [62508] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/4 Finished with 0
2017-08-21 20:20:17,588 * INFO * [63311] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/6
2017-08-21 20:20:17,589 * INFO * [62532] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/7 Finished with 0
2017-08-21 20:20:17,596 * INFO * [63321] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/7
2017-08-21 20:20:17,596 * INFO * [62543] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/9 Finished with 0
2017-08-21 20:20:17,602 * INFO * [63325] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/8
2017-08-21 20:20:17,603 * INFO * [62553] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/10 Finished with 0
2017-08-21 20:20:17,609 * INFO * [63335] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/9
2017-08-21 20:20:17,610 * INFO * [62567] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/2 Finished with 0
2017-08-21 20:20:17,615 * INFO * [63343] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/10
2017-08-21 20:20:17,616 * INFO * [62583] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/4 Finished with 0
2017-08-21 20:20:17,624 * INFO * [63351] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/1
2017-08-21 20:20:17,624 * INFO * [62537] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/8 Finished with 0
2017-08-21 20:20:17,631 * INFO * [63360] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/2
2017-08-21 20:20:17,631 * INFO * [62564] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/1 Finished with 0
2017-08-21 20:20:17,638 * INFO * [63365] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/3
2017-08-21 20:20:17,638 * INFO * [62599] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/6 Finished with 0
2017-08-21 20:20:17,644 * INFO * [63374] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/4
2017-08-21 20:20:17,645 * INFO * [62628] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/9 Finished with 0
2017-08-21 20:20:17,652 * INFO * [63383] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/5
2017-08-21 20:20:17,698 * INFO * [62484] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.006/1 Finished with 0
2017-08-21 20:20:17,704 * INFO * [63394] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/6
2017-08-21 20:20:17,705 * INFO * [62608] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/7 Finished with 0
2017-08-21 20:20:17,716 * INFO * [63397] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/7
2017-08-21 20:20:17,732 * INFO * [62631] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/10 Finished with 0
2017-08-21 20:20:17,741 * INFO * [63410] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/8
2017-08-21 20:20:17,742 * INFO * [62615] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/8 Finished with 0
2017-08-21 20:20:17,749 * INFO * [63413] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/9
2017-08-21 20:20:17,758 * INFO * [62592] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/0/0.008/5 Finished with 0
2017-08-21 20:20:17,764 * INFO * [63426] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/10
2017-08-21 20:20:27,590 * INFO * [63274] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/1 Finished with 0
2017-08-21 20:20:27,596 * INFO * [64064] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.006/1
2017-08-21 20:20:27,836 * INFO * [63311] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/6 Finished with 0
2017-08-21 20:20:27,842 * INFO * [64072] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.006/2
2017-08-21 20:20:27,843 * INFO * [63343] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/10 Finished with 0
2017-08-21 20:20:27,854 * INFO * [64077] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.006/3
2017-08-21 20:20:27,854 * INFO * [63303] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/5 Finished with 0
2017-08-21 20:20:27,859 * INFO * [64084] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.006/4
2017-08-21 20:20:27,859 * INFO * [63365] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/3 Finished with 0
2017-08-21 20:20:27,865 * INFO * [64092] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.006/5
2017-08-21 20:20:27,865 * INFO * [63374] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/4 Finished with 0
2017-08-21 20:20:27,872 * INFO * [64099] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.006/6
2017-08-21 20:20:27,873 * INFO * [63397] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/7 Finished with 0
2017-08-21 20:20:27,886 * INFO * [64108] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.006/7
2017-08-21 20:20:27,893 * INFO * [63293] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.003/4 Finished with 0
2017-08-21 20:20:27,900 * INFO * [64120] Started ./SIPSimRun_expDesign.sh in /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.006/8
2017-08-21 20:20:27,914 * INFO * [63383] /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/15/0.004/5 Finished with 0
2017-08-21 20:20:27,919 * INFO * [6412
limit_output extension: Maximum message size of 20000 exceeded with 81236 characters

In [12]:
%pushmsg "exp_design complete: $buildDir"

SIPSim pipeline


In [13]:
bashFileTmp = os.path.splitext(bashFile)[0] + '_SIPSim-pipeline.sh'
bashFileTmp


Out[13]:
'/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/SIPSimRun_SIPSim-pipeline.sh'

In [14]:
%%writefile $bashFileTmp
#!/bin/bash
# nestrun template, step 2: SIPSim simulation pipeline.
# Reads the files written by the experimental-design template in the same job
# directory (incorp.config, incorporators.txt, comm.txt, fracs.txt) and writes
# the OTU tables (raw, PCR, subsampled, wide-format) plus sample metadata.
# Brace-delimited fields are nest parameters filled in per job.
# offset job start to prevent conda activate errors
sleep $[ ( $RANDOM % 10 )  + 1 ]s
source activate SIPSim
# OPENBLAS threads 
export OMP_NUM_THREADS=1


echo '#-- SIPSim pipeline --#'    
    
echo '# Adding diffusion'    
SIPSim diffusion \
    -n {Monte_rep} \
    --bw {bandwidth} \
    --np {np} \
    {fragFile} \
    > ampFrags_KDE_dif.pkl    

echo '# Adding DBL contamination; abundance-weighted smearing'
SIPSim DBL \
    -n {Monte_rep} \
    --comm comm.txt \
    --commx {DBL_scaling} \
    --np {np} \
    ampFrags_KDE_dif.pkl \
    > ampFrags_KDE_dif_DBL.pkl 

echo '# Adding isotope incorporation to BD distribution'
# NOTE(review): the confusion-matrix template reads BD-shift_stats.txt, which no
#   command in this notebook names as an output; presumably this step writes it
#   by default -- confirm
SIPSim isotope_incorp \
    -n {Monte_rep} \
    --comm comm.txt \
    --taxa incorporators.txt \
    --np {np} \
    ampFrags_KDE_dif_DBL.pkl \
    incorp.config \
    > ampFrags_KDE_dif_DBL_inc.pkl

echo '# Simulating an OTU table'
SIPSim OTU_table \
    --abs {abs} \
    --np {np} \
    ampFrags_KDE_dif_DBL_inc.pkl \
    comm.txt \
    fracs.txt \
    > OTU_abs{abs}.txt
    
echo '# Simulating PCR'
SIPSim OTU_PCR \
    OTU_abs{abs}.txt \
    > OTU_abs{abs}_PCR.txt    
    
echo '# Subsampling from the OTU table (simulating sequencing of the DNA pool)'
SIPSim OTU_subsample \
    --dist {subsample_dist} \
    --dist_params mean:{subsample_mean},sigma:{subsample_scale} \
    --min_size {subsample_min} \
    --max_size {subsample_max} \
    OTU_abs{abs}_PCR.txt \
    > OTU_abs{abs}_PCR_sub.txt
        
echo '# Making a wide-formatted table'
SIPSim OTU_wide_long -w \
    OTU_abs{abs}_PCR_sub.txt \
    > OTU_abs{abs}_PCR_sub_w.txt
    
echo '# Making metadata (phyloseq: sample_data)'
SIPSim OTU_sample_data \
    OTU_abs{abs}_PCR_sub.txt \
    > OTU_abs{abs}_PCR_sub_meta.txt
       

#-- removing large intermediate files --#
# (the three KDE pickles written above; no later template in this notebook reads them)
rm -f ampFrags_KDE_dif.pkl
rm -f ampFrags_KDE_dif_DBL.pkl
rm -f ampFrags_KDE_dif_DBL_inc.pkl


Writing /ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/SIPSimRun_SIPSim-pipeline.sh

In [ ]:
!chmod 755 $bashFileTmp

In [ ]:
%%bash -s $workDir $bashFileTmp $buildDir
# positional args: $1 = workDir, $2 = template script, $3 = nest build directory
source activate py2_ley0.4
cd $1

# only 6 concurrent jobs here (20 for most other steps)
# NOTE(review): presumably because each job already uses np=4 processes -- confirm
# --stop-on-error is passed only for this step
nestrun --template-file $2 -d $3 --log-file SIPSim_pipeline.log -j 6 --stop-on-error

In [ ]:
%pushmsg "SIPSim pipeline complete: $buildDir"

Summary of simulated data


In [ ]:
bashFileTmp = os.path.splitext(bashFile)[0] + '_SIPSim-summary.sh'
bashFileTmp

In [ ]:
%%writefile $bashFileTmp
#!/bin/bash
# nestrun template: summary plots of the simulated OTU tables.
# Runs OTU_taxonAbund.R from R_dir on the raw and on the subsampled OTU table
# written by the SIPSim pipeline template; -o gives the output prefix.
# R_dir ends with a slash, so it is concatenated directly with the script name.
# offset job start to prevent conda activate errors
sleep $[ ( $RANDOM % 10 )  + 1 ]s
source activate SIPSim 

echo "# Plotting taxon abundances"

# plotting 'raw' taxon abundances
Rscript {R_dir}OTU_taxonAbund.R \
    OTU_abs{abs}.txt \
    -r {topTaxaToPlot} \
    -o OTU_abs{abs}

# plotting 'sequenced' taxon abundances
Rscript {R_dir}OTU_taxonAbund.R \
    OTU_abs{abs}_PCR_sub.txt \
    -r {topTaxaToPlot} \
    -o OTU_abs{abs}_PCR_sub

In [ ]:
!chmod 755 $bashFileTmp

In [ ]:
%%bash -s $workDir $bashFileTmp $buildDir
# positional args: $1 = workDir, $2 = template script, $3 = nest build directory
source activate py2_ley0.4
cd $1

# run the summary template in every nest directory under $3
nestrun --template-file $2 -d $3 --log-file SIPSim_summary.log -j 20

HR-SIP


In [ ]:
bashFileTmp = os.path.splitext(bashFile)[0] + '_HRSIP.sh'
bashFileTmp

In [ ]:
%%writefile $bashFileTmp
#!/bin/bash
# nestrun template: HR-SIP (DESeq2 on a single 'heavy' BD window).
# Inputs are the wide OTU table and sample metadata from the SIPSim pipeline
# template. The unfiltered .physeq object written first is also the input of
# the MW-HR-SIP template, so this step must run before that one.
# offset job start to prevent conda activate errors
sleep $[ ( $RANDOM % 10 )  + 1 ]s
source activate SIPSim 

# phyloseq
## making phyloseq object from OTU table
Rscript {R_dir}phyloseq_make.R \
    OTU_abs{abs}_PCR_sub_w.txt \
    -s OTU_abs{abs}_PCR_sub_meta.txt \
    > OTU_abs{abs}_PCR_sub.physeq

## filtering phyloseq object to just 'heavy' fractions
## (window bounds come from the heavy_BD_min / heavy_BD_max nest params)
Rscript {R_dir}phyloseq_edit.R \
    OTU_abs{abs}_PCR_sub.physeq \
    --BD_min {heavy_BD_min} \
    --BD_max {heavy_BD_max} \
    > OTU_abs{abs}_PCR_sub_filt.physeq

## making ordination
Rscript {R_dir}phyloseq_ordination.R \
    OTU_abs{abs}_PCR_sub_filt.physeq \
    OTU_abs{abs}_PCR_sub_filt_bray-NMDS.pdf

# DESeq2
## --cont / --treat are the library IDs from qSIP_exp_design.txt
## (odd IDs = control, even IDs = treatment)
Rscript {R_dir}phyloseq_DESeq2.R \
    --log2 {log2} \
    --hypo greater \
    --cont 1,3,5 \
    --treat 2,4,6 \
    OTU_abs{abs}_PCR_sub_filt.physeq \
    > OTU_abs{abs}_PCR_sub_filt_DESeq2

In [ ]:
!chmod 755 $bashFileTmp

In [ ]:
%%bash -s $workDir $bashFileTmp $buildDir
# positional args: $1 = workDir, $2 = template script, $3 = nest build directory
source activate py2_ley0.4
cd $1

# run the HR-SIP template in every nest directory under $3
nestrun --template-file $2 -d $3 --log-file HR-SIP.log -j 20

In [ ]:
%pushmsg "HR-SIP complete: $buildDir"

MW-HR-SIP


In [ ]:
bashFileTmp = os.path.splitext(bashFile)[0] + '_MWHRSIP.sh'
bashFileTmp

In [ ]:
%%writefile $bashFileTmp
#!/bin/bash
# nestrun template: MW-HR-SIP (DESeq2 over multiple 'heavy' BD windows).
# Input is the unfiltered .physeq object written by the HR-SIP template, so
# that template must have been run first. Brace-delimited fields are nest
# parameters filled in per job.
# offset job start to prevent conda activate errors
sleep $[ ( $RANDOM % 10 )  + 1 ]s
source activate SIPSim 

## MW-HR-SIP pipeline
## --cont / --treat are the library IDs from qSIP_exp_design.txt
## (odd IDs = control, even IDs = treatment); -w lists the BD windows.
## The --all file name uses the abs field like every other file here
## (it was hard-coded to 1e9, which only matched while abs was 1e9).
Rscript {R_dir}phyloseq_DESeq2.R \
    --log2 {log2} \
    --hypo greater \
    --cont 1,3,5 \
    --treat 2,4,6 \
    --occur_all 0.0,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5 \
    -w 1.70-1.73,1.72-1.75,1.74-1.77 \
    --all OTU_abs{abs}_PCR_sub_MW-all.txt \
    OTU_abs{abs}_PCR_sub.physeq \
    > OTU_abs{abs}_PCR_sub_filt_MW_DESeq2

In [ ]:
!chmod 755 $bashFileTmp

In [ ]:
%%bash -s $workDir $bashFileTmp $buildDir
# positional args: $1 = workDir, $2 = template script, $3 = nest build directory
source activate py2_ley0.4
cd $1

# run the MW-HR-SIP template in every nest directory under $3
nestrun --template-file $2 -d $3 --log-file MW-HR-SIP.log -j 20

In [ ]:
%pushmsg "MW-HR-SIP complete: $buildDir"

q-SIP


In [ ]:
bashFileTmp = os.path.splitext(bashFile)[0] + '_qSIP.sh'
bashFileTmp

In [ ]:
%%writefile $bashFileTmp
#!/bin/bash
# nestrun template: q-SIP.
# Takes the raw and the subsampled OTU tables from the SIPSim pipeline
# template, then computes atom excess using the experimental design file
# (exp_design nest param: library ID <tab> control|treatment).
# offset job start to prevent conda activate errors
sleep $[ ( $RANDOM % 10 )  + 1 ]s
source activate SIPSim 
# OPENBLAS threads 
export OMP_NUM_THREADS=1

# qSIP
SIPSim qSIP \
    OTU_abs{abs}.txt \
    OTU_abs{abs}_PCR_sub.txt \
    > OTU_abs{abs}_PCR_sub_qSIP.txt
        

# qSIP: atom excess
# (output is the qSIP input of the confusion-matrix template)
SIPSim qSIP_atom_excess \
    --np {np} \
    OTU_abs{abs}_PCR_sub_qSIP.txt \
    {exp_design} \
    > OTU_abs{abs}_PCR_sub_qSIP_atom.txt

In [ ]:
!chmod 755 $bashFileTmp

In [ ]:
%%bash -s $workDir $bashFileTmp $buildDir
# positional args: $1 = workDir, $2 = template script, $3 = nest build directory
source activate py2_ley0.4
cd $1

# only 6 concurrent jobs here (20 for most other steps)
# NOTE(review): presumably because qSIP_atom_excess uses np=4 processes per job -- confirm
nestrun --template-file $2 -d $3 --log-file qSIP.log -j 6

In [ ]:
%pushmsg "q-SIP complete: $buildDir"

delta-BD


In [ ]:
bashFileTmp = os.path.splitext(bashFile)[0] + '_dBD.sh'
bashFileTmp

In [ ]:
%%writefile $bashFileTmp
#!/bin/bash
# nestrun template: delta-BD.
# Runs SIPSim deltaBD on the subsampled OTU table with the experimental
# design file (exp_design nest param).
# NOTE(review): the confusion-matrix template in this notebook does not read
#   the _dBD.txt output written here -- confirm where it is consumed.
# offset job start to prevent conda activate errors
sleep $[ ( $RANDOM % 10 )  + 1 ]s
source activate SIPSim 
# OPENBLAS threads 
export OMP_NUM_THREADS=1

#deltaBD 
SIPSim deltaBD \
    OTU_abs{abs}_PCR_sub.txt \
    {exp_design} \
    > OTU_abs{abs}_PCR_sub_dBD.txt

In [ ]:
!chmod 755 $bashFileTmp

In [ ]:
%%bash -s $workDir $bashFileTmp $buildDir
# positional args: $1 = workDir, $2 = template script, $3 = nest build directory
source activate py2_ley0.4
cd $1

# run the delta-BD template in every nest directory under $3
nestrun --template-file $2 -d $3 --log-file deltaBD.log -j 20

In [ ]:
%pushmsg "deltaBD complete: $buildDir"

Making confusion matrices


In [ ]:
bashFileTmp = os.path.splitext(bashFile)[0] + '_cMtx.sh'
bashFileTmp

In [ ]:
%%writefile $bashFileTmp
#!/bin/bash
# nestrun template: confusion matrices for each detection method.
# Every call compares a method's result file against BD-shift_stats.txt.
# NOTE(review): BD-shift_stats.txt is not named as an output by any template
#   in this notebook; presumably a by-product of SIPSim isotope_incorp -- confirm.
# Library IDs 2,4,6 are the treatment gradients and 1,3,5 the controls
# (see qSIP_exp_design.txt).
# offset job start to prevent conda activate errors
sleep $[ ( $RANDOM % 10 )  + 1 ]s
source activate SIPSim

# HR-SIP
# (input written by the HR-SIP template)
Rscript {R_dir}DESeq2_confuseMtx.R \
    --libs 2,4,6 \
    --padj {padj} \
    BD-shift_stats.txt \
    OTU_abs{abs}_PCR_sub_filt_DESeq2

# HR-SIP multiple 'heavy' BD windows
# (input written by the MW-HR-SIP template; -o DESeq2_multi-cMtx matches the
#  DESeq2_multi prefix that the aggregation cell looks for)
Rscript {R_dir}DESeq2_confuseMtx.R \
    --libs 2,4,6 \
    --padj {padj} \
    -o DESeq2_multi-cMtx \
    BD-shift_stats.txt \
    OTU_abs{abs}_PCR_sub_filt_MW_DESeq2
    
# qSIP    
# (input written by the q-SIP template)
Rscript {R_dir}qSIP_confuseMtx.R \
    --libs 2,4,6 \
    BD-shift_stats.txt \
    OTU_abs{abs}_PCR_sub_qSIP_atom.txt

# heavy-SIP    
# (works directly on the subsampled OTU table from the SIPSim pipeline template)
Rscript {R_dir}heavy_confuseMtx.R \
    --treat 2,4,6 \
    --con 1,3,5 \
    --method 1 \
    BD-shift_stats.txt \
    OTU_abs{abs}_PCR_sub.txt

In [ ]:
!chmod 755 $bashFileTmp

In [ ]:
%%bash -s $workDir $bashFileTmp $buildDir
# positional args: $1 = workDir, $2 = template script, $3 = nest build directory
source activate py2_ley0.4
cd $1

# run the confusion-matrix template in every nest directory under $3
nestrun --template-file $2 -d $3 --log-file cMtx.log -j 20

Aggregating confusion matrices


In [ ]:
def agg_cMtx(prefix):
    # all data
    x = prefix + '-cMtx_data.txt'
    !nestagg delim \
       -d $buildDir \
       -k percIncorp,frac_mu,rep \
       -o $x \
       --tab \
       $x

    # overall
    x = prefix + '-cMtx_overall.txt'
    !nestagg delim \
        -d $buildDir \
        -k percIncorp,frac_mu,rep \
        -o $x \
        --tab \
        $x

    # by class
    x = prefix + '-cMtx_byClass.txt'
    !nestagg delim \
        -d $buildDir \
        -k percIncorp,frac_mu,rep \
        -o $x \
        --tab \
        $x
        
agg_cMtx('DESeq2')
agg_cMtx('DESeq2_multi')
agg_cMtx('qSIP') 
agg_cMtx('heavy')

In [ ]:
%pushmsg "atomIncorp_fracSize complete!"

--End of simulation--


Results


In [50]:
# checking for errors
# 1st command: number of files ending in 'log' anywhere under the build directory
# 2nd command: case-insensitive search of those files for 'error'
#   (no lines printed = no matches)
!find $buildDir -name "*log" | wc -l
!find $buildDir -name "*log" | xargs grep -i error


1920

In [51]:
# list the aggregated by-class confusion-matrix tables in the build directory
# (written by agg_cMtx; the glob matches any method prefix)
F = os.path.join(buildDir, '*-cMtx_byClass.txt')
files = glob.glob(F)
files


Out[51]:
['/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/DESeq2-cMtx_byClass.txt',
 '/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/DESeq2_multi-cMtx_byClass.txt',
 '/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/qSIP-cMtx_byClass.txt',
 '/ebio/abt3_projects/methanogen_host_evo/SIPSim_pt2/data/bac_genome1147/atomIncorp_fracSize/heavy-cMtx_byClass.txt']

In [ ]: