In [4]:
workDir = '/home/nick/notebook/SIPSim/dev/Ecoli/'
genomeDir = '/home/nick/notebook/SIPSim/dev/Ecoli/genomes/'
In [6]:
import glob
import nestly
In [3]:
%load_ext rpy2.ipython
In [42]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [71]:
# building tree structure
nest = nestly.Nest()
## varying params
nest.add('fld',
['uniform,1000,2000',
'uniform,20000,20500',
'uniform,50000,51000',
'normal,9000,2500',
'normal,12000,500',
'skewed-normal,9000,2500,-5'])
## set params
nest.add('np', [5], create_dir=False)
## input/output files
nest.add('fileName', ['ampFrags'], create_dir=False)
nest.add('genome_index', [os.path.join(genomeDir, 'genome_index.txt')], create_dir=False)
nest.add('genome_dir', [genomeDir], create_dir=False)
nest.add('primers', [os.path.join(workDir, '../', '515F-806R.fna')], create_dir=False)
# building directory tree
buildDir = os.path.join(workDir, 'frag_sim')
nest.build(buildDir)
In [72]:
bashFile = os.path.join(workDir, 'SIPSimRun.sh')
In [73]:
%%writefile $bashFile
#!/bin/bash
# simulating fragments
SIPSim fragments \
{genome_index} \
--fp {genome_dir} \
--fr {primers} \
--fld {fld} \
--flr None,None \
--nf 10000 \
--np {np} \
--tbl \
2> {fileName}.log \
> {fileName}.txt
In [74]:
!chmod 775 $bashFile
In [75]:
!cd $workDir; \
nestrun -j 4 --template-file $bashFile -d frag_sim
In [76]:
!cd $workDir; \
nestagg delim -t -k fld -d frag_sim -o frag_sim/ampFrags.txt ampFrags.txt
In [77]:
%%R -i workDir
setwd(workDir)
tbl = read.delim('frag_sim/ampFrags.txt', sep='\t')
tbl %>% head
In [78]:
%%R -w 950 -h 450
ggplot(tbl, aes(fragGC, fragLength)) +
geom_point(shape='O') +
labs(x='Fragment G+C', y='Fragment length (bp)') +
facet_wrap(~ fld) +
theme_bw() +
theme(
text=element_text(size=16),
axis.title.y=element_text(vjust=1)
)
In [82]:
%%R -w 950 -h 450
ggplot(tbl, aes(fragLength)) +
geom_histogram(binwidth=100) +
labs(x='Fragment G+C', y='Fragment length (bp)') +
facet_wrap(~ fld) +
theme_bw() +
theme(
text=element_text(size=16),
axis.title.y=element_text(vjust=1)
)
In [ ]: