In [5]:
# Base working directory for this notebook; the genome directory is the
# 'genomes/' subdirectory directly beneath it.
workDir = '/home/nick/notebook/SIPSim/dev/Ecoli/'
genomeDir = workDir + 'genomes/'
In [54]:
import glob
import os

import nestly
from IPython.display import Image, display
import matplotlib.pyplot as plt
In [49]:
# Load the rpy2 IPython extension (it provides the %%R cell magic used in
# this notebook) and render matplotlib figures inline.
%load_ext rpy2.ipython
%matplotlib inline
In [8]:
%%R
# R packages attached in the embedded R session.
# Keep the load order as-is: a package attached later masks same-named
# functions from packages attached earlier.
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [88]:
# Describe the parameter tree with nestly, then write it to disk.
nest = nestly.Nest()

## varying params: KDE bandwidth, either a named rule or a numeric value.
## This is the only entry added without create_dir=False.
bandwidths = ['scott', 'silverman', 1, 0.5, 0.1, 0.01, 0.001]
nest.add('bw', bandwidths)

## set params and input/output files: one value each, all added with
## create_dir=False. The order below is the order they are added in.
fixedParams = [
    ('np', 1),
    ('fileName', 'ampFrags'),
    ('genome_index', os.path.join(genomeDir, 'genome_index.txt')),
    ('genome_dir', genomeDir),
    ('primers', os.path.join(workDir, '../', '515F-806R.fna')),
]
for paramName, paramValue in fixedParams:
    nest.add(paramName, [paramValue], create_dir=False)

# building directory tree
buildDir = os.path.join(workDir, 'frag_kde')
nest.build(buildDir)
In [89]:
# Path of the shell-script template that is written out and then handed
# to nestrun later in this notebook.
scriptName = 'SIPSimRun.sh'
bashFile = os.path.join(workDir, scriptName)
In [90]:
%%writefile $bashFile
#!/bin/bash
# Template script: this file is passed to nestrun via --template-file.
# The brace-delimited fields (genome_index, genome_dir, primers, np,
# fileName, bw) have the same names as the parameters added to the nest.
# NOTE(review): presumably nestrun substitutes every brace-delimited field,
# so avoid other literal braces in this file - confirm against nestrun docs.
# simulating fragments
# (stderr is redirected to the .log file, stdout to the .pkl file)
SIPSim fragments \
{genome_index} \
--fp {genome_dir} \
--fr {primers} \
--fld skewed-normal,9000,2500,-5 \
--flr None,None \
--nf 10000 \
--np {np} \
2> {fileName}.log \
> {fileName}.pkl
# converting to kde object
# (reads the fragment .pkl written above; bw is the varying parameter)
SIPSim fragment_kde \
{fileName}.pkl \
--bw {bw} \
> {fileName}_kde.pkl
# getting bandwidth (for checking on scott & silverman)
SIPSim KDE_bandwidth {fileName}_kde.pkl > {fileName}_kde_bw.txt
In [91]:
!chmod 775 $bashFile
In [92]:
!cd $workDir; \
nestrun -j 10 --template-file $bashFile -d frag_kde
In [93]:
!cd $workDir; \
find . -name "*_kde.pkl" | \
perl -pe 's/\.pkl//' | \
xargs -I % -P 5 SIPSim KDE_plot %.pkl -o %.png
In [94]:
# Show each KDE plot together with the contents of its bandwidth file.
# The bandwidth file is derived from the plot's own path rather than by
# zipping two independently sorted glob results, so a missing PNG or text
# file can no longer shift the pairing for every run after it.
tmpDir = os.path.join(workDir, 'frag_kde')
pngs = sorted(glob.glob(os.path.join(tmpDir, '*/*_kde.png')))
for pngFile in pngs:
    # single-argument print() behaves the same on Python 2 and Python 3
    print(pngFile)
    # <run>/<name>_kde.png  ->  <run>/<name>_kde_bw.txt
    bwFile = os.path.splitext(pngFile)[0] + '_bw.txt'
    if os.path.isfile(bwFile):
        with open(bwFile) as inFH:
            print(inFH.read().rstrip())
    else:
        print('(no bandwidth file found: %s)' % bwFile)
    display(Image(pngFile))
In [ ]: