In [1]:
    
# Location of the SIPSim executable; interpolated into `!` shell cells below.
SIPSimExe = '/home/nick/notebook/SIPSim/SIPSim'
# Root working directory for this notebook; all outputs are written beneath it.
# NOTE: both paths are machine-specific -- edit them before running elsewhere.
workDir = '/home/nick/notebook/SIPSim/dev/Ecoli/'
    
In [2]:
    
import os,sys
import numpy as np
import pandas as pd
from ggplot import *
import matplotlib.pyplot as plt
    
In [3]:
    
# Load the rpy2 IPython extension (provides the %R / %%R magics for running R code).
%load_ext rpy2.ipython
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
    
In [4]:
    
# Create the working directory on first run. makedirs (rather than mkdir) also
# creates any missing parent directories of the deep absolute path, and the
# isdir guard keeps re-running this cell harmless.
if not os.path.isdir(workDir):
    os.makedirs(workDir)
    
In [9]:
    
# Directory that holds the downloaded genome FASTA file(s) and the genome index.
genomeDir = os.path.join(workDir, 'genomes')
# makedirs also creates workDir itself if it is missing, so this cell does not
# depend on the workDir-creation cell having been run in this kernel session.
if not os.path.isdir(genomeDir):
    os.makedirs(genomeDir)
    
In [10]:
    
# Feed ../accession.txt (i.e. workDir/accession.txt, which must already exist)
# to seqDB_tools on stdin and save its stdout as Ecoli_O157H7.fna in genomeDir.
# Presumably this fetches the sequence for each listed accession as FASTA -- confirm
# against the seqDB_tools documentation.
# `&&` (not `;`) so that nothing is run or written elsewhere if the cd fails.
!cd $genomeDir && \
    seqDB_tools accession-GI2fasta < ../accession.txt > Ecoli_O157H7.fna
    
    
In [12]:
    
# Sanity-check the downloaded FASTA by printing summary information for it.
# NOTE(review): --tl / --tgc look like total-length and total-GC options --
# confirm against the seq_tools help output.
# `&&` (not `;`) so the command is skipped if the cd fails.
!cd $genomeDir && \
    seq_tools fasta_info --tl --tgc --header Ecoli_O157H7.fna
    
    
In [13]:
    
# Build genome_index.txt: one line per *fna file found under genomeDir, intended
# to have the form "<file name without extension><TAB><file name>".
#   1st perl: strip the leading directory part that `find` prints (e.g. "./").
#   2nd perl: split off the last extension and emit "stem<TAB>stem.ext".
# NOTE(review): the backslash-escaped dollar signs in the 2nd perl expression
# depend on how IPython rewrites `$` in `!` commands before the shell sees them;
# re-check the contents of genome_index.txt after any IPython upgrade.
# `&&` (not `;`) so the index is never written to the wrong directory if the cd fails.
!cd $genomeDir && \
    find . -name "*fna" | \
    perl -pe 's/.+\///' | \
    perl -pe 's/(.+)(\.[^.]+)/\$1\t\$1\$2/' > genome_index.txt
    
In [ ]:
    
# Run the SIPSim `indexGenomes` sub-command on genome_index.txt from inside genomeDir.
# NOTE(review): `--fp .` presumably points SIPSim at the directory containing the
# genome files listed in the index -- confirm against `SIPSim indexGenomes -h`.
# `&&` (not `;`) so SIPSim is not launched from the wrong directory if the cd fails.
!cd $genomeDir && \
    $SIPSimExe indexGenomes genome_index.txt --fp .
    
    
In [ ]: