In [1]:
# Absolute locations used by the rest of the notebook.
# SIPSimExe: path to the SIPSim executable invoked from the `!` shell cells.
SIPSimExe = '/home/nick/notebook/SIPSim/SIPSim'
# workDir: directory under which this notebook creates and reads its files.
workDir = '/home/nick/notebook/SIPSim/dev/Ecoli/'
In [2]:
import os,sys
import numpy as np
import pandas as pd
from ggplot import *
import matplotlib.pyplot as plt
In [3]:
# Load the IPython extension shipped as `rpy2.ipython`
# (presumably for R cells elsewhere in the notebook; none are visible here).
%load_ext rpy2.ipython
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [4]:
# Create the working directory on first run.
# os.makedirs (rather than os.mkdir) also creates any missing parent
# directories, so this works even when the parents of workDir do not exist
# yet; the isdir() guard keeps re-runs of the cell from raising.
if not os.path.isdir(workDir):
    os.makedirs(workDir)
In [9]:
# Sub-directory of workDir in which the genome FASTA and its index are written.
genomeDir = os.path.join(workDir, 'genomes')
# os.makedirs (rather than os.mkdir) also creates missing parent directories,
# so this cell does not depend on workDir having been created beforehand;
# the isdir() guard keeps re-runs of the cell from raising.
if not os.path.isdir(genomeDir):
    os.makedirs(genomeDir)
In [10]:
# Build the genome FASTA: feed ../accession.txt (relative to genomeDir) to
# seqDB_tools on stdin and save its stdout as Ecoli_O157H7.fna in genomeDir.
# NOTE(review): no visible cell creates accession.txt; it must already exist.
# `&&` (not `;`) so that, if the cd fails, nothing runs and no output file is
# created in the wrong directory.
!cd $genomeDir && \
seqDB_tools accession-GI2fasta < ../accession.txt > Ecoli_O157H7.fna
In [12]:
# Print summary information for the downloaded FASTA file
# (--tl / --tgc are presumably total length and GC content; confirm against
# the seq_tools help).
# `&&` (not `;`) so the command is skipped if the cd fails instead of running
# in the wrong directory.
!cd $genomeDir && \
seq_tools fasta_info --tl --tgc --header Ecoli_O157H7.fna
In [13]:
# Index of all genome files and their associated names, written to
# genome_index.txt in genomeDir:
#   1. find every file under genomeDir whose name ends in "fna"
#   2. strip the leading directory part of each path
#   3. rewrite each file name as "<name><TAB><name><ext>"
#      (assumes the `\$N` escapes reach perl as capture groups $1/$2 - TODO confirm)
# `&&` (not `;`) so the pipeline is skipped if the cd fails instead of
# indexing, and writing into, the wrong directory.
!cd $genomeDir && \
find . -name "*fna" | \
perl -pe 's/.+\///' | \
perl -pe 's/(.+)(\.[^.]+)/\$1\t\$1\$2/' > genome_index.txt
In [ ]:
# Run SIPSim's `indexGenomes` sub-command on genome_index.txt, passing `.`
# (genomeDir, after the cd) as the --fp argument.
# `&&` (not `;`) so SIPSim is not run from the wrong directory if the cd fails.
!cd $genomeDir && \
$SIPSimExe indexGenomes genome_index.txt --fp .
In [ ]: