In [6]:
# Root directory for every file this example creates. It is a relative path here;
# a later cell makes it absolute and creates it if needed.
workDir = '../../t/SIPSim_example/'
In [7]:
import os
In [8]:
# Load the rpy2 IPython extension.
# NOTE(review): no R magic is used in the cells visible here — confirm this is still needed.
%load_ext rpy2.ipython
In [10]:
# making directories
## working directory
workDir = os.path.abspath(workDir)
if not os.path.isdir(workDir):
os.makedirs(workDir)
%cd $workDir
In [11]:
# Genome directory setup: <workDir>/genomes is created on first run
# and its absolute path is echoed for reference.
workDirGenome = os.path.join(workDir, 'genomes')
if not os.path.isdir(workDirGenome):
    os.mkdir(workDirGenome)
print(workDirGenome)
In [37]:
# List the packages installed in the conda environment named "SIPSim".
!conda list -n SIPSim
Let's check that SIPSim is installed properly
In [13]:
%%bash
# Activate the SIPSim conda environment for this cell's shell.
source activate SIPSim
# Sanity check that the SIPSim executable can be found and runs.
# NOTE(review): -l presumably lists the available subcommands — confirm.
SIPSim -l
In [15]:
# Genomes to download: one "<taxon_name> <accession>" record per line.
taxa = """Clostridium_ljungdahlii_DSM_13528 NC_014328.1
Escherichia_coli_1303 NZ_CP009166.1
Streptomyces_pratensis_ATCC_33331 NC_016114.1
"""

# The list is written into the working directory; a later cell hands this
# path to `SIPSim genome_download`.
genome_file = os.path.join(workDir, 'genome_list.txt')
# Text mode ('w'): `taxa` is a str, and a binary-mode handle only accepts
# bytes on Python 3.
with open(genome_file, 'w') as oFH:
    oFH.write(taxa)
# print() call form (same as the workDirGenome cell) runs on Python 2 and 3.
print('File written: {}'.format(genome_file))
In [16]:
%%bash -s "$genome_file"
source activate SIPSim
# downloading genomes
# The genome list path arrives as $1. Both the magic argument and "$1" are
# quoted so that a path containing whitespace stays a single argument.
# NOTE(review): -d genomes looks like the output directory (the next cell lists ./genomes)
# and -n 3 is presumably a parallelism setting — confirm with `SIPSim genome_download -h`.
SIPSim genome_download -d genomes -n 3 "$1"
In [18]:
# Long, human-readable listing of ./genomes, sorted by time (newest first).
!ls -thlc ./genomes
Hopefully all 3 genomes downloaded (the files should be non-empty)
In [19]:
# current sequence names
# When several files are searched, grep prefixes each match with "<file>:";
# the perl substitution strips everything up to ":>" so only the ">..." header remains.
!grep ">" genomes/*fna | perl -pe 's/.+:>/>/'
In [20]:
%%bash
source activate SIPSim
# making sure each sequence is unique
# `find` lists every *fna file under ./genomes/ and pipes the list to
# genome_rename (the trailing "-" conventionally means "read from stdin").
# NOTE(review): --prefix genomes_rn looks like the output directory (later cells read genomes_rn/*fna)
# and -n 3 is presumably a parallelism setting — confirm with `SIPSim genome_rename -h`.
find ./genomes/ -name "*fna" | \
SIPSim genome_rename -n 3 --prefix genomes_rn -
In [21]:
# NEW sequence names
# Same header listing as for ./genomes, now over the renamed copies in ./genomes_rn:
# the perl substitution drops the "<file>:" prefix that grep adds.
!grep ">" genomes_rn/*fna | perl -pe 's/.+:>/>/'
MFPrimer_linux needs to be installed before continuing.
In [30]:
%%bash
source activate SIPSim
# Checking that MFE_primer.py (and associated scripts) are installed
# Only the first lines of the help text are shown (piped through `head`).
MFE_primer.py -h | head
In [31]:
# changing the working directory
# workDirGenome is re-pointed from <workDir>/genomes to <workDir>/genomes_rn
# (the renamed genomes); the cells below run from inside that directory.
workDirGenome = os.path.join(workDir, 'genomes_rn')
%cd $workDirGenome
In [32]:
# making index file (taxon_name<tab>taxon_genome_file_name)
# The two columns are typed with a single space for readability; the trailing
# .replace() converts every space to the tab delimiter.
indexFile = """Clostridium_ljungdahlii_DSM_13528 Clostridium_ljungdahlii_DSM_13528.fna
Escherichia_coli_1303 Escherichia_coli_1303.fna
Streptomyces_pratensis_ATCC_33331 Streptomyces_pratensis_ATCC_33331.fna""".replace(' ', '\t')

# The index is written next to the renamed genome files.
F = os.path.join(workDirGenome, 'genome_index.txt')
# Text mode ('w'): indexFile is a str, and a binary-mode handle only accepts
# bytes on Python 3.
with open(F, 'w') as oFH:
    oFH.write(indexFile)
# print() call form (same as the workDirGenome cell) runs on Python 2 and 3.
print('File written: {}'.format(F))
Note: This next step will use 3 processors (`--np`). Change this option if needed. Even with 3 processors, it will take a minute to complete.
While you wait, here's a turtle...
____,------------------,______
___/ \ / \_____
__/ \__________/ \___ \___
,^------.____/\ / \ / `----\_
| (O)) / \_________/ \____________/ \ \
\_____,--' / / \ / \ \ \
\___,---|___/_______,---`----------'----,_________\__________\_\
/ :__________________________/ :___________________/
/ : / : / : / :
/ : / : / : / :
(^^^ ) (^^^ ) (^^^ ) (^^^ )
^^^^^^^^ ^^^^^^^^ ^^^^^^^^ ^^^^^^^^
In [33]:
%%bash
source activate SIPSim
# indexing genomes; saving log
# genome_index.txt is the taxon-to-file index written by the previous code cell;
# stdout is redirected to index_log.txt in the current directory.
# NOTE(review): --fp . is presumably the directory holding the genome files and
# --np 3 the number of processors (per the note before this cell) — confirm with `SIPSim genome_index -h`.
SIPSim genome_index \
genome_index.txt \
--fp . --np 3 \
> index_log.txt
In [36]:
# checking all of the files produced in the ./genomes_rn/ directory
# (the current directory since the earlier %cd)
!ls -thlc
In [ ]: