In [14]:
# Notebook-wide locations. These are absolute paths on the original author's
# machine; adjust them before running elsewhere.

# SIPSim executable invoked by the shell cells.
SIPSimExe = "/home/nick/notebook/SIPSim/SIPSim"

# Directory holding the source genome fasta files.
genomeDir = "/home/nick/notebook/SIPSim/dev/bac_genome1210/genomes/"

# Working directory for everything this notebook writes.
workDir = "/home/nick/notebook/SIPSim/dev/bac_genome3/"

# Currently unused; kept for reference.
#genomeTable = '/var/seq_data/ncbi_db/genome/prok-bac_filt_rand.txt'
In [2]:
import os,sys
import numpy as np
import pandas as pd
from ggplot import *
import matplotlib.pyplot as plt
In [3]:
# Load the rpy2 IPython extension (the notebook uses the %%R cell magic).
%load_ext rpy2.ipython
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [4]:
%%R
# Attach the R packages to the R session driven by the %%R magic.
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [15]:
# Create the working directory if it does not exist yet.
# os.makedirs (rather than os.mkdir) also creates any missing parent
# directories, so a fresh checkout does not fail here.
if not os.path.isdir(workDir):
    os.makedirs(workDir)
In [16]:
# Sub-directory of the working directory that holds the genome fasta files
# and the genome index.
workDirGenome = os.path.join(workDir, 'genomes')
# os.makedirs also creates workDir itself when it is missing, so this cell
# does not depend on the directory-creation cell having been run first.
if not os.path.isdir(workDirGenome):
    os.makedirs(workDirGenome)
In [11]:
%%bash -s "$genomeDir"
# Run `seq_tools GC_content` on every *.fasta file under the genome directory,
# writing <name>_GC.txt next to each <name>.fasta (up to 24 jobs in parallel).
# $1 = genome directory (passed in from the Python variable genomeDir).

# Abort if the directory is missing, so the search never runs in the wrong place.
cd "$1" || exit 1

# NUL-delimited file names survive spaces and quotes. Each file name reaches the
# inner shell as the positional argument $1 instead of being spliced into the
# command string; ${1%.fasta} strips only the trailing ".fasta" extension.
find . -name "*.fasta" -print0 | \
    xargs -0 -P 24 -I {} bash -c \
        'seq_tools GC_content < "$1" > "${1%.fasta}_GC.txt"' _ {}
In [12]:
%%bash -s "$genomeDir"
# Print the GC-content reports of the three genomes selected for this notebook.
# $1 = genome directory (passed in from the Python variable genomeDir).

# Abort if the directory is missing instead of reading from the wrong place.
cd "$1" || exit 1

for genome in \
    Clostridium_ljungdahlii_DSM_13528 \
    Escherichia_coli_1303 \
    Streptomyces_pratensis_ATCC_33331
do
    cat "./${genome}_GC.txt"
done
In [18]:
%%bash -s "$genomeDir" "$workDirGenome"
# Copy the three selected genome fasta files into the working genome directory.
# $1 = source genome directory (genomeDir)
# $2 = destination directory (workDirGenome)

# Stop at the first failed copy so a missing genome is not silently ignored.
set -e

for genome in \
    Clostridium_ljungdahlii_DSM_13528 \
    Escherichia_coli_1303 \
    Streptomyces_pratensis_ATCC_33331
do
    # The trailing slash makes cp fail if the destination is not a directory.
    cp "$1/${genome}.fasta" "$2/"
done
In [25]:
# Write the genome index file: one line per genome, formatted as
# "<genome name><TAB><fasta file name>".
genomeNames = [
    'Clostridium_ljungdahlii_DSM_13528',
    'Escherichia_coli_1303',
    'Streptomyces_pratensis_ATCC_33331',
]
# Writing through an explicit path means the file can never land in the
# notebook's current directory when workDirGenome is missing; open() raises.
indexFile = os.path.join(workDirGenome, 'genome_index.txt')
with open(indexFile, 'w') as outFH:
    # Lines are newline-separated, with no newline after the last entry.
    outFH.write('\n'.join('{0}\t{0}.fasta'.format(name) for name in genomeNames))
In [ ]:
!cd $workDir; \
$SIPSimExe genome_index genomes/genome_index.txt \
--fp ./genomes/ --np 30 > genomes/index_log.txt
In [ ]:
In [ ]:
In [ ]:
In [ ]: