In [1]:
workDir = '/home/nick/t/SIPSim/'
nprocs = 3
In [2]:
import os
import glob
In [3]:
%load_ext rpy2.ipython
In [4]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
In [5]:
if not os.path.isdir(workDir):
os.makedirs(workDir)
%cd $workDir
genomeDir = os.path.join(workDir, 'genomes_rn')
The script below ("SIPSim incorpConfigExample
") is helpful for making simple experimental designs
In [6]:
# this file
!SIPSim incorpConfigExample \
--percTaxa 34 \
--percIncorpUnif 50 \
--n_reps 1 \
> incorp.config
In [7]:
!cat incorp.config
In [8]:
!SIPSim communities \
--config incorp.config \
./genomes_rn/genome_index.txt \
> comm.txt
In [9]:
!cat comm.txt
Note: "library" = gradient
In [10]:
!SIPSim gradient_fractions \
--BD_min 1.67323 \
--BD_max 1.7744 \
comm.txt \
> fracs.txt
In [11]:
!head -n 6 fracs.txt
In [12]:
primers = """>515F
GTGCCAGCMGCCGCGGTAA
>806R
GGACTACHVGGGTWTCTAAT
"""
F = os.path.join(workDir, '515F-806R.fna')
with open(F, 'wb') as oFH:
oFH.write(primers)
print 'File written: {}'.format(F)
In [13]:
# skewed-normal
!SIPSim fragments \
$genomeDir/genome_index.txt \
--fp $genomeDir \
--fr 515F-806R.fna \
--fld skewed-normal,9000,2500,-5 \
--flr None,None \
--nf 10000 \
--np $nprocs \
--tbl \
> ampFrags.txt
In [14]:
!head -n 5 ampFrags.txt
In [15]:
%%R -w 700 -h 350
df = read.delim('ampFrags.txt')
ggplot(df, aes(fragGC, fragLength, color=taxon_name)) +
geom_density2d() +
scale_color_discrete('Taxon') +
labs(x='Fragment G+C', y='Fragment length (bp)') +
theme_bw() +
theme(
text = element_text(size=16)
)
Note: for information on what's going on in this config file, use the command: SIPSim isotope_incorp -h
In [16]:
!SIPSim fragment_KDE \
ampFrags.txt \
> ampFrags_kde.pkl
SIPSim KDE_sample
In [17]:
!SIPSim diffusion \
ampFrags_kde.pkl \
--np $nprocs \
> ampFrags_kde_dif.pkl
In [20]:
n = 100000
!SIPSim KDE_sample -n $n ampFrags_kde.pkl > ampFrags_kde.txt
!SIPSim KDE_sample -n $n ampFrags_kde_dif.pkl > ampFrags_kde_dif.txt
!ls -thlc ampFrags_kde*.txt
In [32]:
%%R
df1 = read.delim('ampFrags_kde.txt', sep='\t')
df2 = read.delim('ampFrags_kde_dif.txt', sep='\t')
df1$data = 'no diffusion'
df2$data = 'diffusion'
df = rbind(df1, df2) %>%
gather(Taxon, BD, Clostridium_ljungdahlii_DSM_13528,
Escherichia_coli_1303, Streptomyces_pratensis_ATCC_33331) %>%
mutate(Taxon = gsub('_(ATCC|DSM)', '\n\\1', Taxon))
df %>% head(n=3)
In [33]:
%%R -w 800 -h 300
ggplot(df, aes(BD, fill=data)) +
geom_density(alpha=0.25) +
facet_wrap( ~ Taxon) +
scale_fill_discrete('') +
theme_bw() +
theme(
text=element_text(size=16),
axis.title.y = element_text(vjust=1),
axis.text.x = element_text(angle=50, hjust=1)
)
In [34]:
!SIPSim DBL \
ampFrags_kde_dif.pkl \
--np $nprocs \
> ampFrags_kde_dif_DBL.pkl
In [35]:
# viewing DBL logs
!ls -thlc *pkl
In [61]:
!SIPSim isotope_incorp \
--comm comm.txt \
--np $nprocs \
ampFrags_kde_dif_DBL.pkl \
incorp.config \
> ampFrags_KDE_dif_DBL_inc.pkl
In [62]:
!ls -thlc *.pkl
Note: statistics on how much isotope was incorporated by each taxon are listed in "BD-shift_stats.txt"
In [63]:
%%R
df = read.delim('BD-shift_stats.txt', sep='\t')
df
In [64]:
!SIPSim OTU_table \
--abs 1e7 \
--np $nprocs \
ampFrags_KDE_dif_DBL_inc.pkl \
comm.txt \
fracs.txt \
> OTU.txt
In [65]:
!head -n 7 OTU.txt
In [77]:
%%R -h 350 -w 750
df = read.delim('OTU.txt', sep='\t')
p = ggplot(df, aes(BD_mid, count, fill=taxon)) +
geom_area(stat='identity', position='dodge', alpha=0.5) +
scale_x_continuous(expand=c(0,0)) +
labs(x='Buoyant density') +
labs(y='Amplicon fragment counts') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.y = element_text(vjust=1),
axis.title.x = element_blank()
)
p
Notes:
In [79]:
%%R -h 350 -w 750
p = ggplot(df, aes(BD_mid, count, fill=taxon)) +
geom_area(stat='identity', position='fill') +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
labs(x='Buoyant density') +
labs(y='Amplicon fragment counts') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.y = element_text(vjust=1),
axis.title.x = element_blank()
)
p
In [67]:
!SIPSim OTU_PCR OTU.txt > OTU_PCR.txt
In [68]:
!head -n 5 OTU_PCR.txt
Notes
In [69]:
!SIPSim OTU_subsample OTU_PCR.txt > OTU_PCR_sub.txt
In [70]:
!head -n 5 OTU_PCR_sub.txt
Notes
In [80]:
%%R -h 350 -w 750
df = read.delim('OTU_PCR_sub.txt', sep='\t')
p = ggplot(df, aes(BD_mid, rel_abund, fill=taxon)) +
geom_area(stat='identity', position='fill') +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
labs(x='Buoyant density') +
labs(y='Taxon relative abundances') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.y = element_text(vjust=1),
axis.title.x = element_blank()
)
p
Notes
In [81]:
!SIPSim OTU_wideLong -w \
OTU_PCR_sub.txt \
> OTU_PCR_sub_wide.txt
In [82]:
!head -n 4 OTU_PCR_sub_wide.txt
In [83]:
!SIPSim OTU_sampleData \
OTU_PCR_sub.txt \
> OTU_PCR_sub_meta.txt
In [86]:
!head OTU_PCR_sub_meta.txt
In [87]:
!SIPSim -l
In [90]:
!SIPSimR -l
In [ ]: