Make sure you have created the dataset before trying to run this notebook
In [57]:
workDir = '../../t/SIPSim_example/'
nprocs = 3
In [58]:
import os
In [59]:
# Note: you will need to install `rpy2.ipython` and the necessary R packages (see next cell)
%load_ext rpy2.ipython
In [60]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
In [61]:
workDir = os.path.abspath(workDir)
if not os.path.isdir(workDir):
os.makedirs(workDir)
%cd $workDir
genomeDir = os.path.join(workDir, 'genomes_rn')
The script below ("SIPSim incorpConfigExample
") is helpful for making simple experimental designs
In [62]:
%%bash
source activate SIPSim
# creating example config
SIPSim incorp_config_example \
--percTaxa 34 \
--percIncorpUnif 50 \
--n_reps 1 \
> incorp.config
In [63]:
!cat incorp.config
In [64]:
%%bash
source activate SIPSim
SIPSim communities \
--config incorp.config \
./genomes_rn/genome_index.txt \
> comm.txt
In [65]:
!cat comm.txt
Note: "library" = gradient
In [66]:
%%bash
source activate SIPSim
SIPSim gradient_fractions \
--BD_min 1.67323 \
--BD_max 1.7744 \
comm.txt \
> fracs.txt
In [67]:
!head -n 6 fracs.txt
In [68]:
# primers = """>515F
# GTGCCAGCMGCCGCGGTAA
# >806R
# GGACTACHVGGGTWTCTAAT
# """
# F = os.path.join(workDir, '515F-806R.fna')
# with open(F, 'wb') as oFH:
# oFH.write(primers)
# print 'File written: {}'.format(F)
In [69]:
%%bash -s $genomeDir
source activate SIPSim
# skewed-normal
SIPSim fragments \
$1/genome_index.txt \
--fp $1 \
--fld skewed-normal,9000,2500,-5 \
--flr None,None \
--nf 1000 \
--debug \
--tbl \
> shotFrags.txt
In [70]:
!head -n 5 shotFrags.txt
!tail -n 5 shotFrags.txt
In [71]:
%%R -w 700 -h 350
df = read.delim('shotFrags.txt')
p = ggplot(df, aes(fragGC, fragLength, color=taxon_name)) +
geom_density2d() +
scale_color_discrete('Taxon') +
labs(x='Fragment G+C', y='Fragment length (bp)') +
theme_bw() +
theme(
text = element_text(size=16)
)
plot(p)
Note: for information on what's going on in this config file, use the command: SIPSim isotope_incorp -h
In [72]:
%%bash
source activate SIPSim
SIPSim fragment_KDE \
shotFrags.txt \
> shotFrags_kde.pkl
In [73]:
!ls -thlc shotFrags_kde.pkl
SIPSim KDE_sample
In [74]:
%%bash
source activate SIPSim
SIPSim diffusion \
shotFrags_kde.pkl \
--np 3 \
> shotFrags_kde_dif.pkl
In [75]:
!ls -thlc shotFrags_kde_dif.pkl
In [76]:
n = 100000
In [77]:
%%bash -s $n
source activate SIPSim
SIPSim KDE_sample -n $1 shotFrags_kde.pkl > shotFrags_kde.txt
SIPSim KDE_sample -n $1 shotFrags_kde_dif.pkl > shotFrags_kde_dif.txt
ls -thlc shotFrags_kde*.txt
In [78]:
%%R
df1 = read.delim('shotFrags_kde.txt', sep='\t')
df2 = read.delim('shotFrags_kde_dif.txt', sep='\t')
df1$data = 'no diffusion'
df2$data = 'diffusion'
df = rbind(df1, df2) %>%
gather(Taxon, BD, Clostridium_ljungdahlii_DSM_13528,
Escherichia_coli_1303, Streptomyces_pratensis_ATCC_33331) %>%
mutate(Taxon = gsub('_(ATCC|DSM)', '\n\\1', Taxon))
df %>% head(n=3)
In [79]:
%%R -w 800 -h 300
p = ggplot(df, aes(BD, fill=data)) +
geom_density(alpha=0.25) +
facet_wrap( ~ Taxon) +
scale_fill_discrete('') +
theme_bw() +
theme(
text=element_text(size=16),
axis.title.y = element_text(vjust=1),
axis.text.x = element_text(angle=50, hjust=1)
)
plot(p)
In [80]:
%%bash
source activate SIPSim
SIPSim DBL \
shotFrags_kde_dif.pkl \
--np 3 \
> shotFrags_kde_dif_DBL.pkl
In [81]:
# viewing DBL logs
!ls -thlc *pkl
In [82]:
%%bash
source activate SIPSim
SIPSim isotope_incorp \
--comm comm.txt \
--np 3 \
shotFrags_kde_dif_DBL.pkl \
incorp.config \
> shotFrags_KDE_dif_DBL_inc.pkl
In [83]:
!ls -thlc *.pkl
Note: statistics on how much isotope was incorporated by each taxon are listed in "BD-shift_stats.txt"
In [84]:
%%R
df = read.delim('BD-shift_stats.txt', sep='\t')
df
In [85]:
%%bash
source activate SIPSim
SIPSim OTU_table \
--abs 1e7 \
--np 3 \
shotFrags_KDE_dif_DBL_inc.pkl \
comm.txt \
fracs.txt \
> OTU.txt
In [86]:
!head -n 7 OTU.txt
In [87]:
%%R -h 350 -w 750
df = read.delim('OTU.txt', sep='\t')
p = ggplot(df, aes(BD_mid, count, fill=taxon)) +
geom_area(stat='identity', position='dodge', alpha=0.5) +
scale_x_continuous(expand=c(0,0)) +
labs(x='Buoyant density') +
labs(y='Shotgun fragment counts') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.y = element_text(vjust=1),
axis.title.x = element_blank()
)
plot(p)
Notes:
In [88]:
%%R -h 350 -w 750
p = ggplot(df, aes(BD_mid, count, fill=taxon)) +
geom_area(stat='identity', position='fill') +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
labs(x='Buoyant density') +
labs(y='Shotgun fragment counts') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.y = element_text(vjust=1),
axis.title.x = element_blank()
)
plot(p)
In [89]:
%%bash
source activate SIPSim
SIPSim OTU_PCR OTU.txt > OTU_PCR.txt
In [90]:
!head -n 5 OTU_PCR.txt
!tail -n 5 OTU_PCR.txt
Notes
In [91]:
%%bash
source activate SIPSim
SIPSim OTU_subsample OTU_PCR.txt > OTU_PCR_sub.txt
In [92]:
!head -n 5 OTU_PCR_sub.txt
Notes
In [93]:
%%R -h 350 -w 750
df = read.delim('OTU_PCR_sub.txt', sep='\t')
p = ggplot(df, aes(BD_mid, rel_abund, fill=taxon)) +
geom_area(stat='identity', position='fill') +
scale_x_continuous(expand=c(0,0)) +
scale_y_continuous(expand=c(0,0)) +
labs(x='Buoyant density') +
labs(y='Taxon relative abundances') +
facet_grid(library ~ .) +
theme_bw() +
theme(
text = element_text(size=16),
axis.title.y = element_text(vjust=1),
axis.title.x = element_blank()
)
plot(p)
In [94]:
%%bash
source activate SIPSim
SIPSim OTU_wide_long -w \
OTU_PCR_sub.txt \
> OTU_PCR_sub_wide.txt
In [95]:
!head -n 4 OTU_PCR_sub_wide.txt
In [96]:
%%bash
source activate SIPSim
SIPSim OTU_sample_data \
OTU_PCR_sub.txt \
> OTU_PCR_sub_meta.txt
In [97]:
!head OTU_PCR_sub_meta.txt
In [98]:
%%bash
source activate SIPSim
SIPSim -l
In [ ]: