Make sure you have created the dataset before trying to run this notebook
In [57]:
    
workDir = '../../t/SIPSim_example/'
nprocs = 3
    
In [58]:
    
import os
    
In [59]:
    
# Note: you will need to install `rpy2.ipython` and the necessary R packages (see next cell)
%load_ext rpy2.ipython
    
    
In [60]:
    
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
    
In [61]:
    
workDir = os.path.abspath(workDir)
if not os.path.isdir(workDir):
    os.makedirs(workDir)
%cd $workDir    
genomeDir = os.path.join(workDir, 'genomes_rn')
    
    
The script below ("SIPSim incorpConfigExample") is helpful for making simple experimental designs
In [62]:
    
%%bash
source activate SIPSim
# creating example config
SIPSim incorp_config_example \
  --percTaxa 34 \
  --percIncorpUnif 50 \
  --n_reps 1 \
  > incorp.config
    
In [63]:
    
!cat incorp.config
    
    
In [64]:
    
%%bash
source activate SIPSim
SIPSim communities \
    --config incorp.config \
    ./genomes_rn/genome_index.txt \
    > comm.txt
    
In [65]:
    
!cat comm.txt
    
    
Note: "library" = gradient
In [66]:
    
%%bash 
source activate SIPSim
SIPSim gradient_fractions \
    --BD_min 1.67323 \
    --BD_max 1.7744 \
    comm.txt \
    > fracs.txt
    
In [67]:
    
!head -n 6 fracs.txt
    
    
In [68]:
    
# primers = """>515F
# GTGCCAGCMGCCGCGGTAA
# >806R
# GGACTACHVGGGTWTCTAAT
# """
# F = os.path.join(workDir, '515F-806R.fna')
# with open(F, 'wb') as oFH:
#     oFH.write(primers)
    
# print 'File written: {}'.format(F)
    
In [69]:
    
%%bash -s $genomeDir
source activate SIPSim 
# skewed-normal
SIPSim fragments \
    $1/genome_index.txt \
    --fp $1 \
    --fld skewed-normal,9000,2500,-5 \
    --flr None,None \
    --nf 1000 \
    --debug \
    --tbl \
    > shotFrags.txt
    
    
In [70]:
    
!head -n 5 shotFrags.txt
!tail -n 5 shotFrags.txt
    
    
In [71]:
    
%%R -w 700 -h 350
df = read.delim('shotFrags.txt')
p = ggplot(df, aes(fragGC, fragLength, color=taxon_name)) +
    geom_density2d() +
    scale_color_discrete('Taxon') +
    labs(x='Fragment G+C', y='Fragment length (bp)') +
    theme_bw() +
    theme(
        text = element_text(size=16)
    )
plot(p)
    
    
Note: for information on what's going on in this config file, use the command: SIPSim isotope_incorp -h
In [72]:
    
%%bash 
source activate SIPSim
SIPSim fragment_KDE \
    shotFrags.txt \
    > shotFrags_kde.pkl
    
In [73]:
    
!ls -thlc shotFrags_kde.pkl
    
    
SIPSim KDE_sample
In [74]:
    
%%bash 
source activate SIPSim
SIPSim diffusion \
    shotFrags_kde.pkl \
    --np 3 \
    > shotFrags_kde_dif.pkl
    
    
In [75]:
    
!ls -thlc shotFrags_kde_dif.pkl
    
    
In [76]:
    
n = 100000
    
In [77]:
    
%%bash -s $n
source activate SIPSim
SIPSim KDE_sample -n $1 shotFrags_kde.pkl > shotFrags_kde.txt
SIPSim KDE_sample -n $1 shotFrags_kde_dif.pkl > shotFrags_kde_dif.txt
ls -thlc shotFrags_kde*.txt
    
    
In [78]:
    
%%R
df1 = read.delim('shotFrags_kde.txt', sep='\t')
df2 = read.delim('shotFrags_kde_dif.txt', sep='\t')
df1$data = 'no diffusion'
df2$data = 'diffusion'
df = rbind(df1, df2) %>%
    gather(Taxon, BD, Clostridium_ljungdahlii_DSM_13528, 
           Escherichia_coli_1303, Streptomyces_pratensis_ATCC_33331) %>%
    mutate(Taxon = gsub('_(ATCC|DSM)', '\n\\1', Taxon))
df %>% head(n=3)
    
    
In [79]:
    
%%R -w 800 -h 300
p = ggplot(df, aes(BD, fill=data)) +
    geom_density(alpha=0.25) +
    facet_wrap( ~ Taxon) +    
    scale_fill_discrete('') +
    theme_bw() +
    theme(
        text=element_text(size=16),
        axis.title.y = element_text(vjust=1),
        axis.text.x = element_text(angle=50, hjust=1)
        )
plot(p)
    
    
In [80]:
    
%%bash 
source activate SIPSim
SIPSim DBL \
    shotFrags_kde_dif.pkl \
    --np 3 \
    > shotFrags_kde_dif_DBL.pkl
    
    
In [81]:
    
# viewing DBL logs
!ls -thlc *pkl
    
    
In [82]:
    
%%bash
source activate SIPSim
SIPSim isotope_incorp \
    --comm comm.txt \
    --np 3 \
    shotFrags_kde_dif_DBL.pkl \
    incorp.config \
    > shotFrags_KDE_dif_DBL_inc.pkl
    
    
In [83]:
    
!ls -thlc *.pkl
    
    
Note: statistics on how much isotope was incorporated by each taxon are listed in "BD-shift_stats.txt"
In [84]:
    
%%R
df = read.delim('BD-shift_stats.txt', sep='\t')
df
    
    
In [85]:
    
%%bash
source activate SIPSim
SIPSim OTU_table \
    --abs 1e7 \
    --np 3 \
    shotFrags_KDE_dif_DBL_inc.pkl \
    comm.txt \
    fracs.txt \
    > OTU.txt
    
    
In [86]:
    
!head -n 7 OTU.txt
    
    
In [87]:
    
%%R -h 350 -w 750
df = read.delim('OTU.txt', sep='\t')
p = ggplot(df, aes(BD_mid, count, fill=taxon)) +
    geom_area(stat='identity', position='dodge', alpha=0.5) +
    scale_x_continuous(expand=c(0,0)) +
    labs(x='Buoyant density') +
    labs(y='Shotgun fragment counts') +
    facet_grid(library ~ .) +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        axis.title.y = element_text(vjust=1),        
        axis.title.x = element_blank()
    )
plot(p)
    
    
Notes:
In [88]:
    
%%R -h 350 -w 750
p = ggplot(df, aes(BD_mid, count, fill=taxon)) +
    geom_area(stat='identity', position='fill') +
    scale_x_continuous(expand=c(0,0)) +
    scale_y_continuous(expand=c(0,0)) +
    labs(x='Buoyant density') +
    labs(y='Shotgun fragment counts') +
    facet_grid(library ~ .) +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        axis.title.y = element_text(vjust=1),        
        axis.title.x = element_blank()
    )
plot(p)
    
    
In [89]:
    
%%bash
source activate SIPSim
SIPSim OTU_PCR OTU.txt > OTU_PCR.txt
    
In [90]:
    
!head -n 5 OTU_PCR.txt
!tail -n 5 OTU_PCR.txt
    
    
Notes
In [91]:
    
%%bash
source activate SIPSim
SIPSim OTU_subsample OTU_PCR.txt > OTU_PCR_sub.txt
    
In [92]:
    
!head -n 5 OTU_PCR_sub.txt
    
    
Notes
In [93]:
    
%%R -h 350 -w 750
df = read.delim('OTU_PCR_sub.txt', sep='\t')
p = ggplot(df, aes(BD_mid, rel_abund, fill=taxon)) +
    geom_area(stat='identity', position='fill') +
    scale_x_continuous(expand=c(0,0)) +
    scale_y_continuous(expand=c(0,0)) +
    labs(x='Buoyant density') +
    labs(y='Taxon relative abundances') +
    facet_grid(library ~ .) +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        axis.title.y = element_text(vjust=1),        
        axis.title.x = element_blank()
    )
plot(p)
    
    
In [94]:
    
%%bash
source activate SIPSim
SIPSim OTU_wide_long -w \
    OTU_PCR_sub.txt \
    > OTU_PCR_sub_wide.txt
    
In [95]:
    
!head -n 4 OTU_PCR_sub_wide.txt
    
    
In [96]:
    
%%bash
source activate SIPSim
SIPSim OTU_sample_data \
    OTU_PCR_sub.txt \
    > OTU_PCR_sub_meta.txt
    
In [97]:
    
!head OTU_PCR_sub_meta.txt
    
    
In [98]:
    
%%bash
source activate SIPSim
SIPSim -l
    
    
In [ ]: