In [1]:
    
workDir = '/home/nick/t/SIPSim/'
nprocs = 3
    
In [2]:
    
import os
import glob
    
In [3]:
    
%load_ext rpy2.ipython
    
In [4]:
    
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
    
    
In [5]:
    
if not os.path.isdir(workDir):
    os.makedirs(workDir)
%cd $workDir    
genomeDir = os.path.join(workDir, 'genomes_rn')
    
    
The script below ("SIPSim incorpConfigExample") is helpful for making simple experimental designs
In [6]:
    
# this file
!SIPSim incorpConfigExample \
  --percTaxa 34 \
  --percIncorpUnif 50 \
  --n_reps 1 \
  > incorp.config
    
In [7]:
    
!cat incorp.config
    
    
In [8]:
    
!SIPSim communities \
    --config incorp.config \
    ./genomes_rn/genome_index.txt \
    > comm.txt
    
In [9]:
    
!cat comm.txt
    
    
Note: "library" = gradient
In [10]:
    
!SIPSim gradient_fractions \
    --BD_min 1.67323 \
    --BD_max 1.7744 \
    comm.txt \
    > fracs.txt
    
In [11]:
    
!head -n 6 fracs.txt
    
    
In [12]:
    
primers = """>515F
GTGCCAGCMGCCGCGGTAA
>806R
GGACTACHVGGGTWTCTAAT
"""
F = os.path.join(workDir, '515F-806R.fna')
with open(F, 'wb') as oFH:
    oFH.write(primers)
    
print 'File written: {}'.format(F)
    
    
In [13]:
    
# skewed-normal
!SIPSim fragments \
    $genomeDir/genome_index.txt \
    --fp $genomeDir \
    --fr 515F-806R.fna \
    --fld skewed-normal,9000,2500,-5 \
    --flr None,None \
    --nf 10000 \
    --np $nprocs \
    --tbl \
    > ampFrags.txt
    
    
In [14]:
    
!head -n 5 ampFrags.txt
    
    
In [15]:
    
%%R -w 700 -h 350
df = read.delim('ampFrags.txt')
ggplot(df, aes(fragGC, fragLength, color=taxon_name)) +
    geom_density2d() +
    scale_color_discrete('Taxon') +
    labs(x='Fragment G+C', y='Fragment length (bp)') +
    theme_bw() +
    theme(
        text = element_text(size=16)
    )
    
    
Note: for information on what's going on in this config file, use the command: SIPSim isotope_incorp -h
In [16]:
    
!SIPSim fragment_KDE \
    ampFrags.txt \
    > ampFrags_kde.pkl
    
SIPSim KDE_sample
In [17]:
    
!SIPSim diffusion \
    ampFrags_kde.pkl \
    --np $nprocs \
    > ampFrags_kde_dif.pkl
    
    
In [20]:
    
n = 100000
!SIPSim KDE_sample -n $n ampFrags_kde.pkl > ampFrags_kde.txt
!SIPSim KDE_sample -n $n ampFrags_kde_dif.pkl > ampFrags_kde_dif.txt
!ls -thlc ampFrags_kde*.txt
    
    
In [32]:
    
%%R
df1 = read.delim('ampFrags_kde.txt', sep='\t')
df2 = read.delim('ampFrags_kde_dif.txt', sep='\t')
df1$data = 'no diffusion'
df2$data = 'diffusion'
df = rbind(df1, df2) %>%
    gather(Taxon, BD, Clostridium_ljungdahlii_DSM_13528, 
           Escherichia_coli_1303, Streptomyces_pratensis_ATCC_33331) %>%
    mutate(Taxon = gsub('_(ATCC|DSM)', '\n\\1', Taxon))
df %>% head(n=3)
    
    
In [33]:
    
%%R -w 800 -h 300
ggplot(df, aes(BD, fill=data)) +
    geom_density(alpha=0.25) +
    facet_wrap( ~ Taxon) +    
    scale_fill_discrete('') +
    theme_bw() +
    theme(
        text=element_text(size=16),
        axis.title.y = element_text(vjust=1),
        axis.text.x = element_text(angle=50, hjust=1)
        )
    
    
In [34]:
    
!SIPSim DBL \
    ampFrags_kde_dif.pkl \
    --np $nprocs \
    > ampFrags_kde_dif_DBL.pkl
    
    
In [35]:
    
# viewing DBL logs
!ls -thlc *pkl
    
    
In [61]:
    
!SIPSim isotope_incorp \
    --comm comm.txt \
    --np $nprocs \
    ampFrags_kde_dif_DBL.pkl \
    incorp.config \
    > ampFrags_KDE_dif_DBL_inc.pkl
    
    
In [62]:
    
!ls -thlc *.pkl
    
    
Note: statistics on how much isotope was incorporated by each taxon are listed in "BD-shift_stats.txt"
In [63]:
    
%%R
df = read.delim('BD-shift_stats.txt', sep='\t')
df
    
    
In [64]:
    
!SIPSim OTU_table \
    --abs 1e7 \
    --np $nprocs \
    ampFrags_KDE_dif_DBL_inc.pkl \
    comm.txt \
    fracs.txt \
    > OTU.txt
    
    
In [65]:
    
!head -n 7 OTU.txt
    
    
In [77]:
    
%%R -h 350 -w 750
df = read.delim('OTU.txt', sep='\t')
p = ggplot(df, aes(BD_mid, count, fill=taxon)) +
    geom_area(stat='identity', position='dodge', alpha=0.5) +
    scale_x_continuous(expand=c(0,0)) +
    labs(x='Buoyant density') +
    labs(y='Amplicon fragment counts') +
    facet_grid(library ~ .) +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        axis.title.y = element_text(vjust=1),        
        axis.title.x = element_blank()
    )
p
    
    
Notes:
In [79]:
    
%%R -h 350 -w 750
p = ggplot(df, aes(BD_mid, count, fill=taxon)) +
    geom_area(stat='identity', position='fill') +
    scale_x_continuous(expand=c(0,0)) +
    scale_y_continuous(expand=c(0,0)) +
    labs(x='Buoyant density') +
    labs(y='Amplicon fragment counts') +
    facet_grid(library ~ .) +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        axis.title.y = element_text(vjust=1),        
        axis.title.x = element_blank()
    )
p
    
    
In [67]:
    
!SIPSim OTU_PCR OTU.txt > OTU_PCR.txt
    
In [68]:
    
!head -n 5 OTU_PCR.txt
    
    
Notes
In [69]:
    
!SIPSim OTU_subsample OTU_PCR.txt > OTU_PCR_sub.txt
    
In [70]:
    
!head -n 5 OTU_PCR_sub.txt
    
    
Notes
In [80]:
    
%%R -h 350 -w 750
df = read.delim('OTU_PCR_sub.txt', sep='\t')
p = ggplot(df, aes(BD_mid, rel_abund, fill=taxon)) +
    geom_area(stat='identity', position='fill') +
    scale_x_continuous(expand=c(0,0)) +
    scale_y_continuous(expand=c(0,0)) +
    labs(x='Buoyant density') +
    labs(y='Taxon relative abundances') +
    facet_grid(library ~ .) +
    theme_bw() +
    theme( 
        text = element_text(size=16),
        axis.title.y = element_text(vjust=1),        
        axis.title.x = element_blank()
    )
p
    
    
Notes
In [81]:
    
!SIPSim OTU_wideLong -w \
    OTU_PCR_sub.txt \
    > OTU_PCR_sub_wide.txt
    
In [82]:
    
!head -n 4 OTU_PCR_sub_wide.txt
    
    
In [83]:
    
!SIPSim OTU_sampleData \
    OTU_PCR_sub.txt \
    > OTU_PCR_sub_meta.txt
    
In [86]:
    
!head OTU_PCR_sub_meta.txt
    
    
In [87]:
    
!SIPSim -l
    
    
In [90]:
    
!SIPSimR -l
    
    
In [ ]: