Initial dataset setup for priming_exp validation


In [2]:
primingExpDir = '/var/seq_data/priming_exp/'
otuTableFile = '/var/seq_data/priming_exp/data/otu_table.txt'
otuTableSumFile = '/var/seq_data/priming_exp/data/otu_table_summary.txt'
otuRepFile = '/var/seq_data/priming_exp/otusn.pick.fasta'

Init


In [3]:
import glob
from os.path import abspath
from IPython.display import Image

In [4]:
%load_ext rpy2.ipython

In [5]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Loading required package: grid

Checking files


In [11]:
%%bash -s "$otuTableFile" "$otuRepFile" 

printf "Number of OTUs in table file: "
wc -l <(tail -n +2 $1)

printf "Number of sequences in OTU rep file: "
grep -c ">" $2


Number of OTUs in table file: 10361 /dev/fd/8
Number of sequences in OTU rep file: 10361

In [17]:
%%bash -s "$otuTableFile" "$otuRepFile" 
# checking overlap

tail -n +2 $1 | head | cut -f 1 | \
    xargs -I % grep % $2 | wc -l
    
tail $1 | cut -f 1 | \
    xargs -I % grep % $2 | wc -l


10
10
tail: write error

Dataset naming scheme


In [6]:
%%R -i otuTableFile

tbl = read.delim(otuTableFile, sep='\t')
col.n = colnames(tbl) %>% as.data.frame
colnames(col.n) = 'X'
col.n = filter(col.n, X != 'OTUId')
col.n %>% head


                  X
1 X12C.700.45.01.24
2 X12C.700.14.06.14
3 X12C.100.14.05.18
4 X12C.700.14.06.05
5 X12C.000.14.05.18
6 X12C.100.14.05.11

In [9]:
%%R
col.ns = col.n %>% 
    separate(X, c('isotope','treatment','day','rep','fraction'), sep='\\.') %>%
    mutate(day = as.numeric(day),
           rep = as.character(rep),
           fraction = as.numeric(fraction))
           

col.ns %>% head


  isotope treatment day rep fraction
1    X12C       700  45  01       24
2    X12C       700  14  06       14
3    X12C       100  14  05       18
4    X12C       700  14  06        5
5    X12C       000  14  05       18
6    X12C       100  14  05       11

In [17]:
%%R -w 800 -h 400

col.ns.f = filter(col.ns, grepl('X1[23]C', isotope))

ggplot(col.ns.f, aes(day, fraction, color=rep)) +
    geom_point() +
    facet_grid(isotope ~ treatment)



In [24]:
%%R -w 600 -h 600

col.ns.f$day = factor(col.ns.f$day, levels=c(14, 28, 45))

ggplot(col.ns.f, aes(fraction, rep, color=isotope)) +
    geom_point() +
    facet_grid(day ~ treatment)



In [23]:
%%R -w 500 -h 300

col.ns.cc = filter(col.ns, !grepl('X1[23]C', isotope))

ggplot(col.ns.cc, aes(day, rep)) +
    geom_point()