In [1]:
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome1210/'
inFileDir = os.path.join(workDir, 'atomIncorp_taxaIncorp')
buildDir = os.path.join(workDir, 'atomIncorp_taxaIncorp_NMDS')
R_dir = '/home/nick/notebook/SIPSim/lib/R/'
In [2]:
import glob
from os.path import abspath
import nestly
from IPython.display import Image, display
In [3]:
%load_ext rpy2.ipython
In [4]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
library(phyloseq)
library(vegan)
In [112]:
if not os.path.isdir(buildDir):
os.makedirs(buildDir)
In [113]:
%%R -i inFileDir
files = list.files(inFileDir, pattern='OTU_n2_abs1e9_sub-norm_filt.physeq', recursive=T)
dirs = gsub('/[0-9]+/[^/]+$','', files) %>% unique %>% as.data.frame
colnames(dirs) = c('dirs')
percTaxa.to.keep = c(1,10,25,50)
tbl = dirs %>%
separate(dirs, c('percIncorp','percTaxa'), sep='/', remove=F) %>%
filter(percTaxa %in% percTaxa.to.keep)
tbl %>% nrow %>% print
tbl %>% head
In [165]:
%%R -i inFileDir
get.files = function(dir, base.path, pattern){
dir = as.character(dir)
p = paste(c(base.path, dir), collapse='/')
files = list.files(p, pattern=pattern, recursive=T)
p = gsub('$', '/', p)
files = gsub('^', p, files)
return(files)
}
get.otu.tbls = function(files){
tbl.l = list()
for(f in files){
physeq = readRDS(f)
id = gsub('/[^/]+$', '', f)
id = gsub('.+atomIncorp_taxaIncorp/', '', id)
id = paste(c(id,'__'), collapse='')
otu.tbl = physeq %>% otu_table %>% as.data.frame
colnames(otu.tbl) = gsub('^', id, colnames(otu.tbl))
otu.tbl$taxon = rownames(otu.tbl)
tbl.l[[f]] = otu.tbl
}
return(tbl.l)
}
join.otu.tbls = function(tbl.l){
n = names(tbl.l)
tbl = tbl.l[[n[1]]]
for (i in seq(2:length(n))){
n.i = n[i]
tbl = left_join(tbl, tbl.l[[n.i]], c('taxon'='taxon'))
}
# edit table
tbl[is.na(tbl)] = 0
rownames(tbl) = tbl$taxon
tbl$taxon = NULL
# return
return(tbl)
}
make.meta = function(tbl, BD.range = seq(1.6, 1.9, 0.004)){
meta = tbl %>% colnames %>% as.data.frame
colnames(meta) = c('X')
meta = filter(meta, X != 'taxon')
# midpoint function
mid = function(x, y){ (x + y)/2 }
# making fixed BD-range
#BD.range = seq(1.6, 1.9, 0.004)
#print(meta); break
# edit metadata
meta = meta %>%
#separate(X, c('percIncorp','percTaxa','rep','library','BD_range'), sep='__', remove=F) %>%
separate(X, c('tmp1', 'library', 'tmp2'), sep='__', remove=F) %>%
separate(tmp1, c('percIncorp','percTaxa', 'rep'), sep='/') %>%
mutate(tmp2 = gsub('.[xy]$', '', tmp2)) %>%
separate(tmp2, c('BD.min', 'BD.max'), sep='-') %>%
mutate(BD.min = as.numeric(BD.min),
BD.max = as.numeric(BD.max),
BD.mid = mapply(mid, BD.min, BD.max),
BD.bin = cut(BD.mid, breaks=BD.range)) %>%
unite(groups, library, BD.bin, sep='__')
return(meta)
}
edit.ellipse = function(elps){
# midpoint function
mid = function(x, y){ (x + y)/2 }
elps.tbl = elps %>% summary %>% t %>% as.data.frame
elps.tbl$meta = rownames(elps.tbl)
elps.tbl = elps.tbl %>%
separate(meta, c('library','BD.range'), sep='__') %>%
mutate(BD.range = gsub('^.', '', BD.range),
BD.range = gsub('.$', '', BD.range)) %>%
separate(BD.range, c('BD.min','BD.max'), sep=',', remove=F) %>%
mutate(
BD.min = as.numeric(BD.min),
BD.max = as.numeric(BD.max),
BD.mid = mapply(mid, BD.min, BD.max))
}
In [166]:
%%R
# wrapper function
get.ellipses = function(x){
dir = x['dirs']
# test run of functions
files = get.files(dir, base.path=inFileDir, pattern='OTU_n2_abs1e9_sub-norm_filt.physeq')
otu.tbls = get.otu.tbls(files)
otu.tbl = join.otu.tbls(otu.tbls)
# making metadata
meta = make.meta(otu.tbl, BD.range=seq(1.6, 2, 0.002))
ord = metaMDS(otu.tbl %>% t)
# getting ellipse
plot(ord, type = "p", display='sites')
elps = ordiellipse(ord, groups=meta$groups, kind="se", conf=0.95, lwd=2, col="blue")
# edit ellipse file
elps.tbl = edit.ellipse(elps)
elps.tbl$percIncorp = x['percIncorp'] %>% as.vector
elps.tbl$percTaxa = x['percTaxa'] %>% as.vector
return(elps.tbl)
}
In [167]:
%%R
elps.tbls = apply(tbl, 1, get.ellipses)