In [1]:
%load_ext rpy2.ipython
In [2]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
library(phyloseq)
library(vegan)
library(scatterplot3d)
In [62]:
%%R
otu.tbl.file1 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/0/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'
otu.tbl.file2 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/100/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'
physeq1 = readRDS(otu.tbl.file1)
physeq2 = readRDS(otu.tbl.file2)
In [63]:
%%R
ord1 = ordinate(physeq1, method='NMDS', distance='bray')
#p1 = plot_ordination(physeq1, ord1, justDF=T)
ord2 = ordinate(physeq2, method='NMDS', distance='bray')
#p2 = plot_ordination(physeq2, ord2, justDF=T)
ord1 %>% head %>% print
ord2 %>% head %>% print
In [69]:
%%R
otu.tbl = physeq1 %>% otu_table %>% t
bc1 = vegdist(otu.tbl)
bc1.col = data.frame(t(combn(rownames(otu.tbl),2)), as.numeric(bc1))
colnames(bc1.col) = c('X1', 'X2', 'dist')
bc1.col$X1.lib = gsub('__.+', '', bc1.col$X1)
bc1.col$X2.lib = gsub('__.+', '', bc1.col$X2)
bc1.col$X1.F.start = gsub('.+__([0-9.]+)-.+', '\\1', bc1.col$X1)
bc1.col$X2.F.start = gsub('.+__([0-9.]+)-.+', '\\1', bc1.col$X2)
bc1.col$X1.F.end = gsub('.+-', '', bc1.col$X1)
bc1.col$X2.F.end = gsub('.+-', '', bc1.col$X2)
bc1.col %>% head
In [70]:
%%R
otu.tbl = physeq2 %>% otu_table %>% t
bc2 = vegdist(otu.tbl)
bc2.col = data.frame(t(combn(rownames(otu.tbl),2)), as.numeric(bc2))
colnames(bc2.col) = c('X1', 'X2', 'dist')
bc2.col$X1.lib = gsub('__.+', '', bc2.col$X1)
bc2.col$X2.lib = gsub('__.+', '', bc2.col$X2)
bc2.col$X1.F.start = gsub('.+__([0-9.]+)-.+', '\\1', bc2.col$X1)
bc2.col$X2.F.start = gsub('.+__([0-9.]+)-.+', '\\1', bc2.col$X2)
bc2.col$X1.F.end = gsub('.+-', '', bc2.col$X1)
bc2.col$X2.F.end = gsub('.+-', '', bc2.col$X2)
bc2.col %>% head
In [93]:
%%R
bc1.col$file = 1
bc2.col$file = 2
asNum = function (x){ as.numeric(as.character(x)) }
tbl.j = rbind(bc1.col, bc2.col)
tbl.j = tbl.j %>%
filter(X1.lib != X2.lib) %>%
mutate(X1.F.start = asNum(X1.F.start),
X2.F.start = asNum(X2.F.start),
F.start.dist = abs(X1.F.start - X2.F.start),
dist = asNum(dist),
file = as.character(file))
#scatterplot3d(tbl.j$X1.F.start, tbl.j$X2.F.start, tbl.j$dist, color=tbl.j$file)
In [98]:
%%R -w 500 -h 400
ggplot(tbl.j, aes(F.start.dist, dist, color=file)) +
geom_point()
In [99]:
%%R
otu.tbl.file1 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/0/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'
otu.tbl.file2 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/100/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'
physeq1 = readRDS(otu.tbl.file1)
physeq2 = readRDS(otu.tbl.file2)
In [103]:
%%R
ord1 = ordinate(physeq1, method='NMDS', distance='bray')
ord2 = ordinate(physeq2, method='NMDS', distance='bray')
ord1 %>% scores %>% head %>% print
ord2 %>% scores %>% head %>% print
In [124]:
%%R
get.fracs = function(ord){
fracs = gsub('.+__', '', rownames(ord %>% scores)) %>% as.data.frame()
colnames(fracs) = c('fractions')
fracs = fracs %>%
separate(fractions, c('start','end'), sep='-', convert=T) %>%
mutate(start = start * 1000,
end = end * 1000)
return(fracs)
}
ord1.f = get.fracs(ord1)
ord2.f = get.fracs(ord2)
In [125]:
%%R
library(IRanges)
In [130]:
%%R
ord1.r = IRanges(start=ord1.f$start, end=ord1.f$end)
ord2.r = IRanges(start=ord2.f$start, end=ord2.f$end)
In [139]:
%%R
ov = findOverlaps(ord1.r, ord2.r, select='first')
ov
In [138]:
%%R
ov = findOverlaps(ord1.r, ord2.r)
ov
In [3]:
%%R
otu.tbl.file1 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/0/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'
otu.tbl.file2 = '/home/nick/notebook/SIPSim/dev/bac_genome1210/atomIncorp_taxaIncorp/100/10/1/OTU_n2_abs1e9_sub-norm_filt.physeq'
physeq1 = readRDS(otu.tbl.file1)
physeq2 = readRDS(otu.tbl.file2)
In [7]:
%%R
ord1 = ordinate(physeq1, method='NMDS', distance='bray')
ord2 = ordinate(physeq2, method='NMDS', distance='bray')
In [52]:
%%R
grps = as.character(rep(seq(1,nrow(ord1$points) / 2), 2))
grps = append(grps, '2')
plot(ord1, type = "p", display='sites')
elps = ordiellipse(ord1, grps, kind="se", conf=0.95, lwd=2, col="blue")
elps = elps %>% summary %>% t %>% as.data.frame
elps
In [53]:
%%R
ggplot(elps, aes(NMDS1, NMDS2)) +
geom_point()
In [64]:
%%R
get.ellipse = function(ord){
grps = as.character(rep(seq(1,nrow(ord$points) / 2), 2))
grps = append(grps, '2')
plot(ord, type = "p", display='sites')
elps = ordiellipse(ord, grps, kind="se", conf=0.95, lwd=2, col="blue")
elps = elps %>% summary %>% t %>% as.data.frame
return(elps)
}
get.ellipse(ord1)
In [86]:
%%R
mid = function(x, y){ (x + y)/2 }
get.BD.range = function(tbl){
tbl = as.data.frame(tbl)
tbl$lib = gsub('__.+', '', rownames(tbl)) %>% as.character
tbl$BD.start = gsub('.+__([0-9.]+)-.+', '\\1', rownames(tbl)) %>% as.numeric
tbl$BD.end = gsub('.+-', '', rownames(tbl)) %>% as.numeric
tbl$BD.mid = mapply(mid, tbl$BD.start, tbl$BD.end)
return(tbl)
}
ord.BD = get.BD.range(ord1 %>% scores)
ord.BD %>% head
In [87]:
%%R
# making fixed BD-range & binning by BD.mid
BD.range = seq(1.6, 1.9, 0.004)
BD.range
In [ ]: