In [1]:
%load_ext rpy2.ipython
In [2]:
%%R
workDir = '/home/nick/notebook/SIPSim/dev/fullCyc/'
physeqDir = '/home/nick/notebook/fullCyc/data/MiSeq_16S/515f-806r/V4_Lib1-7/phyloseq/'
physeqBulkCore = 'bulk-core'
physeqSIP = 'SIP-core_unk'
In [3]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(phyloseq)
library(fitdistrplus)
library(sads)
In [4]:
%%R
dir.create(workDir, showWarnings=FALSE)
In [5]:
%%R
# bulk core samples
F = file.path(physeqDir, physeqBulkCore)
physeq.bulk = readRDS(F)
physeq.bulk.m = physeq.bulk %>% sample_data
physeq.bulk
In [6]:
%%R
# SIP core samples
F = file.path(physeqDir, physeqSIP)
physeq.SIP = readRDS(F)
physeq.SIP.m = physeq.SIP %>% sample_data
physeq.SIP
In [303]:
%%R
physeq.bulk.12C = prune_samples(physeq.bulk.m$Substrate == '12C-Con', physeq.bulk) %>%
filter_taxa(function(x) sum(x) > 0, TRUE)
physeq.bulk.12C.m = physeq.bulk.12C %>% sample_data
physeq.bulk.12C
In [304]:
%%R
physeq.SIP.12C = prune_samples(physeq.SIP.m$Substrate == '12C-Con', physeq.SIP) %>%
transform_sample_counts(function(x) x/sum(x))
physeq.SIP.12C
In [305]:
%%R
# number of fractions per 12C day
as.Num = function(x) as.numeric(as.character(x))
physeq.SIP.12C %>% sample_data %>%
as.matrix %>% as.data.frame %>%
group_by(Day) %>%
summarize(n = n()) %>%
mutate(Day = as.Num(Day)) %>%
arrange(Day)
In [323]:
%%R -w 700
p = plot_richness(physeq.bulk.12C, measures=c('Observed', 'Chao1'))
p = p + theme(text = element_text(size=18))
p
In [339]:
%%R
ggplot_build(p)$data[[2]] %>% arrange(PANEL, x)
In [280]:
%%R
df.OTU = physeq.bulk.12C %>%
transform_sample_counts(function(x) x/sum(x)) %>%
otu_table %>%
as.matrix %>% as.data.frame
df.OTU$OTU = rownames(df.OTU)
df.OTU = df.OTU %>%
gather('sample', 'abundance', 1:(ncol(df.OTU)-1))
df.OTU %>% head(n=3)
In [281]:
%%R -w 700 -h 350
days = physeq.bulk.12C.m$Day %>% unique %>% as.character
days = reorder(days, days %>% as.Num)
df.OTU.l = list()
for(day in days %>% sort){
day %>% print
rx = paste(c('\\.D', day, '\\.'), collapse='')
df.OTU.l[[day]] = df.OTU %>%
filter(grepl(rx, sample), abundance > 0)
plotdist(df.OTU.l[[day]]$abundance)
}
In [282]:
%%R -w 450 -h 400
lapply(df.OTU.l, function(x) descdist(x$abundance, boot=1000))
In [294]:
%%R
tmp1 = df.OTU.l[['1']]$abundance
f.norm = fitdist(tmp1, 'norm')
f.exp = fitdist(tmp1, 'exp')
f.logn = fitdist(tmp1, 'lnorm')
f.gamma = fitdist(tmp1, 'gamma')
f.beta = fitdist(tmp1, 'beta')
f.list = list(f.norm, f.exp, f.logn, f.gamma, f.beta)
plot.legend = c('normal', 'exponential', 'lognormal', 'gamma', 'beta')
par(mfrow = c(2,1))
denscomp(f.list, legendtext=plot.legend)
qqcomp(f.list, legendtext=plot.legend)
In [295]:
%%R
gofstat(f.list, fitnames=plot.legend)
In [296]:
%%R
summary(f.beta)
In [299]:
%%R
tmp1 %>% summary %>% print
rbeta(10000, 0.605, 1206.999) %>% summary
In [307]:
%%R
physeq.SIP.12C = prune_samples(physeq.SIP.m$Substrate == '12C-Con', physeq.SIP)
physeq.SIP.12C
In [308]:
%%R
df.OTU = physeq.SIP.12C %>%
otu_table %>%
as.matrix %>% as.data.frame
df.OTU$OTU = rownames(df.OTU)
df.OTU = df.OTU %>%
gather('sample', 'abundance', 1:(ncol(df.OTU)-1))
df.OTU %>% head(n=3)
In [309]:
%%R -h 300
df.OTU.s = df.OTU %>%
group_by(sample) %>%
summarize(total_abundance = sum(abundance))
cat('Number of samples (fractions): ', nrow(df.OTU.s), '\n')
p = ggplot(df.OTU.s, aes(total_abundance)) +
geom_histogram(binwidth=100, aes(y = ..density..)) +
geom_density() +
theme_bw() +
theme(
text = element_text(size=16)
)
p
In [310]:
%%R -w 700 -h 350
plotdist(df.OTU.s$total_abundance)
In [311]:
%%R -w 450 -h 400
descdist(df.OTU.s$total_abundance, boot=1000)
In [312]:
%%R
f.n = fitdist(df.OTU.s$total_abundance, 'norm')
f.ln = fitdist(df.OTU.s$total_abundance, 'lnorm')
f.ll = fitdist(df.OTU.s$total_abundance, 'logis')
f.list = list(f.n, f.ln, f.ll)
plot.legend = c('normal', 'log-normal', 'logistic')
par(mfrow = c(2,1))
denscomp(f.list, legendtext=plot.legend)
qqcomp(f.list, legendtext=plot.legend)
In [313]:
%%R
gofstat(list(f.n, f.ln, f.ll), fitnames=plot.legend)
In [314]:
%%R
summary(f.ln)
In [315]:
%%R -h 300
rlnorm(1000, 9.432, 0.5) %>% hist(breaks=30)
In [ ]:
In [ ]: