# Goal

• Exploring potential methods of comparing HR-SIP gradient data (compositional data) that reflects differences in peak height/width/location in eudlidean space (absolute abundances)

# Init

In [88]:

In [89]:
%%R
library(dplyr)
library(tidyr)
library(ggplot2)
library(compositions)
library(coenocliner)
library(vegan)
library(gridExtra)

# Creating test communities

• varying parameters:
• peak locations (species optima)
• peak widths (species tolerances)
• peak heights (max abundances)

## Varying peak locations

In [90]:
%%R
set.seed(2)
M = 1000                                      # number of species
ming = 1.67323                                # gradient minimum...
maxg = 1.76                                # ...and maximum
#meang = mean(c(ming, maxg))
meang = 1.71
locs = seq(ming, maxg, length = 30)        # gradient locations
opt  = rnorm(M, mean=meang, sd=0.005)          # runif(M, min = ming, max = maxg)   # species optima
tol  = rep(0.005, M)                       # species tolerances
h    = ceiling(rlnorm(M, meanlog = 11))    # max abundances
pars = cbind(opt = opt, tol = tol, h = h)  # put in a matrix

In [91]:
%%R -w 600 -h 350
opt1  = rnorm(M, mean=meang, sd=0.001)
pars = cbind(opt = opt1, tol = tol, h = h)  # put in a matrix

PL1 <- coenocline(locs, responseModel = "gaussian", params = pars,
countModel = "poisson")

# community total abundance
apply(PL1 %>% as.matrix, 1, sum) %>% sum %>% print

matplot(locs, PL1, lty = "solid", type = "l", xlab = "BD", ylab = "Abundance")

[1] 420828095

In [92]:
%%R -w 600 -h 350
opt1  = rnorm(M, mean=meang, sd=0.005)
pars = cbind(opt = opt1, tol = tol, h = h)  # put in a matrix

PL2 <- coenocline(locs, responseModel = "gaussian", params = pars,
countModel = "poisson")

matplot(locs, PL2, lty = "solid", type = "l", xlab = "BD", ylab = "Abundance")

In [93]:
%%R -w 600 -h 350
opt1  = rnorm(M, mean=meang, sd=0.01)
pars = cbind(opt = opt1, tol = tol, h = h)  # put in a matrix

PL3 <- coenocline(locs, responseModel = "gaussian", params = pars,
countModel = "poisson")

matplot(locs, PL3, lty = "solid", type = "l", xlab = "BD", ylab = "Abundance")

## Varying peak widths

In [94]:
%%R -w 600 -h 350
tol1  = rep(0.0001, M)                       # species tolerances
pars = cbind(opt = opt, tol = tol1, h = h)  # put in a matrix

PW1 <- coenocline(locs, responseModel = "gaussian", params = pars,
countModel = "poisson")

matplot(locs, PW1, lty = "solid", type = "l", xlab = "BD", ylab = "Abundance")

In [95]:
%%R -w 600 -h 350
tol1  = rep(0.005, M)                       # species tolerances
pars = cbind(opt = opt, tol = tol1, h = h)  # put in a matrix

PW2 <- coenocline(locs, responseModel = "gaussian", params = pars,
countModel = "poisson")

matplot(locs, PW2, lty = "solid", type = "l", xlab = "BD", ylab = "Abundance")

In [96]:
%%R -w 600 -h 350
tol1  = rep(0.01, M)                       # species tolerances
pars = cbind(opt = opt, tol = tol1, h = h)  # put in a matrix

PW3 <- coenocline(locs, responseModel = "gaussian", params = pars,
countModel = "poisson")

matplot(locs, PW3, lty = "solid", type = "l", xlab = "BD", ylab = "Abundance")

## Varying peak heights

In [97]:
%%R -w 600 -h 350
h1    = ceiling(rlnorm(M, meanlog = 9))    # max abundances
pars = cbind(opt = opt, tol = tol, h = h1)  # put in a matrix

PH1 <- coenocline(locs, responseModel = "gaussian", params = pars,
countModel = "poisson")

matplot(locs, PH1, lty = "solid", type = "l", xlab = "BD", ylab = "Abundance")

In [98]:
%%R -w 600 -h 350
h1    = ceiling(rlnorm(M, meanlog = 11))    # max abundances
pars = cbind(opt = opt, tol = tol, h = h1)  # put in a matrix

PH2 <- coenocline(locs, responseModel = "gaussian", params = pars,
countModel = "poisson")

matplot(locs, PH2, lty = "solid", type = "l", xlab = "BD", ylab = "Abundance")

In [99]:
%%R -w 600 -h 350
h1    = ceiling(rlnorm(M, meanlog = 13))    # max abundances
pars = cbind(opt = opt, tol = tol, h = h1)  # put in a matrix

PH3 <- coenocline(locs, responseModel = "gaussian", params = pars,
countModel = "poisson")

matplot(locs, PH3, lty = "solid", type = "l", xlab = "BD", ylab = "Abundance")

In [100]:
%%R
# making a list
comm.l = list(
"PL1" = PL1,
"PL2" = PL2,
"PL3" = PL3,
"PW1" = PW1,
"PW2" = PW2,
"PW3" = PW3,
"PH1" = PH1,
"PH2" = PH2,
"PH3" = PH3
)
comm.l %>% names

[1] "PL1" "PL2" "PL3" "PW1" "PW2" "PW3" "PH1" "PH2" "PH3"

In [69]:
%%R

nzero = mtx[mtx == 0] %>% length
mtx[mtx == 0] = round(runif(nzero, 0, 1), 0)
return(mtx)
}

comm.l[[1]][1:5, 1:5]

[,1] [,2] [,3] [,4] [,5]
[1,]    1    1    1    0    1
[2,]    0    1    1    1    1
[3,]    1    1    0    0    1
[4,]    0    1    1    1    0
[5,]    3    1    0    1    1

# Converting to relative abundances

In [127]:
%%R
to.rel = function(x){
y = sum(x)
if(y == 0){
return(x)
} else {
return(x / y)
}
}

comm.l.abs = lapply(comm.l, function(x) x %>% apply(1, function(x) x) %>% t %>% as.data.frame)

df = do.call(rbind, comm.l.abs) %>% as.data.frame
df\$BD = rep(locs, comm.l.abs %>% names %>% length)
df\$dataset = rownames(df)

df = df %>%
gather(OTU, rel_abund, starts_with('V')) %>%
mutate(dataset = gsub('\\.[0-9]+\$', '', dataset))

BD dataset OTU rel_abund
1 1.673230     PL1  V1         0
2 1.676222     PL1  V1         0
3 1.679214     PL1  V1         0
4 1.682206     PL1  V1         0
5 1.685198     PL1  V1         3
6 1.688190     PL1  V1        39

In [130]:
%%R -w 800 -h 600

comm.l.abs = lapply(comm.l, function(x) x %>% apply(1, function(x) x) %>% t %>% as.data.frame)

df = do.call(rbind, comm.l.abs) %>% as.data.frame
df\$BD = rep(locs, comm.l.abs %>% names %>% length)
df\$dataset = rownames(df)
df = df %>%
gather(OTU, abs_abund, starts_with('V')) %>%
mutate(dataset = gsub('\\.[0-9]+\$', '', dataset))

p1 = ggplot(df, aes(BD, abs_abund, group=OTU)) +
geom_line(alpha=0.5) +
labs(x='Buoyant density', y='Abundance') +
facet_wrap(~dataset) +
theme_bw() +
theme(
text = element_text(size=16)
)
p1

In [103]:
%%R -w 800 -h 600

to.rel = function(x){
y = sum(x)
if(y == 0){
return(x)
} else {
return(x / y)
}
}

comm.l.rel = lapply(comm.l, function(x) x %>% apply(1, to.rel) %>% t %>% as.data.frame)

df.rel = do.call(rbind, comm.l.rel) %>% as.data.frame
df.rel\$BD = rep(locs, comm.l.rel %>% names %>% length)
df.rel\$dataset = rownames(df.rel)
df.rel = df.rel %>%
gather(OTU, rel_abund, starts_with('V')) %>%
mutate(dataset = gsub('\\.[0-9]+\$', '', dataset))

p1 = ggplot(df.rel, aes(BD, rel_abund, group=OTU)) +
geom_line() +
labs(x='Buoyant density', y='Relative abundance') +
facet_wrap(~dataset) +
theme_bw() +
theme(
text = element_text(size=16)
)
p1

# Comparing communities

## Shannon diversity

In [106]:
%%R -w 800
df.shan = lapply(comm.l.rel, function(x) diversity(x))
df.shan = do.call(rbind, df.shan) %>% t %>% as.data.frame
df.shan\$BD = locs
df.shan = df.shan %>%
gather(dataset, shannon, starts_with('P'))

p1 = ggplot(df.shan, aes(BD, shannon)) +
geom_line() +
facet_wrap(~dataset) +
labs(x='Buoyant density', 'Shannon index') +
theme_bw() +
theme(
text = element_text(size=16)
)
p1

In [119]:
%%R -w 500 -h 400
# pairwise correlations for each dataset
df.shan.bin = df.shan %>%
group_by(BD_bin = ntile(BD, 24))

calc.spearman = function(x){
cor(x[,'shannon.x'], x['shannon.y'], method='spearman')[1,1]
}

calc.pearson = function(x){
cor(x[,'shannon.x'], x['shannon.y'], method='pearson')[1,1]
}

df.shan.corr = inner_join(df.shan.bin, df.shan.bin, c('BD_bin' = 'BD_bin')) %>%
group_by(dataset.x, dataset.y) %>%
nest() %>%
mutate(model = purrr::map(data, calc.pearson)) %>%
unnest(pearson = model %>% purrr::map(function(x) x)) %>%
ungroup() %>%
select(-data, -model) %>%
mutate(pearson_txt = round(pearson, 2))

# plotting
ggplot(df.shan.corr, aes(dataset.x, dataset.y, fill=pearson)) +
geom_tile() +
geom_text(aes(label=pearson_txt), color='white') +
labs(title='Shannon index') +
theme(
text = element_text(size=16),
axis.title.x = element_blank(),
axis.title.y = element_blank()
)

## Compositional variance

In [73]:
%%R

total.acomp.var = function(x){
x.acomp = acomp(x)
variation(x.acomp) %>% lower.tri %>% sum
}

lapply(comm.l.rel, total.acomp.var)

\$PL1
[1] 499500

\$PL2
[1] 499500

\$PL3
[1] 499500

\$PW1
[1] 499500

\$PW2
[1] 499500

\$PW3
[1] 499500

\$PH1
[1] 499500

\$PH2
[1] 499500

\$PH3
[1] 499500

## Variance by fraction

In [74]:
%%R -w 800

comm.l.rel.var = lapply(comm.l.rel, function(x) apply(x, 1, var))
df.var = do.call(rbind, comm.l.rel.var) %>% t %>% as.data.frame
df.var\$BD = locs
df.var = df.var %>%
gather(dataset, variance, starts_with('P'))

p1 = ggplot(df.var, aes(BD, variance)) +
geom_line() +
facet_wrap(~dataset) +
theme_bw() +
theme(
text = element_text(size=16)
)
p1

In [75]:
%%R -w 800
p1 + facet_wrap(~ dataset, scale='free_y')

### Pairwise pearson correlations

In [120]:
%%R -w 500 -h 400
# pairwise correlations for each dataset
df.var.bin = df.var %>%
group_by(BD_bin = ntile(BD, 24))

#calc.spearman = function(x){
#    cor(x[,'variance.x'], x['variance.y'], method='spearman')[1,1]
#}
calc.pearson = function(x){
cor(x[,'variance.x'], x['variance.y'], method='pearson')[1,1]
}

df.var.corr = inner_join(df.var.bin, df.var.bin, c('BD_bin' = 'BD_bin')) %>%
group_by(dataset.x, dataset.y) %>%
nest() %>%
mutate(model = purrr::map(data, calc.pearson)) %>%
unnest(pearson = model %>% purrr::map(function(x) x)) %>%
ungroup() %>%
select(-data, -model) %>%
mutate(pearson_txt = round(pearson, 2))

# plotting
ggplot(df.var.corr, aes(dataset.x, dataset.y, fill=pearson)) +
geom_tile() +
geom_text(aes(label=pearson_txt), color='white') +
theme(
text = element_text(size=16)
)

### Comparing Shannon & variance spearman values

In [121]:
%%R
# plotting shannon vs variance spearman correlations

df.corr.j = inner_join(df.shan.corr, df.var.corr, c('dataset.x' = 'dataset.x',
'dataset.y' = 'dataset.y'))

ggplot(df.corr.j, aes(pearson.x, pearson.y)) +
geom_point() +
labs(x='Pearson correlations of\nShannnon index values',
y='Pearson correlations of\nvariance values') +
theme_bw() +
theme(
text = element_text(size=16)
)

#### Notes

• Variance and Shannon index are measuring approx. the same thing (at least for these communities)!

## Correlogram

In [122]:
%%R

BD.diffs = function(BDs){
df.BD = expand.grid(BDs, BDs)
df.BD\$diff = df.BD %>% apply(1, diff) %>% abs %>% as.vector

df.BD = df.BD %>%
rownames(df.BD) = df.BD\$Var2
df.BD\$Var2 = NULL
dist.BD = df.BD %>% as.matrix
dist.BD[upper.tri(dist.BD, diag=TRUE)] = 0
dist.BD %>% as.dist
}

dist.BDs = BD.diffs(locs)
dist.BDs %>% length

[1] 435

### Bray-Curtis

In [123]:
%%R -w 800 -h 650

cal.corr = function(df, BDs, nclass=24, ...){
d = vegdist(df, ...)
d[is.nan(d)] = 0
stopifnot((d %>% length) == (BDs %>% length))

mantel.correlog(d, BDs, n.class=nclass)
}

corr.l = lapply(comm.l.rel, cal.corr, BDs=dist.BDs)

par(mfrow=c(3,3))
for (n in names(corr.l)){
plot(corr.l[[n]])
title(n)
}

#### Notes:

• The shift to negative correlations at an index of ~0.025 is likely due to the difference in community composition between the tails and center.
• The small up-tick at the highest distance classes is due to making the comparison span both tails, but still negative due to including comparisons between tails and center of gradient.
• The analysis seems most sensitive to euclidean peak width
• broader peaks = more autocorrelation (as expected)
• Peak location seems to affect the strength of correlation
• more disperse peaks = higher correlation values
• this is due to more autocorrelation across the entire gradient

### Pairwise linear regressions for each dataset

In [131]:
%%R -w 500 -h 400
# converting to dataframe
tmp.l = list()
for(x in names(corr.l)){
tmp = corr.l[[x]][1][['mantel.res']] %>% as.data.frame
tmp\$data = x
tmp = tmp %>%
filter(!is.na(Mantel.cor)) %>%
group_by(class.index.bin = ntile(class.index, 12))
tmp.l[[x]] = tmp
}
df.corr = do.call(rbind, tmp.l) %>%
as.data.frame

# pairwise linear regressions for each dataset
df.corr = inner_join(df.corr, df.corr, c('class.index.bin' = 'class.index.bin')) %>%
group_by(data.x, data.y) %>%
nest() %>%
mutate(model = purrr::map(data, ~lm(Mantel.cor.y ~ Mantel.cor.x, data=.))) %>%
unnest(model %>% purrr::map(broom::glance)) %>%
ungroup() %>%
select(-data, -model) %>%
mutate(r.squared_txt = round(r.squared, 2))

# plotting
ggplot(df.corr, aes(data.x, data.y, fill=r.squared)) +
geom_tile() +
geom_text(aes(label=r.squared_txt), color='white', size=4) +
labs(title='Beta diversity correlogram') +
theme(
text = element_text(size=16)
)

### Binary

In [83]:
%%R -w 800 -h 650

corr.l = lapply(comm.l.rel, cal.corr, BDs=dist.BDs, binary=TRUE, method='jaccard')

par(mfrow=c(3,3))
for (n in names(corr.l)){
plot(corr.l[[n]])
title(n)
}