In [1]:
%load_ext rpy2.ipython

In [173]:
%%R
library(coenocliner)
library(compositions)
library(dplyr)
library(tidyr)
library(ggplot2)


/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: 
Attaching package: ‘tidyr’


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following object is masked from ‘package:coenocliner’:

    expand


  res = super(Function, self).__call__(*new_args, **new_kwargs)

Simulate comunities


In [3]:
%%R 
set.seed(2)
M <- 3                                    # number of species
ming <- 3.5                                # gradient minimum...
maxg <- 7                                  # ...and maximum
meang = mean(c(ming, maxg))
locs <- seq(ming, maxg, length = 100)      # gradient locations
opt  <- rnorm(M, mean=meang, sd=0.5)      #runif(M, min = ming, max = maxg)   # species optima
tol  <- rep(0.25, M)                       # species tolerances
h    <- ceiling(rlnorm(M, meanlog = 10))    # max abundances
pars <- cbind(opt = opt, tol = tol, h = h) # put in a matrix

In [4]:
%%R
mu1 <- coenocline(locs, responseModel = "gaussian", params = pars,
                    countModel = "poisson") #, countParams = list(alpha = 0.5))
head(mu1)


     [,1] [,2] [,3]
[1,]    0    0    0
[2,]    0    0    0
[3,]    0    0    0
[4,]    0    0    0
[5,]    1    0    0
[6,]    0    0    0

In [5]:
%%R -w 600 -h 350
matplot(locs, mu1, lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")



In [6]:
%%R 
set.seed(2)
M <- 2                                    # number of species
ming <- 3.5                                # gradient minimum...
maxg <- 7                                  # ...and maximum
meang = mean(c(ming, maxg)) + 0.3
locs <- seq(ming, maxg, length = 100)      # gradient locations
opt  <- rnorm(M, mean=meang, sd=0.5)   #runif(M, min = ming, max = maxg)   # species optima
tol  <- rep(0.25, M)                       # species tolerances
h    <- ceiling(rlnorm(M, meanlog = 9.5))    # max abundances
pars <- cbind(opt = opt, tol = tol, h = h) # put in a matrix

In [7]:
%%R

mu2 <- coenocline(locs, responseModel = "gaussian", params = pars,
                    countModel = "poisson") #, countParams = list(alpha = 0.5))
mu2 = cbind(mu1, mu2)
head(mu2)


     [,1] [,2] [,3] [,4] [,5]
[1,]    0    0    0    0    0
[2,]    0    0    0    0    0
[3,]    0    0    0    0    0
[4,]    0    0    0    0    0
[5,]    1    0    0    0    0
[6,]    0    0    0    0    0

In [8]:
%%R -w 600 -h 350

matplot(locs, mu2, lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")


Relative abundances


In [9]:
%%R

to.rel = function(x){
    y = sum(x)
    if(y == 0){
        return(x)
    } else {
        return(x / y)
    }
}

mu1_rel = t(apply(mu1, 1, to.rel))
mu2_rel = t(apply(mu2, 1, to.rel))

# check
apply(mu1_rel, 1, sum)


  [1] 0 0 0 0 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

In [10]:
%%R -w 600 -h 350

matplot(locs, mu1_rel, lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")



In [11]:
%%R -w 600 -h 350

matplot(locs, mu2_rel, lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")


CLR transformation

With raw compositional data


In [12]:
%%R -w 600 -h 350

mu1_clr = clr(mu1_rel)
matplot(locs, mu1_clr, lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")



In [13]:
%%R -w 600 -h 350

mu2_clr = clr(mu2_rel)
matplot(locs, mu2_clr, lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")



In [14]:
%%R -w 600 -h 350

matplot(locs, mu2_clr[,c(3,5)], lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")


ALR transformation (from most abundant taxon)


In [15]:
%%R
apply(mu2, 2, sum)


[1]  126235  360437  445578 1158892   76601

In [16]:
%%R -w 600 -h 350

mu2_alr = alr(mu2_rel, ivar=5)
matplot(locs, mu2_alr, lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")


extrapolating beyond veil-line abundances


In [17]:
%%R -w 600 -h 350

mu1_rel_nZ = mu1_rel
mu1_rel_nZ[mu1_rel_nZ == 0] = min(mu1_rel[mu1_rel > 0]) / 2

mu1_clr = clr(mu1_rel_nZ)
matplot(locs, mu1_clr, lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")



In [18]:
%%R -w 600 -h 350

mu2_rel_nZ = mu2_rel
mu2_rel_nZ[mu2_rel_nZ == 0] = min(mu2_rel[mu2_rel > 0]) / 2

mu2_clr = clr(mu2_rel_nZ)
matplot(locs, mu2_clr, lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")



In [19]:
%%R -w 600 -h 350

mu2_alr = alr(mu2_rel_nZ, ivar=5)
matplot(locs, mu2_alr, lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")


spline-fitting sandbox


In [20]:
%%R -w 600 -h 350

ss_fit = function(y, x){
    #ss = smooth.spline(x,y)
    #return(ss$y)
    ss = pspline::smooth.Pspline(x,y,method=3)
    #plot(ss)
    return(ss$ysmth)
}

mu2_clrSsInv = clrInv(apply(mu2_clr, 2, ss_fit, x=locs))

matplot(locs, mu2_clrSsInv, lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")



In [21]:
%%R
center.mass = function(y,x){
    wm = weighted.mean(x, y)
    return(wm)    
}

apply(mu2, 2, center.mass, x=locs) %>% print
apply(mu2_rel, 2, center.mass, x=locs) %>% print 
apply(mu2_clr, 2, center.mass, x=locs) %>% print 
apply(mu2_clrSsInv, 2, center.mass, x=locs) %>% print


[1] 4.800725 5.342898 6.043901 5.101303 5.642460
[1] 4.186292 5.417059 6.373717 4.925863 5.720374
[1] 158.675512   5.761936  23.205569   3.359175   4.051528
[1] 4.103747 5.232884 6.308480 4.898518 5.185135

In [ ]:


In [22]:
%%R -w 600 -h 350

ss3 = smooth.spline(locs, mu2_clr[,3])
ss5 = smooth.spline(locs, mu2_clr[,5])

matplot(locs, mu2_clr[,c(3,5)], lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")
lines(ss3, lty=2, col='black')
lines(ss5, lty=2, col='red')



In [23]:
%%R -w 600 -h 350

plot_clr_spline = function(col, mu, mu_rel, mu_clr, locs){
    ss = smooth.spline(locs, mu_clr[,col])
    plot(locs, mu[,col])
    plot(locs, mu_rel[,col])
    plot(locs, mu_clr[,col])
    plot(locs, ss$y)
    plot(locs, clrInv(ss$y))
    }

plot_clr_spline(1, mu2, mu2_rel, mu2_clr, locs)



In [24]:
%%R -w 600 -h 350

plot_clr_spline(2, mu2, mu2_rel, mu2_clr, locs)



In [25]:
%%R -w 600 -h 350

plot_clr_spline(3, mu2, mu2_rel, mu2_clr, locs)



In [26]:
%%R -w 600 -h 350

plot_clr_spline(4, mu2, mu2_rel, mu2_clr, locs)



In [27]:
%%R -w 600 -h 350

plot_clr_spline(5, mu2, mu2_rel, mu2_clr, locs)


Calculating center of mass


In [29]:
%%R

center.mass = function(locs, mu, col, ss=FALSE){
    if(ss==TRUE){
        y = smooth.spline(locs, mu[,col])
        y = clrInv(y$y)
    } else {
        y = mu[,col]
    }
    wm = weighted.mean(locs, y)
    print(wm)
    }

col = 1
center.mass(locs, mu2, col) 
center.mass(locs, mu2_rel, col)
center.mass(locs, mu2_clr, col, ss=TRUE)


[1] 4.800725
[1] 4.186292
[1] 3.93535

In [30]:
%%R

col = 2
center.mass(locs, mu2, col) 
center.mass(locs, mu2_rel, col)
center.mass(locs, mu2_clr, col, ss=TRUE)


[1] 5.342898
[1] 5.417059
[1] 5.203474

In [31]:
%%R

col = 3
center.mass(locs, mu2, col) 
center.mass(locs, mu2_rel, col)
center.mass(locs, mu2_clr, col, ss=TRUE)


[1] 6.043901
[1] 6.373717
[1] 6.778234

In [32]:
%%R

col = 4
center.mass(locs, mu2, col) 
center.mass(locs, mu2_rel, col)
center.mass(locs, mu2_clr, col, ss=TRUE)


[1] 5.101303
[1] 4.925863
[1] 4.45979

In [33]:
%%R

col = 5
center.mass(locs, mu2, col) 
center.mass(locs, mu2_rel, col)
center.mass(locs, mu2_clr, col, ss=TRUE)


[1] 5.64246
[1] 5.720374
[1] 6.072672

In [34]:
%%R

col = 6
center.mass(locs, mu2, col) 
center.mass(locs, mu2_rel, col)
center.mass(locs, mu2_clr, col, ss=TRUE)


Error in mu[, col] : subscript out of bounds
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Error in mu[, col] : subscript out of bounds

  res = super(Function, self).__call__(*new_args, **new_kwargs)

Calculating compositional stats


In [35]:
%%R
# mean
x = acomp(mu2_rel)
mean(x)


[1] 0.16276251 0.16075382 0.25461552 0.33122418 0.09064397
attr(,"class")
[1] acomp

In [36]:
%%R
mvar(x) %>% print
msd(x) %>% print


[1] 22.89199
[1] 2.29597

In [37]:
%%R
variation(x)


          [,1]      [,2]      [,3]      [,4]      [,5]
[1,]  0.000000  7.924724 37.709954  2.519249 18.803430
[2,]  7.924724  0.000000 11.364752  1.534914  2.329903
[3,] 37.709954 11.364752  0.000000 21.038942  3.592937
[4,]  2.519249  1.534914 21.038942  0.000000  7.641137
[5,] 18.803430  2.329903  3.592937  7.641137  0.000000

In [38]:
%%R -h 300
matplot(locs, mu2[,c(1,4)], lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")
matplot(locs, mu2_rel[,c(1,4)], lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")
matplot(locs, mu2_clr[,c(1,4)], lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")



In [39]:
%%R -h 300
matplot(locs, mu2[,c(1,3)], lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")
matplot(locs, mu2_rel[,c(1,3)], lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")
matplot(locs, mu2_clr[,c(1,3)], lty = "solid", type = "p", pch = 1:10, cex = 0.8,
        xlab = "pH", ylab = "Abundance")