Goal

  • Assessing the error in taxon abundances when using qPCR data + 16S sequence relative abundances to determine taxon proportional absolute abundances

Init


In [2]:
%load_ext rpy2.ipython

In [4]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)


/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: 
Attaching package: ‘dplyr’


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from ‘package:stats’:

    filter, lag


  res = super(Function, self).__call__(*new_args, **new_kwargs)
/opt/anaconda/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


  res = super(Function, self).__call__(*new_args, **new_kwargs)

In [3]:
def neg_binom_err(m, r, negs=False):
    """Adding negative binomial distribuiton error, where variance
    scales more with the mean than a poisson distribution if (r < inf).

    Parameters
    ----------
    m : float
        Mean value
    r : float
        Negative binomial dispersion parameter
    negs : bool
        Negative values allowed? (otherwise 0)
    """
    sigma = np.sqrt(m + m**2 / r)
    x =  np.random.normal(m, sigma)
    if negs==False and x < 0:
        x = 0
    return x

Making dataset


In [23]:
%%R -w 900 -h 300

n = 500
meanlog = 0.5
sdlog = 1

comm = rlnorm(n, meanlog, sdlog)
comm = data.frame(1:length(comm), comm)
colnames(comm) = c('taxon', 'count')

comm = comm %>%
    mutate(taxon = as.character(taxon)) %>%
    group_by() %>%
    mutate(rel_abund = count / sum(count)) %>%
    ungroup()

comm$taxon = reorder(comm$taxon, -comm$rel_abund)

ggplot(comm, aes(taxon, rel_abund)) +
    geom_point() +
    theme_bw() +
    theme(
        axis.text.x = element_blank()
    )


Simulating qPCR data


In [ ]:
%%R

neg_binom_err = function(m, r, negs=FALSE){
    sigma = sqrt(m + m**2 / r)
    x = rnorm(1, m, sigma)
    if (negs==FALSE & x < 0){
        x = 0
    }
    return(x)
}

In [ ]:
%%R

total_copies = 1e9
n_reps = 3
r = 0.5

neg_binom_err(total_copies, r)

#sapply(1:n_reps, function(x) neg_binom_err(total_copies, r))