In [263]:
# dirs
workDir = '/home/nick/notebook/SIPSim/dev/bac_genome3/PCR_sim/'
genomeDir = '/home/nick/notebook/SIPSim/dev/bac_genome3/genomes/'
R_dir = '/home/nick/notebook/SIPSim/lib/R/'
# input files
OTU_table_file = os.path.join(workDir, 'OTU_abs1e9.txt')
# params
nprocs = 24
In [264]:
import numpy as np
import matplotlib.pyplot as plt
In [265]:
%load_ext rpy2.ipython
%matplotlib inline
In [266]:
%%R
library(ggplot2)
library(dplyr)
library(tidyr)
library(gridExtra)
In [320]:
%%R
NegBin = function(mu, alpha=0.5, n=1) {
mu = mu * rgamma(n, shape = alpha, scale=alpha) #rate = 1/alpha)
rpois(n, lambda = mu)
}
In [248]:
%%R
# taxon abundances
abunds = runif(1000, min=1, max=1000)
hist(abunds, 50)
In [249]:
%%R
hist(sapply(abunds, NegBin), 50)
In [250]:
%%R
# abunds with error: repeating count draw many times to increase variance estimation
alpha = 0.1
NegBin.a = function(x){
NegBin(x, alpha=alpha)
}
abund.l = list()
for (i in 1:1000){
abund.l[[i]] = sapply(abunds, NegBin.a)
}
abund.e = do.call(rbind, abund.l) %>%
as.data.frame %>% t
var.e = apply(abund.e, 1, var) %>% as.vector
var.e %>% head
In [251]:
%%R
df = data.frame(abund = abunds, var.e = var.e)
ggplot(df, aes(abund, var.e)) +
geom_point()
In [252]:
%%R
# abunds with error: repeating count draw many times to increase variance estimation
alpha = 0.5
NegBin.a = function(x){
NegBin(x, alpha=alpha)
}
abund.l = list()
for (i in 1:1000){
abund.l[[i]] = sapply(abunds, NegBin.a)
}
abund.e = do.call(rbind, abund.l) %>%
as.data.frame %>% t
var.e = apply(abund.e, 1, var) %>% as.vector
var.e %>% head
In [253]:
%%R
df = data.frame(abund = abunds, var.e = var.e)
ggplot(df, aes(abund, var.e)) +
geom_point()
In [254]:
%%R
# abunds with error: repeating count draw many times to increase variance estimation
alpha = 10
NegBin.a = function(x){
NegBin(x, alpha=alpha)
}
abund.l = list()
for (i in 1:1000){
abund.l[[i]] = sapply(abunds, NegBin.a)
}
abund.e = do.call(rbind, abund.l) %>%
as.data.frame %>% t
var.e = apply(abund.e, 1, var) %>% as.vector
var.e %>% head
In [255]:
%%R
df = data.frame(abund = abunds, var.e = var.e)
ggplot(df, aes(abund, var.e)) +
geom_point()
In [310]:
def neg_binom(mu, alpha=0.5, size=None):
mu = mu * np.random.gamma(shape=alpha, scale=alpha, size=size)
f = lambda x: np.random.poisson(size=1, lam=x)[0]
return [f(x) for x in mu]
In [224]:
counts = np.random.uniform(1000, 1, 1000)
plt.hist(counts, 50)
Out[224]:
In [260]:
n_counts = len(counts)
n_rep = 1000
alpha = 0.1
ar = np.zeros((n_rep, n_counts))
neg_binom_a = lambda x : neg_binom(x, alpha=alpha)
for i in range(n_rep):
ar[i,] = np.apply_along_axis(neg_binom_a, 0, counts)
counts_var = np.var(ar, 0)
In [261]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(counts, counts_var, s=1)
xl = ax.set_xlabel("Counts")
yl = ax.set_ylabel("Variance")
ax.set_xlim([0,np.max(counts)])
ax.set_ylim([0,np.max(counts_var)])
Out[261]:
In [256]:
n_counts = len(counts)
n_rep = 1000
alpha = 0.5
ar = np.zeros((n_rep, n_counts))
neg_binom_a = lambda x : neg_binom(x, alpha=alpha)
for i in range(n_rep):
ar[i,] = np.apply_along_axis(neg_binom_a, 0, counts)
counts_var = np.var(ar, 0)
In [257]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(counts, counts_var, s=1)
xl = ax.set_xlabel("Counts")
yl = ax.set_ylabel("Variance")
ax.set_xlim([0,np.max(counts)])
ax.set_ylim([0,np.max(counts_var)])
Out[257]:
In [258]:
n_counts = len(counts)
n_rep = 1000
alpha = 10
ar = np.zeros((n_rep, n_counts))
neg_binom_a = lambda x : neg_binom(x, alpha=alpha)
for i in range(n_rep):
ar[i,] = np.apply_along_axis(neg_binom_a, 0, counts)
counts_var = np.var(ar, 0)
In [259]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(counts, counts_var, s=1)
xl = ax.set_xlabel("Counts")
yl = ax.set_ylabel("Variance")
ax.set_xlim([0,np.max(counts)])
ax.set_ylim([0,np.max(counts_var)])
Out[259]:
In [304]:
!cd $workDir; \
SIPSim OTU_add_error \
OTU_abs1e9.txt \
> OTU_abs1e9_err.txt
!cd $workDir; \
head -n 4 OTU_abs1e9_err.txt
In [298]:
%%R -i workDir
setwd(workDir)
tbl.otu = read.delim('OTU_abs1e9_err.txt', sep='\t')
tbl.otu %>% head(n=3)
In [299]:
%%R
tbl.otu.g = tbl.otu %>%
gather(abund_type, value, count, rel_abund)
tbl.otu.g %>% head(n=3)
In [300]:
%%R -w 700
ggplot(tbl.otu.g, aes(BD_mid, value, fill=taxon, color=taxon)) +
geom_point() +
geom_line() +
#geom_area(position='fill', alpha=0.5) +
facet_grid(abund_type ~ ., scales='free_y')
In [ ]:
In [ ]:
In [ ]: