In [53]:
%matplotlib inline
import numpy as np
import numpy.random as rng
import pylab as pl
In [54]:
N_categories = 20
In [55]:
def make_sample(Cat_SRC, Cat_BGD, N_SRC, N_counts):
    # Draw N_SRC counts from the source categorical and the remaining
    # N_BGD counts from the background categorical; return both count vectors.
    N_BGD = N_counts - N_SRC
    counts_SRC = rng.multinomial(1, Cat_SRC, N_SRC).sum(0)
    counts_BGD = rng.multinomial(1, Cat_BGD, N_BGD).sum(0)
    return counts_SRC, counts_BGD
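A hedged usage sketch (not in the original notebook): the two Dirichlet-drawn distributions below are hypothetical inputs, shown only to illustrate how make_sample might be called.
In [ ]:
# sketch only: hypothetical source/background categorical distributions
Cat_SRC_demo = rng.dirichlet(np.ones(N_categories))
Cat_BGD_demo = rng.dirichlet(np.ones(N_categories))
counts_SRC_demo, counts_BGD_demo = make_sample(Cat_SRC_demo, Cat_BGD_demo, 500, 2000)
print(counts_SRC_demo.sum(), counts_BGD_demo.sum())  # expect 500 and 1500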
In [56]:
def get_logL_patch(counts, alphas):
    """
    Takes a vector of integer counts (across bins) and a vector of integer alpha
    hyperparameters (ditto). Returns the log likelihood of those counts under the
    Dirichlet-multinomial distribution with those hyperparameters.

    Note: the lookup table is indexed directly, so the alphas must be integers.
    """
    # initialize internal (static) variables on first call
    try:
        get_logL_patch.Cmax
    except AttributeError:
        get_logL_patch.Cmax = 1
        get_logL_patch.gammaln = np.array([np.inf])
    N, A = np.sum(counts), np.sum(alphas)
    # extend the lookup table if necessary; gammaln[n] = ln Gamma(n)
    if N + A >= get_logL_patch.Cmax:
        print("get_logL_patch(): rebuilding log-Gamma lookup table")
        get_logL_patch.Cmax = 8 * (N + A)
        get_logL_patch.gammaln = np.append(np.array([np.inf, 0.]),
                                           np.cumsum(np.log(np.arange(get_logL_patch.Cmax - 2) + 1)))
    # now the actual log likelihood
    logL = get_logL_patch.gammaln[A] - get_logL_patch.gammaln[N + A] \
        + np.sum(get_logL_patch.gammaln[counts + alphas]) - np.sum(get_logL_patch.gammaln[alphas])
    # multinomial coefficient: ln N! - sum_k ln c_k! (note the +1 in the Gamma arguments)
    logL = logL + get_logL_patch.gammaln[N + 1] - np.sum(get_logL_patch.gammaln[counts + 1])
    return logL
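A sanity check (not in the original notebook): the cell below compares get_logL_patch against a direct evaluation of the same Dirichlet-multinomial log likelihood using scipy.special.gammaln; the small test counts and alphas are made up for illustration.
In [ ]:
# sketch: direct Dirichlet-multinomial log likelihood,
#   ln N! - sum_k ln c_k! + ln Gamma(A) - ln Gamma(N+A) + sum_k [ln Gamma(c_k+a_k) - ln Gamma(a_k)]
from scipy.special import gammaln

def logL_direct(counts, alphas):
    N, A = np.sum(counts), np.sum(alphas)
    return (gammaln(N + 1) - np.sum(gammaln(counts + 1))
            + gammaln(A) - gammaln(N + A)
            + np.sum(gammaln(counts + alphas)) - np.sum(gammaln(alphas)))

test_counts = rng.multinomial(100, np.ones(N_categories) / N_categories)
test_alphas = 3 * np.ones(N_categories, dtype=int)
print(get_logL_patch(test_counts, test_alphas), logL_direct(test_counts, test_alphas))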
In [80]:
C = 100
true_alphas = C * np.ones(N_categories, dtype=int)
Categorical = rng.dirichlet(true_alphas)
# print(Categorical)
# draw 20000 samples from the categorical and bin them into counts
counts = rng.multinomial(1, Categorical, 20000).sum(0)
print(counts)
# scan a grid of candidate concentrations C and evaluate the
# Dirichlet-multinomial log likelihood of the observed counts at each
Cs = np.arange(1, 400)
scores = []
for C in Cs:
    test_alphas = C * np.ones(N_categories, dtype=int)
    logL = get_logL_patch(counts, test_alphas)
    scores.append(logL)
pl.plot(Cs, scores)
pl.draw()
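A small follow-up sketch (not in the original): read off the concentration on the scanned grid that maximizes the log likelihood and label the plot; whether this lands near the true C = 100 depends on the particular random draw.
In [ ]:
# sketch: locate the maximum-likelihood concentration on the scanned grid
scores = np.array(scores)
print("grid maximum at C =", Cs[np.argmax(scores)], "(true C was 100)")
pl.plot(Cs, scores)
pl.xlabel("concentration C")
pl.ylabel("Dirichlet-multinomial log likelihood")
pl.axvline(100., color="k", alpha=0.5)
pl.draw()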