Let's start with a simple model:
$$ \begin{align} n &\sim \mathrm{Normal}(\mu, \sigma^2) \\ \mu &\sim \mathrm{Gamma}(2, 1) \\ \sigma &\sim \mathrm{Gamma}(1, 2) \end{align} $$
In [1]:
using Distributions
using ForwardDiff
using VinDsl
srand(12587);
In [2]:
N = 5000
a, b, c, d = 2, 1, 1, 2
λ = Gamma(a, b)
ϕ = Gamma(d, c)
μ = rand(λ)
σ = rand(ϕ)
n = rand(Normal(μ, σ), N);
In [3]:
using PyPlot
plt[:hist](n);
In [4]:
constrain_positive(x) = exp(x)
unconstrain_positive(x) = log(x)
post_pars(d::Distribution) = (p = length(d); Int(p * (p + 3)/2))
Out[4]:
In [5]:
vars = [λ, ϕ]
nvars = length(vars)
npars = sum(map(post_pars, vars))
Out[5]:
In [6]:
x = rand(npars)
Out[6]:
In [7]:
function ELBO(x)
# make posterior distributions
d_λ = Normal(x[1], exp(x[2]))
d_ϕ = Normal(x[3], exp(x[4]))
# scaled unconstrained parameters
ζ_λ = rand(d_λ)
ζ_ϕ = rand(d_ϕ)
# constrained parameters
λ = constrain_positive(ζ_λ)
ϕ = constrain_positive(ζ_ϕ)
# jacobian
jac = ForwardDiff.jacobian(constrain_positive)
# make ELBO
L = sum(map(H, [d_λ, d_ϕ]))
L += logpdf(Gamma(a, b), λ)
L += logpdf(Gamma(d, c), ϕ)
L += sum(logpdf(Normal(λ, ϕ), n))
L += logdet(jac([ζ_λ]))
L += logdet(jac([ζ_ϕ]))
end
Out[7]:
In [8]:
ELBO(x)
Out[8]:
In [9]:
x0 = randn(npars)
Out[9]:
In [10]:
∇L = ForwardDiff.gradient(ELBO)
Out[10]:
In [11]:
ELBO(x)
Out[11]:
In [12]:
eps = 0.1
decay = 0.9
x = x0
avg_sq_grad = nothing
elbo = Float64[]
Out[12]:
In [13]:
for idx in 1:200
gg = ∇L(x)
if avg_sq_grad != nothing
avg_sq_grad = avg_sq_grad * decay + gg.^2 * (1 - decay)
else
avg_sq_grad = gg.^2
end
x += eps * gg ./ (sqrt(avg_sq_grad) + 1e-8)
L = ELBO(x)
display(L)
push!(elbo, L)
end
In [14]:
plot(elbo)
plt[:ylim](-2e4, -9000)
Out[14]:
In [15]:
∇L(x)
Out[15]:
In [16]:
x
Out[16]:
In [17]:
exp(x[1]), exp(x[3])
Out[17]:
In [18]:
μ, σ
Out[18]:
In [ ]: