In [1]:
# define test data as in the official code by Palla et al.
D = 40
K = 2
N = 100
C = zeros(D, K)
C[1:20, 1] = 1
C[21:40, 2] = 1
cc = zeros(1,D)
cc[1:20] = 1
cc[21:40] = 2
# set random seed so runs are comparable to the reference implementation
srand(12345)
In [3]:
# set G and X (factor loadings, and latent factors) of test data
G = mu_g + sigma_g * randn(D, K)
X = mu_x + sigma_x * randn(K, N)
# set noise
ϵ = mu_noise + sigma_noise * randn(D,N);
In [4]:
# test data
Y = (G .* C) * X + ϵ;
(D, N) = size(Y)
Out[4]:
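As a quick sanity check of the synthetic setup (a minimal sketch using only the quantities defined above): every dimension should be assigned to exactly one cluster, cc should encode the same K clusters as the columns of C, and Y should be D × N.

# sanity checks on the synthetic test data (sketch; uses only C, cc and Y from above)
@assert all(sum(C, 2) .== 1)      # each dimension belongs to exactly one cluster
@assert length(unique(cc)) == K   # cc encodes the same K clusters as the columns of C
@assert size(Y) == (D, N)         # observations are D x N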
In [132]:
include("../src/BNP.jl")
In [141]:
BNP.train(BNP.VCM(), BNP.Gibbs(), BNP.IncrementalInitialisation(), Y)
In [127]:
spones(1)
In [37]:
collect(3)
Out[37]:
In [19]:
(D, N) = size(Y)
# lets "forget" true value of X, G, C and cc
X = randn(1,N)
G = randn(1,1)
C = ones(1,1)
cc = [1]
Ns = [1]
Yd = Y[1,:]
maskd = m_unobserved[1,:]
Yd[maskd] = 0
tic();
print("evaluate dimension: 1")
# run one iteration using Yd only
X = sample_latent_factors(Yd, C, sigma_noise, sigma_x, X, G)
(X, cc, C, G) = gibbs_aux_assignment(Yd, X, cc, C, G, m_aux, sigma_noise, sigma_g, mu_g, α )
K = size(C, 2)
α = random_concentration_parameter(α, 1.0, 1.0, N, K)
sigma_noise = random_sigma_noise(Yd, G, C, X, noise_hyper_a, noise_hyper_b)
sigma_g = random_sigma_g(Yd, G, C, X, sigma_hyper_a, sigma_hyper_b)
# loop over D-1 dimensions
for d = 2:D
    YY = Y[d,:]
    YY[m_unobserved[d,:]] = 0.0
    Yd = cat(1, Yd, YY)
    print(" ", d)
    # sample unobserved data from model
    (X, cc, C, G) = gibbs_aux_assignment(Yd, X, cc, C, G, m_aux, sigma_noise, sigma_g, mu_g, α, out_dim = d)
    # not sure why this is required...
    # run one iteration:
    X = sample_latent_factors(Yd, C, sigma_noise, sigma_x, X, G)
    (X, cc, C, G) = gibbs_aux_assignment(Yd, X, cc, C, G, m_aux, sigma_noise, sigma_g, mu_g, α)
    K = size(C, 2)
    α = random_concentration_parameter(α, 1.0, 1.0, N, K)
    sigma_noise = random_sigma_noise(Yd, G, C, X, noise_hyper_a, noise_hyper_b)
    sigma_g = random_sigma_g(Yd, G, C, X, sigma_hyper_a, sigma_hyper_b)
    # actually sample unobserved data
    mask = m_unobserved[1:d,:]
    (dd, nn) = size(Yd)
    Yd[mask] = sample_vcm_data(nn, dd, G, X, C, mu_noise, sigma_noise)[mask]
end
t = toq();
println("")
println("finished [", t," sec].")
In [39]:
G
Out[39]:
In [47]:
# log-density of independent univariate normals with common σ;
# note: the caller passes n as the NEGATIVE number of entries (e.g. n = -D*N),
# so n*(log(σ) + 0.5*log(2π)) carries the usual minus sign of the normalisation.
function univarnormal_logpdf(X::Array{Float64}, Mu::Array{Float64}; σ=1.0, n=1)
    E = X - Mu
    return n*(log(σ) + 0.5*log(2*pi)) - 0.5*sum(E.^2) / (σ^2)
end
Out[47]:
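A quick check of the sign convention (a sketch, not part of the original run): with the negative count passed as n, the function should reproduce the standard-normal log density -0.5*log(2π) ≈ -0.9189 at X = Mu = 0.

# sketch: check univarnormal_logpdf against the analytic standard-normal log density
lp = univarnormal_logpdf([0.0], [0.0], σ = 1.0, n = -1)
@assert abs(lp - (-0.5 * log(2 * pi))) < 1e-12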
In [149]:
# compute energy
(D, N) = size(Y)
llh = univarnormal_logpdf(Y, (G.*C) * X, σ=sigma_noise, n = -D*N)
# simplified normal pdf
prior_g = -0.5 * (1/sigma_g.^2) * sum( (G-repmat((mu_g * ones(D, 1)), 1, K)).^2 )
#univarnormal_logpdf(G, repmat((mu_g * ones(D, 1)), 1, K), σ=sigma_g)
prior_x = -0.5 * (1/sigma_x.^2) * sum( (X-repmat((mu_x * ones(K, 1)), 1, N)).^2 )
#univarnormal_logpdf(X, repmat((mu_x * ones(K, 1)), 1, N), σ=sigma_x)
# CRP partition prior (exchangeable partition probability)
K = size(C, 2)
Ns = map(c -> sum(cc .== c), unique(cc))
prior_c = lgamma(α) + K*log(α) + sum(lgamma(Ns)) - lgamma(α + D)
println("llh: ", llh)
println("prior g: ", prior_g)
println("prior x: ", prior_x)
println("prior c: ", prior_c)
# energy
e = llh + prior_g + prior_x + prior_c
Out[149]:
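The prior_c term above is the log of the Chinese restaurant process' exchangeable partition probability, p(partition) = α^K · Γ(α)/Γ(α + D) · ∏_k Γ(N_k), where the N_k are the cluster sizes and D = sum(Ns). A small standalone helper (hypothetical name, shown only for clarity) computes the same quantity from the block sizes:

# hypothetical helper: log CRP exchangeable partition probability from block sizes Ns
crp_log_prior(α, Ns) = lgamma(α) + length(Ns) * log(α) + sum(lgamma(Ns)) - lgamma(α + sum(Ns))
# should reproduce prior_c computed above:
# crp_log_prior(α, Ns) ≈ prior_c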
In [55]:
# define MCMC result object
type VCMS
    # energy
    energy::Float64
    # number of clusters
    K::Int
    # concentration parameter (Dirichlet)
    α::Float64
end
In [151]:
burnin = 0
(D, N) = size(Y)
for i in 1:burnin
    Y[m_unobserved] = sample_vcm_data(N, D, G, X, C, mu_noise, sigma_noise)[m_unobserved]
    # run one iteration
    X = sample_latent_factors(Y, C, sigma_noise, sigma_x, X, G)
    (X, cc, C, G) = gibbs_aux_assignment(Y, X, cc, C, G, m_aux, sigma_noise, sigma_g, mu_g, α)
    K = size(C, 2)
    α = random_concentration_parameter(α, 1.0, 1.0, N, K)
    sigma_noise = random_sigma_noise(Y, G, C, X, noise_hyper_a, noise_hyper_b)
    sigma_g = random_sigma_g(Y, G, C, X, sigma_hyper_a, sigma_hyper_b)
end
In [170]:
results = VariableClusteringSetting[]
thinout = 1
iterations = 1000
Px = zeros(iterations)
Pg = zeros(iterations)
Pc = zeros(iterations)
Pl = zeros(iterations)
for i in 1:iterations
    # run thinning (ignore some Gibbs sweeps if thinout > 1)
    for j in 1:thinout
        Y[m_unobserved] = sample_vcm_data(N, D, G, X, C, mu_noise, sigma_noise)[m_unobserved]
        # run one iteration
        X = sample_latent_factors(Y, C, sigma_noise, sigma_x, X, G)
        (X, cc, C, G) = gibbs_aux_assignment(Y, X, cc, C, G, m_aux, sigma_noise, sigma_g, mu_g, α)
        K = size(C, 2)
        α = random_concentration_parameter(α, 1.0, 1.0, N, K)
        sigma_noise = random_sigma_noise(Y, G, C, X, noise_hyper_a, noise_hyper_b)
        sigma_g = random_sigma_g(Y, G, C, X, sigma_hyper_a, sigma_hyper_b)
    end
    # compute energy
    llh = univarnormal_logpdf(Y, (G.*C) * X, σ=sigma_noise, n = -D*N)
    # simplified normal pdf
    prior_g = -0.5 * (1/sigma_g.^2) * sum( (G - repmat((mu_g * ones(D, 1)), 1, K)).^2 )
    #univarnormal_logpdf(G, repmat((mu_g * ones(D, 1)), 1, K), σ=sigma_g)
    prior_x = -0.5 * (1/sigma_x.^2) * sum( (X - repmat((mu_x * ones(K, 1)), 1, N)).^2 )
    #univarnormal_logpdf(X, repmat((mu_x * ones(K, 1)), 1, N), σ=sigma_x)
    # CRP partition prior (exchangeable partition probability)
    K = size(C, 2)
    Ns = map(c -> sum(cc .== c), unique(cc))
    prior_c = lgamma(α) + K*log(α) + sum(lgamma(Ns)) - lgamma(α + D)
    # energy
    e = llh + prior_g + prior_x + prior_c
    Px[i] = prior_x
    Pc[i] = prior_c
    Pg[i] = prior_g
    Pl[i] = llh
    # store result of current Gibbs sweep
    push!(results, VariableClusteringSetting(e, X, G, C, α, sigma_noise, sigma_g))
    # println("iteration: ", i, " with energy: ", e, " K: ", K)
end
In [177]:
# do some plots
using Gadfly
E = map(vcm -> vcm.energy, results);
plot(x = [1:iterations], y = E, Geom.line)
Out[177]:
In [94]:
C
Cy = zeros(D, K)
Cy[1:20, 1] = 1
Cy[21:40, 2] = 1
sum(abs(C - Cy))
Out[94]:
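Note that sum(abs(C - Cy)) assumes the sampled clusters come out in the same column order as the ground truth; a label-permutation-invariant comparison (a sketch, only applicable when C and Cy have the same size) would be:

# sketch: compare C to Cy up to a permutation of the cluster columns
if size(C) == size(Cy)
    diffs = [sum(abs(C[:, collect(p)] - Cy)) for p in permutations(1:size(Cy, 2))]
    println("best column-permuted difference: ", minimum(diffs))
end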
In [88]:
using UCIMLRepo
using DataFrames
ucirepoinfo("ionosphere")
In [107]:
url = string("http://archive.ics.uci.edu/ml/machine-learning-databases/yeast/yeast.data")
dat = UCIMLRepo.download_http(url)
rows = split(dat,'\n',false)
df = DataFrame()
# one DataFrame column per row of the file, holding its whitespace-separated fields
for i = 1:length(rows)
    df[i] = filter(x -> length(x) > 0, split(rows[i],' '))
end
data = convert(Array, df)
classes = data[end,:]
Y = float(data[2:end-1,:])
Out[107]:
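A quick shape check on the parsed matrix (if I recall the UCI description correctly, the yeast set has 1484 samples with 8 numeric features plus a name and a class label per row, so with this fields-by-samples layout Y should be 8 × 1484):

# sanity check: fields-by-samples layout of the parsed yeast data
println("Y: ", size(Y), "  classes: ", size(classes))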
In [115]:
data = convert(Array, ucirepodata("ionosphere"))
classes = data[end,:]
Y = float(data[1:end-1,:])
(D, N) = size(Y)
using StatsBase
Y = zscore(Y, 1)
Out[115]:
In [116]:
(D, N) = size(Y)
hold_out_perc = 0.1
hold_out = int(ceil(hold_out_perc * D * N))  # integer count of held-out entries
per = randperm(D*N)
m_unobserved = bool( sparse( zeros(size(Y)) ) )
m_unobserved[per[1:hold_out]] = true;
# lets "forget" true value of X, G, C and cc
X = randn(1,N)
G = randn(1,1)
C = ones(1,1)
cc = [1]
Ns = [1]
Yd = Y[1,:]
print("evaluate dimension: 1")
# run one iteration using Yd only
X = sample_latent_factors(Yd, C, sigma_noise, sigma_x, X, G)
(X, cc, C, G) = gibbs_aux_assignment(Yd, X, cc, C, G, m_aux, sigma_noise, sigma_g, mu_g, α )
K = size(C, 2)
α = random_concentration_parameter(α, 1.0, 1.0, N, K)
sigma_noise = random_sigma_noise(Yd, G, C, X, noise_hyper_a, noise_hyper_b)
sigma_g = random_sigma_g(Yd, G, C, X, sigma_hyper_a, sigma_hyper_b)
# loop over D-1 dimensions
@time for d = 2:D
    YY = Y[d,:]
    Yd = cat(1, Yd, YY)
    print(" ", d)
    # sample unobserved data from model
    (X, cc, C, G) = gibbs_aux_assignment(Yd, X, cc, C, G, m_aux, sigma_noise, sigma_g, mu_g, α, out_dim = d)
    # not sure why this is required...
    # run one iteration:
    X = sample_latent_factors(Yd, C, sigma_noise, sigma_x, X, G)
    (X, cc, C, G) = gibbs_aux_assignment(Yd, X, cc, C, G, m_aux, sigma_noise, sigma_g, mu_g, α)
    K = size(C, 2)
    α = random_concentration_parameter(α, 1.0, 1.0, N, K)
    sigma_noise = random_sigma_noise(Yd, G, C, X, noise_hyper_a, noise_hyper_b)
    sigma_g = random_sigma_g(Yd, G, C, X, sigma_hyper_a, sigma_hyper_b)
    # actually sample unobserved data
    #Yd[mask] = sample_vcm_data(nn, dd, G, X, C, mu_noise, sigma_noise)[mask]
    #Yd = sample_vcm_data(nn, dd, G, X, C, mu_noise, sigma_noise)
    if d == D
        println("")
    end
end
In [106]:
typeof(m_unobserved)
Out[106]:
In [44]:
burnin = 100
(D, N) = size(Y)
@time for i in 1:burnin
    # impute only the held-out entries from the current model state (as in the synthetic-data run above)
    Y[m_unobserved] = sample_vcm_data(N, D, G, X, C, mu_noise, sigma_noise)[m_unobserved]
    # run one iteration
    X = sample_latent_factors(Y, C, sigma_noise, sigma_x, X, G)
    (X, cc, C, G) = gibbs_aux_assignment(Y, X, cc, C, G, m_aux, sigma_noise, sigma_g, mu_g, α)
    K = size(C, 2)
    α = random_concentration_parameter(α, 1.0, 1.0, N, K)
    sigma_noise = random_sigma_noise(Y, G, C, X, noise_hyper_a, noise_hyper_b)
    sigma_g = random_sigma_g(Y, G, C, X, sigma_hyper_a, sigma_hyper_b)
end
In [117]:
results = VCMS[]
thinout = 1
iterations = 100
@time for i in 1:iterations
    # run thinning (ignore some Gibbs sweeps if thinout > 1)
    for j in 1:thinout
        #Y[m_unobserved] = sample_vcm_data(N, D, G, X, C, mu_noise, sigma_noise)[m_unobserved]
        # run one iteration
        X = sample_latent_factors(Y, C, sigma_noise, sigma_x, X, G)
        (X, cc, C, G) = gibbs_aux_assignment(Y, X, cc, C, G, m_aux, sigma_noise, sigma_g, mu_g, α)
        K = size(C, 2)
        α = random_concentration_parameter(α, 1.0, 1.0, N, K)
        sigma_noise = random_sigma_noise(Y, G, C, X, noise_hyper_a, noise_hyper_b)
        sigma_g = random_sigma_g(Y, G, C, X, sigma_hyper_a, sigma_hyper_b)
    end
    # compute energy
    llh = univarnormal_logpdf(Y, (G.*C) * X, σ=sigma_noise, n = -D*N)
    # simplified normal pdf
    prior_g = -0.5 * (1/sigma_g.^2) * sum( (G - repmat((mu_g * ones(D, 1)), 1, K)).^2 )
    prior_x = -0.5 * (1/sigma_x.^2) * sum( (X - repmat((mu_x * ones(K, 1)), 1, N)).^2 )
    # CRP partition prior (exchangeable partition probability)
    K = size(C, 2)
    Ns = map(c -> sum(cc .== c), unique(cc))
    prior_c = lgamma(α) + K*log(α) + sum(lgamma(Ns)) - lgamma(α + D)
    # energy
    e = llh + prior_g + prior_x + prior_c
    #e = prior_x
    # store result of current Gibbs sweep
    push!(results, VCMS(e, K, α))
    #println("iteration: ", i, " with energy: ", e, " K: ", K)
end
In [118]:
# do some plots
using Gadfly
E = map(vcm -> vcm.energy, results);
K = map(vcm -> vcm.K, results);
A = map(vcm -> vcm.α, results);
In [120]:
p1 = plot(x = [1:iterations], y = E, Geom.line)
p2 = plot(x = [1:iterations], y = K, Geom.line)
#p3 = plot(x = [1:iterations], y = A, Geom.line)
#vstack(p1, p2, p3)
Out[120]: