In [1]:
using BNP
In [2]:
# --- configuration of the synthetic bars dataset ---
# side length of the square bar images (images are img_size x img_size)
img_size = 5
# noise level handed to the data generator
noise_level = 0.01
# 1x4 row of probabilities over the number of bars: [0 1/3 1/3 1/3]
# NOTE(review): which bar count each column stands for is decided by
# generateBarsDataset — confirm there
num_per_mixture = hcat(0, ones(1, 3)) ./ 3
# number of groups (J)
num_group = 40
# number of data items drawn from each group
num_data = 50;
In [3]:
# Draw the synthetic dataset from the configured parameters.
# `samples` is indexed per group further down; `bars` holds one shared
# distribution (topic) per column.
samples, bars = generateBarsDataset(
    img_size, noise_level, num_per_mixture, num_group, num_data
);
In [5]:
using Images
using Interact
# Build one image per group (document): every sampled item is treated as a
# linear pixel index and its occurrences are counted.
I = Array[zeros(img_size, img_size) for g in 1:num_group]
for g in 1:num_group
    counts = I[g]
    for pixel in samples[g]
        # linear indexing addresses the same cell as the (row, column) pair
        counts[pixel] += 1
    end
    # scale so the most frequent pixel becomes 1 (needed for display)
    I[g] = counts ./ maximum(counts)
end
# Interactively browse the group images
@manipulate for group in 1:num_group
    grayim(I[group])
end
Out[5]:
In [6]:
# Render every shared distribution (topic) — one column of `bars` — as an image.
# The container is sized by the number of topics, not by the number of groups,
# so it can neither overflow nor keep undefined trailing slots.
I = Array{Array}(size(bars, 2))
# loop over all shared distributions
for i in 1:size(bars, 2)
    # unfold column i into an img_size x img_size image
    II = reshape(bars[:, i], img_size, img_size)
    # scale so the largest entry becomes 1 (needed for display)
    I[i] = II ./ maximum(II)
end
# Interactively browse the shared distributions
@manipulate for topic in 1:size(bars, 2)
    grayim(I[topic])
end
Out[6]:
In [8]:
# number of pixels per image, i.e. the dimensionality of the data
D = img_size^2
# base distribution: Multinomial likelihood with a Dirichlet prior
H = MultinomialDirichlet(D, 1.0)
# Train a Hierarchical Dirichlet Process mixture model with Gibbs sampling,
# starting from a random initialisation that guesses 10 shared distributions.
# `models` is indexed per iteration further down — presumably one state per
# Gibbs sweep; confirm against BNP.train.
models = train(
    BNP.HDP(H, α = 1.0),
    Gibbs(),
    RandomInitialisation(k = 10),
    samples
);
In [41]:
# Collect, for every iteration, the images of all non-empty topics.
R = Array{Array}(length(models))
# loop over all iterations
for iter in 1:length(models)
    # drop empty distributions (topics), i.e. keep those with n > 0
    G = filter(x -> x.n > 0, models[iter].G)
    # one normalised image per remaining topic of this iteration
    I = Array{Array}(length(G))
    for i in 1:length(G)
        # densify the counts and lay them out as an img_size x img_size image
        II = reshape(full(G[i].counts), img_size, img_size)
        # scale so the largest count becomes 1 (needed for visualisation)
        I[i] = II ./ maximum(II)
    end
    # store the topics of this iteration
    R[iter] = I
end
# The number of non-empty topics differs between iterations and need not equal
# the number of true bars. The topic slider therefore spans the largest count
# seen in any iteration, and a blank image is shown when the selected iteration
# has no topic at that position (instead of raising a BoundsError).
@manipulate for iteration in 1:length(models), topic in 1:maximum(map(length, R))
    if topic <= length(R[iteration])
        grayim(R[iteration][topic])
    else
        grayim(zeros(img_size, img_size))
    end
end
Out[41]: