In [1]:
%pylab inline
%load_ext autoreload
%autoreload 2
import models as m
import visualization as v
import layers as l
import trainers as t
import utils
import time
import numpy as np
from dataset import load_dataset, mnist
In [2]:
# Load MNIST images (28x28 pixels, flattened to vectors of length 784). Array shape is [# examples, 784]
mnist_data = mnist()
print mnist_data.shape
With only 36 hidden units, the samples are not nearly as clear as those from more complex models (see the PCD-trained model below).
In [67]:
cd1 = m.BinaryRBM(784, 36, t.CD_model(1), ordered_trainers=[t.WeightDecay(0.1)])
cd1.train(lr=0.1, epoch=15, batch_size=20, data=mnist_data, lr_schedule=t.lr_constant, checkpoint=1)
In [68]:
# Plot bias and weight matrices
v.plot_images(cd1.connections[0].W, (28, 28), (6, 6), space=1, size=(5, 5))
v.show_image(cd1.layers[0].bias, dim=(28,28))
Sampling from a trained model consists of creating a generator, say G, by calling "model.dream" with starting data and the number of Gibbs steps between samples. Calling G.next() then returns the next sample.
In this example, samples are drawn from the just-trained RBM "cd1".
CD1 training does not produce very good samples in this case.
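For intuition, here is a minimal sketch of what such a "dream" generator might look like for a binary RBM under the hood. The standalone dream function, its sigmoid helper, and the explicit weight/bias arguments are illustrative assumptions, not the library's actual interface.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def dream(W, b_vis, b_hid, start, steps):
    # Yield a visible-layer sample after every `steps` Gibbs updates
    v = start.copy()
    while True:
        for _ in range(steps):
            p_h = sigmoid(np.dot(v, W) + b_hid)            # P(h = 1 | v)
            h = (np.random.rand(*p_h.shape) < p_h) * 1.0   # sample hidden units
            p_v = sigmoid(np.dot(h, W.T) + b_vis)          # P(v = 1 | h)
            v = (np.random.rand(*p_v.shape) < p_v) * 1.0   # sample visible units
        yield v

Because it is a generator, samples are produced lazily: each call for the next sample runs another block of Gibbs steps from wherever the chain left off.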
In [60]:
# Generate 10 samples for each of 5 different starting examples
for start in mnist_data[np.random.randint(0, len(mnist_data), 5)]:
    # Initialize generator, 1000 gibbs steps between samples
    G = cd1.dream(start, steps=1000)
    im = [G.next() for i in range(10)]  # Generate 10 samples
    v.plot_images([start] + im, (28, 28), (1, 11), space=1, size=(10, 10))
Models have a few basic parts: layers, connections, statistics, and trainers.
The two model trainers used in this notebook, t.CD_model and t.PCD_model, both calculate the gradient given the data-dependent and model-dependent statistics.
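As a rough illustration (not the actual t.CD_model implementation), the sketch below shows how a CD-k trainer for a binary RBM could compute weight and bias gradients from those two sets of statistics; the function name cd_k_gradient and the sigmoid helper are assumptions.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def cd_k_gradient(W, b_vis, b_hid, v0, k=1):
    # Positive phase: data-dependent statistics from the minibatch v0
    ph0 = sigmoid(np.dot(v0, W) + b_hid)
    pos = np.dot(v0.T, ph0)

    # Negative phase: k Gibbs steps starting from the data (CD-k)
    v, ph = v0, ph0
    for _ in range(k):
        h = (np.random.rand(*ph.shape) < ph) * 1.0
        pv = sigmoid(np.dot(h, W.T) + b_vis)
        v = (np.random.rand(*pv.shape) < pv) * 1.0
        ph = sigmoid(np.dot(v, W) + b_hid)
    neg = np.dot(v.T, ph)

    n = float(v0.shape[0])
    dW = (pos - neg) / n            # positive minus negative statistics
    db_vis = (v0 - v).mean(axis=0)
    db_hid = (ph0 - ph).mean(axis=0)
    return dW, db_vis, db_hid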
This model is much higher quality than the small CD1 RBM: it has many more hidden units (500 vs. 36), and PCD's persistent chains give a better estimate of the model-dependent statistics. It also takes significantly longer to train, since the weight matrix is far larger.
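The key difference from CD is how the model-dependent (negative-phase) statistics are collected: PCD keeps a set of "fantasy" Gibbs chains alive across minibatches instead of restarting from the data on every update. The sketch below illustrates that idea only; the PersistentChain class and its negative_stats method are made-up names, not the actual t.PCD_model API.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

class PersistentChain(object):
    """Fantasy particles that survive across minibatches (the core of PCD)."""
    def __init__(self, n_chains, n_visible):
        self.v = (np.random.rand(n_chains, n_visible) < 0.5) * 1.0

    def negative_stats(self, W, b_vis, b_hid, k=1):
        # Advance the persistent chains k Gibbs steps and return model statistics
        for _ in range(k):
            ph = sigmoid(np.dot(self.v, W) + b_hid)
            h = (np.random.rand(*ph.shape) < ph) * 1.0
            pv = sigmoid(np.dot(h, W.T) + b_vis)
            self.v = (np.random.rand(*pv.shape) < pv) * 1.0
        ph = sigmoid(np.dot(self.v, W) + b_hid)
        return np.dot(self.v.T, ph), self.v, ph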
In [3]:
pcd1 = m.BinaryRBM(784, 500, t.PCD_model(1))
pcd1.train(lr=0.1, epoch=15, batch_size=20, data=mnist_data, checkpoint=1)
In [8]:
# Plot bias and weight matrices
v.plot_images(pcd1.connections[0].W, (28, 28), (10, 10), space=1, size=(10, 10))
v.show_image(pcd1.layers[0].bias, dim=(28,28))
In [50]:
for start in mnist_data[np.random.randint(0, len(mnist_data), 10)]:
    # 1000 gibbs steps between samples
    g = pcd1.dream(start, steps=1000)
    im = [g.next() for i in range(10)]
    v.plot_images([start] + im, (28, 28), (1, 11), space=1, size=(7, 7))
When only 5 Gibbs steps are taken between samples, we can more easily see how the samples slowly change shape.
In [10]:
for start in mnist_data[np.random.randint(0, len(mnist_data), 3)]:
    # 5 gibbs steps between samples
    g = pcd1.dream(start, steps=5)
    im = [g.next() for i in range(99)]
    v.plot_images([start] + im, (28, 28), (5, 20), space=1, size=(14, 14))