In [ ]:
from IPython.display import Image
import pickle

# Download and unzip the pickled dataset from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
# It contains three (images, labels) splits: train, validation, and test.
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(open('data/mnist.pkl', 'rb'))
print "Shapes:"
print train_x.shape, train_y.shape
print valid_x.shape, valid_y.shape
print test_x.shape, test_y.shape

print "--------------"
print "Example input:"
print train_x[0]
print "Example label:"
print train_y[0]
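
If you'd rather skip the manual unzip step, the gzipped pickle can also be read directly. A minimal sketch, assuming the download was saved as data/mnist.pkl.gz:

In [ ]:
# Sketch: load the gzipped pickle directly (assumes the file data/mnist.pkl.gz exists).
import gzip
with gzip.open('data/mnist.pkl.gz', 'rb') as f:
    (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(f)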

In [ ]:
# Show some example images - the tile_raster_images helper from OpenDeep turns each 784-element row back into a 28x28 image and tiles them into a single picture.
from utils import tile_raster_images
from PIL import Image as pil_img

input_images = train_x[:25]
im = pil_img.fromarray(
    tile_raster_images(input_images, 
                       img_shape=(28, 28), 
                       tile_shape=(1, 25),
                       tile_spacing=(1, 1))
)
im.save("some_mnist_numbers.png")
Image(filename="some_mnist_numbers.png")

In [ ]:
# Your basic Theano imports.
import theano
import theano.tensor as T

# Symbolic matrix of inputs: one row per image, 784 (28*28) pixel values per row.
x = T.matrix('x')
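
Note that x is purely symbolic - it holds no data yet. As a quick illustration (not part of the model), any expression built from it can be evaluated by feeding in a concrete array:

In [ ]:
# Illustration only: evaluate a symbolic expression by supplying a real array for x.
import numpy
example = numpy.asarray([[1, 2], [3, 4]], dtype=theano.config.floatX)
print (2 * x).eval({x: example})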

In [ ]:
# Compute the hidden layer from the input
import numpy
import numpy.random as rng

# Glorot-style uniform initialization: bound of sqrt(6 / (fan_in + fan_out)).
i = numpy.sqrt(6. / (784 + 500))
# W_x = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(28*28, 500)), dtype=theano.config.floatX)
W_x = numpy.asarray(rng.uniform(low=-i, high=i, size=(28*28, 500)), dtype=theano.config.floatX)
b_h = numpy.zeros(shape=(500,), dtype=theano.config.floatX)

W_x = theano.shared(W_x, name="W_x")
b_h = theano.shared(b_h, name="b_h")

h = T.tanh(
    T.dot(x, W_x) + b_h
)
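
An optional sanity check: evaluating h on a few training images should give one 500-dimensional hidden vector per image.

In [ ]:
# Optional sanity check: hidden activations for 5 images should have shape (5, 500).
sample = numpy.asarray(train_x[:5], dtype=theano.config.floatX)
print h.eval({x: sample}).shape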

In [ ]:
# Compute the output class probabilities from the hidden layer
# Same Glorot-style bound, now with fan_in=500 and fan_out=10.
i = numpy.sqrt(6. / (500 + 10))
# W_h = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(500, 10)), dtype=theano.config.floatX)
W_h = numpy.asarray(rng.uniform(low=-i, high=i, size=(500, 10)), dtype=theano.config.floatX)
b_y = numpy.zeros(shape=(10,), dtype=theano.config.floatX)

W_h = theano.shared(W_h, name="W_h")
b_y = theano.shared(b_y, name="b_y")

y = T.nnet.softmax(
    T.dot(h, W_h) + b_y
)

# The predicted label for each example: the index of the largest class probability.
y_hat = T.argmax(y, axis=1)
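
Another optional check: each row of y is a probability distribution over the 10 digit classes (non-negative and summing to 1), and y_hat picks out the most probable class.

In [ ]:
# Optional check: class probabilities should sum to 1 for each example.
sample = numpy.asarray(train_x[:3], dtype=theano.config.floatX)
probs = y.eval({x: sample})
print probs.sum(axis=1)        # should be very close to [1. 1. 1.]
print y_hat.eval({x: sample})  # predicted class index for each of the 3 examples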

In [ ]:
# Cost: the mean negative log-likelihood of the correct labels
correct_labels = T.ivector("labels")  # symbolic vector of integer class labels

# For each example, index out the log-probability assigned to its correct class.
log_likelihood = T.log(y)[T.arange(correct_labels.shape[0]), correct_labels]
cost = -T.mean(log_likelihood)
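
This is just the categorical cross-entropy between the softmax output and the integer labels, so Theano's built-in helper should give an equivalent expression:

In [ ]:
# Equivalent formulation using Theano's built-in cross-entropy helper.
cost_alternative = T.mean(T.nnet.categorical_crossentropy(y, correct_labels))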

In [ ]:
# Compute gradient updates for the parameters
parameters = [W_x, b_h, W_h, b_y]
gradients = T.grad(cost, parameters)

learning_rate = 0.01
train_updates = [(param, param - learning_rate*gradient) for param, gradient in zip(parameters, gradients)]
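
These are plain SGD updates. A common refinement is classical momentum, which keeps a running velocity per parameter; the sketch below shows one way it could look (the velocity variables and the 0.9 coefficient are my additions, not part of the original recipe):

In [ ]:
# Sketch: SGD with classical momentum, an optional alternative to train_updates above.
momentum = 0.9
velocities = [theano.shared(numpy.zeros_like(p.get_value()), name="v_" + p.name) for p in parameters]

momentum_updates = []
for param, gradient, velocity in zip(parameters, gradients, velocities):
    new_velocity = momentum * velocity - learning_rate * gradient  # accumulate a running velocity
    momentum_updates.append((velocity, new_velocity))
    momentum_updates.append((param, param + new_velocity))         # step along the velocity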

In [ ]:
# Compile function for training (changes parameters via updates) and testing (no updates)
f_train = theano.function(
    inputs=[x, correct_labels], 
    outputs=cost, 
    updates=train_updates, 
    allow_input_downcast=True
)

f_test = theano.function(
    inputs=[x], 
    outputs=y_hat, 
    allow_input_downcast=True
)
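
Before running the full loop, it can help to call both functions once on a single minibatch to make sure shapes and dtypes line up. Keep in mind that f_train takes one real gradient step, so the parameters change slightly:

In [ ]:
# Quick smoke test on one minibatch (this performs one actual gradient step).
print "minibatch cost:", f_train(train_x[:100], train_y[:100])
print "predictions:", f_test(train_x[:100])[:10]
print "labels:     ", train_y[:10]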

In [ ]:
# Main training loop
batch_size = 100
epochs = 300
check_frequency = 3  # save an image of the first-layer filters every check_frequency epochs

train_batches = len(train_x) // batch_size
valid_batches = len(valid_x) // batch_size
test_batches = len(test_x) // batch_size

for epoch in range(epochs):
    print epoch+1, ":",
    
    train_costs = []
    train_accuracy = []
    for i in range(train_batches):
        batch_x = train_x[i*batch_size:(i+1)*batch_size]
        batch_labels = train_y[i*batch_size:(i+1)*batch_size]

        costs = f_train(batch_x, batch_labels)
        preds = f_test(batch_x)
        acc = sum(preds==batch_labels)/float(len(batch_labels))
        
        train_costs.append(costs)
        train_accuracy.append(acc)
    print "cost:", numpy.mean(train_costs), "\ttrain:", str(numpy.mean(train_accuracy)*100)+"%",
    
    valid_accuracy = []
    for i in range(valid_batches):
        batch_x = valid_x[i*batch_size:(i+1)*batch_size]
        batch_labels = valid_y[i*batch_size:(i+1)*batch_size]
        
        preds = f_test(batch_x)
        acc = sum(preds==batch_labels)/float(len(batch_labels))
        
        valid_accuracy.append(acc)
    print "\tvalid:", str(numpy.mean(valid_accuracy)*100)+"%",
    
    test_accuracy = []
    for i in range(test_batches):
        batch_x = test_x[i*batch_size:(i+1)*batch_size]
        batch_labels = test_y[i*batch_size:(i+1)*batch_size]
        
        preds = f_test(batch_x)
        acc = sum(preds==batch_labels)/float(len(batch_labels))
        
        test_accuracy.append(acc)
    print "\ttest:", str(numpy.mean(test_accuracy)*100)+"%"
    
    if (epoch+1) % check_frequency == 0:
        print 'saving filters...'
        weight_filters = pil_img.fromarray(
                tile_raster_images(
                    W_x.get_value(borrow=True).T,
                    img_shape=(28, 28),
                    tile_shape=(20, 25),
                    tile_spacing=(1, 1)
                )
            )
        weight_filters.save("mlp_filters_%d.png"%(epoch+1))
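
The loop above always visits the minibatches in the same order. A common refinement is to reshuffle the training set at the start of every epoch; a sketch of the lines you could add at the top of the epoch loop:

In [ ]:
# Sketch: shuffle the training set once per epoch (place at the top of the epoch loop).
order = rng.permutation(len(train_x))
train_x = train_x[order]
train_y = train_y[order]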

In [ ]: