In [ ]:
from IPython.display import Image
import pickle
# Download and unzip pickled version from here: http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(open('data/mnist.pkl', 'r'))
print "Shapes:"
print train_x.shape, train_y.shape
print valid_x.shape, valid_y.shape
print test_x.shape, test_y.shape
print "--------------"
print "Example input:"
print train_x[0]
print "Example label:"
print train_y[0]
In [ ]:
# Show example images - the tile_raster_images helper (from OpenDeep) turns each
# flat 784-value row back into a 28x28 tile and lays the tiles out in a grid.
from utils import tile_raster_images
from PIL import Image as pil_img

# First 25 training examples, arranged as a single row of 25 tiles.
input_images = train_x[:25]
tiled_digits = tile_raster_images(
    input_images,
    img_shape=(28, 28),
    tile_shape=(1, 25),
    tile_spacing=(1, 1)
)
im = pil_img.fromarray(tiled_digits)
im.save("some_mnist_numbers.png")
# Last expression of the cell: hands the saved PNG to IPython for display.
Image(filename="some_mnist_numbers.png")
In [ ]:
# Your basic Theano imports.
import theano
import theano.tensor as T
# Symbolic network input: a matrix with one flattened image per row.
x = T.matrix(name='x')
In [ ]:
# Compute the hidden layer from the input
import numpy
import numpy.random as rng
# Hidden layer: h = tanh(x . W_x + b_h), mapping 28*28 = 784 inputs to 500 units.
# Weights are drawn uniformly from [-i, i] with i = sqrt(6 / (n_in + n_out));
# biases start at zero.
# Alternative initialisation (disabled):
#   rng.normal(loc=0.0, scale=.05, size=(28*28, 500))
i = numpy.sqrt(6. / (784+500))
initial_W_x = rng.uniform(low=-i, high=i, size=(28*28, 500))

# Shared variables hold the parameter values that training will update.
W_x = theano.shared(
    numpy.asarray(initial_W_x, dtype=theano.config.floatX),
    name="W_x"
)
b_h = theano.shared(
    numpy.zeros(shape=(500,), dtype=theano.config.floatX),
    name="b_h"
)

pre_activation = T.dot(x, W_x) + b_h
h = T.tanh(pre_activation)
In [ ]:
# Compute the output class probabilities from the hidden layer:
# y = softmax(h . W_h + b_y), mapping the 500 hidden units to 10 classes.
# Same uniform init as the hidden layer: bound = sqrt(6 / (n_in + n_out)).
i = numpy.sqrt(6. / (500+10))
# Alternative initialisation (disabled):
# W_h = numpy.asarray(rng.normal(loc=0.0, scale=.05, size=(500, 10)), dtype=theano.config.floatX)
W_h = numpy.asarray(rng.uniform(low=-i, high=i, size=(500, 10)), dtype=theano.config.floatX)
# Use floatX rather than a hard-coded "float32" so the bias has the same dtype
# as W_x, b_h and W_h whatever theano.config.floatX is set to.
b_y = numpy.zeros(shape=(10,), dtype=theano.config.floatX)
W_h = theano.shared(W_h, name="W_h")
b_y = theano.shared(b_y, name="b_y")
y = T.nnet.softmax(
    T.dot(h, W_h) + b_y
)
# The actual predicted label: index of the most probable class in each row.
y_hat = T.argmax(y, axis=1)
In [ ]:
# Cost: mean negative log-probability that the model assigns to the correct
# label of each example in the batch.
correct_labels = T.ivector("labels")
log_probabilities = T.log(y)
example_index = T.arange(correct_labels.shape[0])
# Pick, for every row, the log-probability in the column of its correct label.
log_likelihood = log_probabilities[example_index, correct_labels]
cost = -T.mean(log_likelihood)
In [ ]:
# Gradient-descent updates: every parameter moves against its gradient of the
# cost, scaled by a fixed learning rate.
parameters = [W_x, b_h, W_h, b_y]
gradients = T.grad(cost, parameters)
learning_rate = 0.01

# One (shared_variable, new_value_expression) pair per parameter.
train_updates = []
for weight, weight_gradient in zip(parameters, gradients):
    train_updates.append((weight, weight - learning_rate*weight_gradient))
In [ ]:
# Compile the two callables used by the training loop.
# f_train(inputs, labels) -> cost; also applies train_updates to the parameters.
f_train = theano.function(
    [x, correct_labels],
    cost,
    updates=train_updates,
    allow_input_downcast=True
)
# f_test(inputs) -> predicted labels; has no updates, so parameters are untouched.
f_test = theano.function([x], y_hat, allow_input_downcast=True)
In [ ]:
# Main training loop
batch_size = 100
epochs = 300
check_frequency = 3
train_batches = len(train_x) / batch_size
valid_batches = len(valid_x) / batch_size
test_batches = len(test_x) / batch_size
for epoch in range(epochs):
print epoch+1, ":",
train_costs = []
train_accuracy = []
for i in range(train_batches):
batch_x = train_x[i*batch_size:(i+1)*batch_size]
batch_labels = train_y[i*batch_size:(i+1)*batch_size]
costs = f_train(batch_x, batch_labels)
preds = f_test(batch_x)
acc = sum(preds==batch_labels)/float(len(batch_labels))
train_costs.append(costs)
train_accuracy.append(acc)
print "cost:", numpy.mean(train_costs), "\ttrain:", str(numpy.mean(train_accuracy)*100)+"%",
valid_accuracy = []
for i in range(valid_batches):
batch_x = valid_x[i*batch_size:(i+1)*batch_size]
batch_labels = valid_y[i*batch_size:(i+1)*batch_size]
preds = f_test(batch_x)
acc = sum(preds==batch_labels)/float(len(batch_labels))
valid_accuracy.append(acc)
print "\tvalid:", str(numpy.mean(valid_accuracy)*100)+"%",
test_accuracy = []
for i in range(test_batches):
batch_x = test_x[i*batch_size:(i+1)*batch_size]
batch_labels = test_y[i*batch_size:(i+1)*batch_size]
preds = f_test(batch_x)
acc = sum(preds==batch_labels)/float(len(batch_labels))
test_accuracy.append(acc)
print "\ttest:", str(numpy.mean(test_accuracy)*100)+"%"
if (epoch+1) % check_frequency == 0:
print 'saving filters...'
weight_filters = pil_img.fromarray(
tile_raster_images(
W_x.get_value(borrow=True).T,
img_shape=(28, 28),
tile_shape=(20, 25),
tile_spacing=(1, 1)
)
)
weight_filters.save("mlp_filters_%d.png"%(epoch+1))
In [ ]: