This example is taken from here: http://deeplearning.net/tutorial/gettingstarted.html
In [ ]:
import academictorrents as at
import cPickle, gzip
import sys, os, time
import numpy as np
import theano, lasagne
import theano.tensor as T
import matplotlib.pyplot as plt
%matplotlib inline
In [58]:
####################
##### Options ######
model='mlp' # 'cnn' or 'mlp'
num_epochs=100
In [2]:
#Download MNIST from Academic torrents
filename = at.get("323a0048d87ca79b68f12a6350a57776b6a3b7fb")
In [ ]:
In [61]:
# Load the dataset
mnist = gzip.open(filename, 'rb')
train_set, validation_set, test_set = cPickle.load(mnist)
mnist.close()
X_train = train_set[0].reshape(-1, 1, 28, 28)
y_train = train_set[1]
X_val = validation_set[0].reshape(-1, 1, 28, 28)
y_val = validation_set[1]
X_test = test_set[0].reshape(-1, 1, 28, 28)
y_test = test_set[1]
In [ ]:
In [62]:
print("Label", y_train[0])
plt.imshow(X_train[0][0], cmap='gray', interpolation='none')
Out[62]:
In [63]:
print("Label", y_train[1])
plt.imshow(X_train[1][0], cmap='gray', interpolation='none')
Out[63]:
In [ ]:
In [ ]:
In [64]:
def build_mlp(input_var=None):
# This creates an MLP of two hidden layers of 800 units each, followed by
# a softmax output layer of 10 units. It applies 20% dropout to the input
# data and 50% dropout to the hidden layers.
# Input layer, specifying the expected input shape of the network
# (unspecified batchsize, 1 channel, 28 rows and 28 columns) and
# linking it to the given Theano variable `input_var`, if any:
l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
# Apply 20% dropout to the input data:
l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)
# Add a fully-connected layer of 800 units, using the linear rectifier, and
# initializing weights with Glorot's scheme (which is the default anyway):
l_hid1 = lasagne.layers.DenseLayer(
l_in_drop, num_units=800,
nonlinearity=lasagne.nonlinearities.rectify,
W=lasagne.init.GlorotUniform())
# We'll now add dropout of 50%:
l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)
# Another 800-unit layer:
l_hid2 = lasagne.layers.DenseLayer(
l_hid1_drop, num_units=800,
nonlinearity=lasagne.nonlinearities.rectify)
# 50% dropout again:
l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)
# Finally, we'll add the fully-connected output layer, of 10 softmax units:
l_out = lasagne.layers.DenseLayer(
l_hid2_drop, num_units=10,
nonlinearity=lasagne.nonlinearities.softmax)
# Each layer is linked to its incoming layer(s), so we only need to pass
# the output layer to give access to a network in Lasagne:
return l_out
In [65]:
def build_cnn(input_var=None):
# As a third model, we'll create a CNN of two convolution + pooling stages
# and a fully-connected hidden layer in front of the output layer.
# Input layer, as usual:
network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
input_var=input_var)
# This time we do not apply input dropout, as it tends to work less well
# for convolutional layers.
# Convolutional layer with 32 kernels of size 5x5. Strided and padded
# convolutions are supported as well; see the docstring.
network = lasagne.layers.Conv2DLayer(
network, num_filters=32, filter_size=(5, 5),
nonlinearity=lasagne.nonlinearities.rectify,
W=lasagne.init.GlorotUniform())
# Expert note: Lasagne provides alternative convolutional layers that
# override Theano's choice of which implementation to use; for details
# please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html.
# Max-pooling layer of factor 2 in both dimensions:
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
# Another convolution with 32 5x5 kernels, and another 2x2 pooling:
network = lasagne.layers.Conv2DLayer(
network, num_filters=32, filter_size=(5, 5),
nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
# A fully-connected layer of 256 units with 50% dropout on its inputs:
network = lasagne.layers.DenseLayer(
lasagne.layers.dropout(network, p=.5),
num_units=256,
nonlinearity=lasagne.nonlinearities.rectify)
# And, finally, the 10-unit output layer with 50% dropout on its inputs:
network = lasagne.layers.DenseLayer(
lasagne.layers.dropout(network, p=.5),
num_units=10,
nonlinearity=lasagne.nonlinearities.softmax)
return network
In [66]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
assert len(inputs) == len(targets)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
if shuffle:
excerpt = indices[start_idx:start_idx + batchsize]
else:
excerpt = slice(start_idx, start_idx + batchsize)
yield inputs[excerpt], targets[excerpt]
In [ ]:
In [67]:
# Prepare Theano variables for inputs and targets
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
# Create neural network model (depending on first command line parameter)
print("Building model and compiling functions...")
if model == 'mlp':
network = build_mlp(input_var)
elif model == 'cnn':
network = build_cnn(input_var)
else:
print("Unrecognized model type %r." % model)
In [68]:
# Create a loss expression for training, i.e., a scalar objective we want
# to minimize (for our multi-class problem, it is the cross-entropy loss):
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
# We could add some weight decay as well here, see lasagne.regularization.
# Create update expressions for training, i.e., how to modify the
# parameters at each training step. Here, we'll use Stochastic Gradient
# Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
# Create a loss expression for validation/testing. The crucial difference
# here is that we do a deterministic forward pass through the network,
# disabling dropout layers.
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,target_var)
test_loss = test_loss.mean()
# As a bonus, also create an expression for the classification accuracy:
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),dtype=theano.config.floatX)
# Compile a function performing a training step on a mini-batch (by giving
# the updates dictionary) and returning the corresponding training loss:
train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)
# Compile a second function computing the validation loss and accuracy:
val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)
In [ ]:
In [ ]:
In [ ]:
# Finally, launch the training loop.
print("Starting training...")
# We iterate over epochs:
for epoch in range(num_epochs):
# In each epoch, we do a full pass over the training data:
train_err = 0
train_batches = 0
start_time = time.time()
for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
inputs, targets = batch
train_err += train_fn(inputs, targets)
train_batches += 1
# And a full pass over the validation data:
val_err = 0
val_acc = 0
val_batches = 0
for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
inputs, targets = batch
err, acc = val_fn(inputs, targets)
val_err += err
val_acc += acc
val_batches += 1
# Then we print the results for this epoch:
print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
print(" validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))
In [ ]:
# After training, we compute and print the test error:
test_err = 0
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
inputs, targets = batch
err, acc = val_fn(inputs, targets)
test_err += err
test_acc += acc
test_batches += 1
print("Final results:")
print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print(" test accuracy:\t\t{:.2f} %".format(
test_acc / test_batches * 100))
In [ ]:
In [ ]:
In [ ]:
In [ ]: