A simple example of training an MNIST MLP model in neon

Step 1: Import the layers and other modules needed for the MNIST MLP model


In [2]:
%matplotlib inline

# MNIST MLP example
import logging

# Show INFO-level records (and above) so the library's progress messages
# appear in the cell output.
logging.basicConfig(level=logging.INFO)
# Handle on the root logger for use in later cells.
logger = logging.getLogger()

from neon.backends import gen_backend
from neon.layers import FCLayer, DataLayer, CostLayer
from neon.models import MLP
from neon.transforms import RectLin, Logistic, CrossEntropy
from neon.datasets import MNIST
from neon.experiments import FitPredictErrorExperiment


INFO:neon.util.compat:using xrange as range

Step 2: Train the model for 10 epochs, saving the initial (epoch 0) weights and the weights after each epoch


In [3]:
import shutil
from os import remove
import cPickle as pkl
from os.path import expanduser, exists

from os import makedirs
from os.path import dirname, isdir

# Checkpoint file handed to the model as serialized_path. The logged output
# shows it being deserialized at the start of a run and re-serialized at the
# end, so a stale copy from an earlier session is removed to start from scratch.
file_path = expanduser('~/data/MNIST/mnist-mlp.prm')
if exists(file_path):
    remove(file_path)

# The epoch-0 snapshot below is written before any training run happens (the
# logged output shows dataset loading only on the first training pass), so make
# sure the target directory exists on a fresh machine.
weights_dir = dirname(file_path)
if not isdir(weights_dir):
    makedirs(weights_dir)

# Misclassification percentages, one entry per trained epoch (1..max_epochs).
train_err = []
test_err = []
max_epochs = 10

# Each pass builds a fresh model with a one-higher epoch budget. Because the
# checkpoint at file_path is reused, every pass after the first resumes from
# the previous one and trains a single additional epoch (see the log output).
for num_epochs in range(0, max_epochs + 1):
    # set up the layers: 784 inputs -> 3 x 200 ReLU units -> 10 logistic outputs
    layers = []
    layers.append(DataLayer(nout=784))
    layers.append(FCLayer(nout=200, activation=RectLin()))
    layers.append(FCLayer(nout=200, activation=RectLin()))
    layers.append(FCLayer(nout=200, activation=RectLin()))
    layers.append(FCLayer(nout=10, activation=Logistic()))
    layers.append(CostLayer(cost=CrossEntropy()))

    # set up the model and experiment
    model = MLP(num_epochs=num_epochs, batch_size=128, layers=layers,
                serialized_path=file_path)
    # Default backend selection with a fixed RNG seed.
    backend = gen_backend(rng_seed=0)
    # To run on the GPU using the cudanet backend, comment out the line above
    # and uncomment the line below.
    # backend = gen_backend(rng_seed=0, gpu='cudanet')
    dataset = MNIST(repo_path='~/data/')
    experiment = FitPredictErrorExperiment(model=model,
                                           backend=backend,
                                           dataset=dataset)

    # Run the training, and dump weights
    dest_path = expanduser('~/data/MNIST/mnist-ep' + str(num_epochs) + '.prm')
    if num_epochs > 0:
        res = experiment.run()
        train_err.append(res['train']['MisclassPercentage_TOP_1'])
        test_err.append(res['test']['MisclassPercentage_TOP_1'])
        # Save the weights at this epoch by snapshotting the checkpoint file
        shutil.copy2(file_path, dest_path)
    else:
        # Epoch 0: nothing is trained; store only the first FC layer's initial
        # weights as a plain array. Note this file has a different layout from
        # the later snapshots, which are copies of the full checkpoint.
        params = layers[1].weights.asnumpyarray()
        # Pickle data is binary: open in 'wb' and close the handle promptly.
        with open(dest_path, 'wb') as weights_file:
            pkl.dump(params, weights_file)


INFO:neon.backends:Cudanet backend, RNG seed: 0, numerr: None
INFO:neon.models.mlp:Layers:
	DataLayer layer: 784 nodes
	FCLayer layer: 784 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 10 nodes, Logistic act_fn
	CostLayer layer: 10 nodes, CrossEntropy cost_fn

INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 784)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (10, 200)
INFO:neon.backends:Cudanet backend, RNG seed: 0, numerr: None
INFO:neon.models.mlp:Layers:
	DataLayer layer: 784 nodes
	FCLayer layer: 784 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 10 nodes, Logistic act_fn
	CostLayer layer: 10 nodes, CrossEntropy cost_fn

INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 784)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (10, 200)
INFO:neon.datasets.mnist:loading: train-images-idx3-ubyte
INFO:neon.datasets.mnist:loading: train-labels-idx1-ubyte
INFO:neon.datasets.mnist:loading: t10k-images-idx3-ubyte
INFO:neon.datasets.mnist:loading: t10k-labels-idx1-ubyte
WARNING:neon.datasets.dataset:Incompatible batch size. Discarding 16 samples...
WARNING:neon.datasets.dataset:Incompatible batch size. Discarding 96 samples...
WARNING:neon.datasets.dataset:Incompatible batch size. Discarding 16 samples...
WARNING:neon.datasets.dataset:Incompatible batch size. Discarding 96 samples...
INFO:neon.experiments.fit:Unable to find saved model /Users/arjun/data/MNIST/mnist-mlp.prm, starting over
INFO:neon.models.mlp:commencing model fitting
INFO:neon.models.mlp:epoch: 0, training error: 1.98806
WARNING:neon.util.persist:serializing object to: /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.experiments.fit_predict_err:test set MisclassPercentage_TOP_1 8.54367
INFO:neon.experiments.fit_predict_err:train set MisclassPercentage_TOP_1 7.92768
INFO:neon.backends:Cudanet backend, RNG seed: 0, numerr: None
INFO:neon.models.mlp:Layers:
	DataLayer layer: 784 nodes
	FCLayer layer: 784 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 10 nodes, Logistic act_fn
	CostLayer layer: 10 nodes, CrossEntropy cost_fn

INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 784)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (10, 200)
WARNING:neon.util.persist:deserializing object from:  /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.models.mlp:commencing model fitting
INFO:neon.models.mlp:epoch: 1, training error: 0.31793
WARNING:neon.util.persist:serializing object to: /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.experiments.fit_predict_err:test set MisclassPercentage_TOP_1 3.93630
INFO:neon.experiments.fit_predict_err:train set MisclassPercentage_TOP_1 3.49225
INFO:neon.backends:Cudanet backend, RNG seed: 0, numerr: None
INFO:neon.models.mlp:Layers:
	DataLayer layer: 784 nodes
	FCLayer layer: 784 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 10 nodes, Logistic act_fn
	CostLayer layer: 10 nodes, CrossEntropy cost_fn

INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 784)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (10, 200)
WARNING:neon.util.persist:deserializing object from:  /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.models.mlp:commencing model fitting
INFO:neon.models.mlp:epoch: 2, training error: 0.22308
WARNING:neon.util.persist:serializing object to: /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.experiments.fit_predict_err:test set MisclassPercentage_TOP_1 3.70593
INFO:neon.experiments.fit_predict_err:train set MisclassPercentage_TOP_1 2.94638
INFO:neon.backends:Cudanet backend, RNG seed: 0, numerr: None
INFO:neon.models.mlp:Layers:
	DataLayer layer: 784 nodes
	FCLayer layer: 784 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 10 nodes, Logistic act_fn
	CostLayer layer: 10 nodes, CrossEntropy cost_fn

INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 784)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (10, 200)
WARNING:neon.util.persist:deserializing object from:  /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.models.mlp:commencing model fitting
INFO:neon.models.mlp:epoch: 3, training error: 0.18462
WARNING:neon.util.persist:serializing object to: /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.experiments.fit_predict_err:test set MisclassPercentage_TOP_1 3.24519
INFO:neon.experiments.fit_predict_err:train set MisclassPercentage_TOP_1 2.38048
INFO:neon.backends:Cudanet backend, RNG seed: 0, numerr: None
INFO:neon.models.mlp:Layers:
	DataLayer layer: 784 nodes
	FCLayer layer: 784 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 10 nodes, Logistic act_fn
	CostLayer layer: 10 nodes, CrossEntropy cost_fn

INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 784)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (10, 200)
WARNING:neon.util.persist:deserializing object from:  /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.models.mlp:commencing model fitting
INFO:neon.models.mlp:epoch: 4, training error: 0.13446
WARNING:neon.util.persist:serializing object to: /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.experiments.fit_predict_err:test set MisclassPercentage_TOP_1 2.99479
INFO:neon.experiments.fit_predict_err:train set MisclassPercentage_TOP_1 1.82626
INFO:neon.backends:Cudanet backend, RNG seed: 0, numerr: None
INFO:neon.models.mlp:Layers:
	DataLayer layer: 784 nodes
	FCLayer layer: 784 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 10 nodes, Logistic act_fn
	CostLayer layer: 10 nodes, CrossEntropy cost_fn

INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 784)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (10, 200)
WARNING:neon.util.persist:deserializing object from:  /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.models.mlp:commencing model fitting
INFO:neon.models.mlp:epoch: 5, training error: 0.11090
WARNING:neon.util.persist:serializing object to: /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.experiments.fit_predict_err:test set MisclassPercentage_TOP_1 2.78446
INFO:neon.experiments.fit_predict_err:train set MisclassPercentage_TOP_1 1.59923
INFO:neon.backends:Cudanet backend, RNG seed: 0, numerr: None
INFO:neon.models.mlp:Layers:
	DataLayer layer: 784 nodes
	FCLayer layer: 784 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 10 nodes, Logistic act_fn
	CostLayer layer: 10 nodes, CrossEntropy cost_fn

INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 784)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (10, 200)
WARNING:neon.util.persist:deserializing object from:  /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.models.mlp:commencing model fitting
INFO:neon.models.mlp:epoch: 6, training error: 0.09527
WARNING:neon.util.persist:serializing object to: /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.experiments.fit_predict_err:test set MisclassPercentage_TOP_1 2.60417
INFO:neon.experiments.fit_predict_err:train set MisclassPercentage_TOP_1 1.36886
INFO:neon.backends:Cudanet backend, RNG seed: 0, numerr: None
INFO:neon.models.mlp:Layers:
	DataLayer layer: 784 nodes
	FCLayer layer: 784 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 10 nodes, Logistic act_fn
	CostLayer layer: 10 nodes, CrossEntropy cost_fn

INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 784)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (10, 200)
WARNING:neon.util.persist:deserializing object from:  /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.models.mlp:commencing model fitting
INFO:neon.models.mlp:epoch: 7, training error: 0.07803
WARNING:neon.util.persist:serializing object to: /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.experiments.fit_predict_err:test set MisclassPercentage_TOP_1 2.73438
INFO:neon.experiments.fit_predict_err:train set MisclassPercentage_TOP_1 1.47403
INFO:neon.backends:Cudanet backend, RNG seed: 0, numerr: None
INFO:neon.models.mlp:Layers:
	DataLayer layer: 784 nodes
	FCLayer layer: 784 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 10 nodes, Logistic act_fn
	CostLayer layer: 10 nodes, CrossEntropy cost_fn

INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 784)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (10, 200)
WARNING:neon.util.persist:deserializing object from:  /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.models.mlp:commencing model fitting
INFO:neon.models.mlp:epoch: 8, training error: 0.07625
WARNING:neon.util.persist:serializing object to: /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.experiments.fit_predict_err:test set MisclassPercentage_TOP_1 2.62420
INFO:neon.experiments.fit_predict_err:train set MisclassPercentage_TOP_1 1.19024
INFO:neon.backends:Cudanet backend, RNG seed: 0, numerr: None
INFO:neon.models.mlp:Layers:
	DataLayer layer: 784 nodes
	FCLayer layer: 784 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 200 nodes, RectLin act_fn
	FCLayer layer: 200 inputs, 10 nodes, Logistic act_fn
	CostLayer layer: 10 nodes, CrossEntropy cost_fn

INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 784)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (200, 200)
INFO:neon.params.val_init:Generating GaussianValGen values of shape (10, 200)
WARNING:neon.util.persist:deserializing object from:  /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.models.mlp:commencing model fitting
INFO:neon.models.mlp:epoch: 9, training error: 0.06536
WARNING:neon.util.persist:serializing object to: /Users/arjun/data/MNIST/mnist-mlp.prm
INFO:neon.experiments.fit_predict_err:test set MisclassPercentage_TOP_1 2.57412
INFO:neon.experiments.fit_predict_err:train set MisclassPercentage_TOP_1 1.23698

Step 3: Visualize the weights


In [5]:
import numpy as np
from matplotlib import pyplot, cm
from IPython.html import widgets
from IPython.html.widgets import interact

# Layout of the filter mosaic: nrows * ncols tiles, one per first-layer unit.
nrows = 10
ncols = 20
def plot_filters(**kwargs):
    """Show the first-layer weights saved for a given epoch as an image mosaic.

    Keyword arguments:
        n -- epoch number of the snapshot to load (0 is the untrained network).

    Loads ``~/data/MNIST/mnist-ep<n>.prm``, reshapes each of the first
    ``nrows * ncols`` weight rows into a 28x28 tile, and displays the tiles
    in a grid filled row by row.

    Raises KeyError if ``n`` is not supplied, and IOError if the snapshot
    file for that epoch does not exist.
    """
    n = kwargs['n']
    dest_path = expanduser('~/data/MNIST/mnist-ep' + str(n) + '.prm')
    # NOTE(review): unpickling can execute arbitrary code; only load snapshot
    # files produced by this notebook.
    # Pickle data is binary: open in 'rb' and close the handle promptly.
    with open(dest_path, 'rb') as weights_file:
        params = pkl.load(weights_file)
    if n > 0:
        # Trained snapshots are full checkpoints keyed by layer name.
        wts = params['layer_1']['weights']
    else:
        # The epoch-0 snapshot is the bare weight array itself.
        wts = params

    side = 28  # each weight row has 784 = 28 * 28 entries, one per input pixel
    W = np.zeros((side * nrows, side * ncols))
    for fi in range(nrows * ncols):
        # Row-major placement: filter fi goes to tile (fi // ncols, fi % ncols).
        row, col = divmod(fi, ncols)
        W[side * row:side * (row + 1),
          side * col:side * (col + 1)] = wts[fi].reshape(side, side)

    pyplot.matshow(W, cmap=cm.gray)
    pyplot.title('Visualizing the 1st layer weights')
    pyplot.show()
    
# Slider over the saved snapshots, from 0 up to max_epochs; moving it
# re-invokes plot_filters with the chosen value as keyword argument n.
epoch_slider = widgets.IntSliderWidget(description='epochs',
                                       min=0, max=max_epochs, value=0)
_i = interact(plot_filters, n=epoch_slider)


Step 4: Visualize the train and test error


In [6]:
# Errors are recorded only for trained epochs, so the x axis runs 1..max_epochs.
epochs = range(1, max_epochs + 1)
for errors, series_name in ((train_err, 'train'), (test_err, 'test')):
    pyplot.plot(epochs, errors, linewidth=3, label=series_name)
pyplot.grid()
pyplot.legend()
pyplot.xlabel("epoch")
pyplot.ylabel("error %")
pyplot.show()



In [ ]: