MLP MNIST classifier with no regularization


In [9]:
'''
MLP network for MNIST digits classification
Test accuracy: 95.3%
'''

# numpy package
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

from numpy.random import seed
seed(12345)
import tensorflow as tf
tf.random.set_seed(12345)
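# note: seeding numpy and tensorflow makes runs repeatable,
# though GPU ops may still introduce small nondeterminism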

# load mnist dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# compute the number of labels
num_labels = len(np.unique(y_train))

# convert to one-hot vector
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
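# e.g., label 5 becomes the 10-dim vector [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]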

# image dimensions (assumed square)
image_size = x_train.shape[1]
input_size = image_size * image_size
# for an MLP, the input must be a 1D vector, so we flatten each image
x_train = np.reshape(x_train, [-1, input_size])
# scale pixel values to [0, 1] and train in float32
x_train = x_train.astype('float32') / 255
x_test = np.reshape(x_test, [-1, input_size])
x_test = x_test.astype('float32') / 255
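# x_train is now (60000, 784) and x_test is (10000, 784)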

# network parameters
batch_size = 128
hidden_units = 256

# this is a 3-layer MLP with ReLU activations and no regularizer
model = Sequential()
model.add(Dense(hidden_units, input_dim=input_size))
model.add(Activation('relu'))
model.add(Dense(hidden_units))
model.add(Activation('relu'))
model.add(Dense(num_labels))
# softmax turns the output into per-class probabilities matching the one-hot labels
model.add(Activation('softmax'))
model.summary()

# categorical_crossentropy is the loss function for one-hot labels
# use the sgd optimizer as a simple baseline
# accuracy is a good metric for classification tasks
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
# train the network
model.fit(x_train, y_train, epochs=20, batch_size=batch_size)

# validate the model on test dataset to determine generalization
score = model.evaluate(x_test, 
                       y_test, 
                       batch_size=batch_size,
                       verbose=0)
print("\nTest accuracy: %.1f%%" % (100.0 * score[1]))


Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_18 (Dense)             (None, 256)               200960    
_________________________________________________________________
activation_18 (Activation)   (None, 256)               0         
_________________________________________________________________
dense_19 (Dense)             (None, 256)               65792     
_________________________________________________________________
activation_19 (Activation)   (None, 256)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 10)                2570      
_________________________________________________________________
activation_20 (Activation)   (None, 10)                0         
=================================================================
Total params: 269,322
Trainable params: 269,322
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples
Epoch 1/20
60000/60000 [==============================] - 3s 53us/sample - loss: 1.1551 - accuracy: 0.7331
Epoch 2/20
60000/60000 [==============================] - 3s 47us/sample - loss: 0.4637 - accuracy: 0.8794
Epoch 3/20
60000/60000 [==============================] - 3s 43us/sample - loss: 0.3692 - accuracy: 0.8982
Epoch 4/20
60000/60000 [==============================] - 2s 37us/sample - loss: 0.3284 - accuracy: 0.9078
Epoch 5/20
60000/60000 [==============================] - 2s 39us/sample - loss: 0.3028 - accuracy: 0.9144
Epoch 6/20
60000/60000 [==============================] - 2s 38us/sample - loss: 0.2834 - accuracy: 0.9197
Epoch 7/20
60000/60000 [==============================] - 2s 38us/sample - loss: 0.2675 - accuracy: 0.9246
Epoch 8/20
60000/60000 [==============================] - 2s 40us/sample - loss: 0.2543 - accuracy: 0.9279
Epoch 9/20
60000/60000 [==============================] - 4s 61us/sample - loss: 0.2420 - accuracy: 0.9314
Epoch 10/20
60000/60000 [==============================] - 3s 51us/sample - loss: 0.2315 - accuracy: 0.9345
Epoch 11/20
60000/60000 [==============================] - 3s 50us/sample - loss: 0.2215 - accuracy: 0.9375
Epoch 12/20
60000/60000 [==============================] - 3s 58us/sample - loss: 0.2127 - accuracy: 0.9404
Epoch 13/20
60000/60000 [==============================] - 3s 57us/sample - loss: 0.2043 - accuracy: 0.9426
Epoch 14/20
60000/60000 [==============================] - 3s 55us/sample - loss: 0.1965 - accuracy: 0.9449
Epoch 15/20
60000/60000 [==============================] - 3s 51us/sample - loss: 0.1890 - accuracy: 0.9468
Epoch 16/20
60000/60000 [==============================] - 4s 63us/sample - loss: 0.1824 - accuracy: 0.9483
Epoch 17/20
60000/60000 [==============================] - 3s 52us/sample - loss: 0.1760 - accuracy: 0.9501
Epoch 18/20
60000/60000 [==============================] - 3s 51us/sample - loss: 0.1697 - accuracy: 0.9514
Epoch 19/20
60000/60000 [==============================] - 3s 51us/sample - loss: 0.1643 - accuracy: 0.9534
Epoch 20/20
60000/60000 [==============================] - 3s 52us/sample - loss: 0.1587 - accuracy: 0.9549

Test accuracy: 95.3%
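
As a sanity check on the summary above, the parameter counts follow from dense-layer arithmetic: a Dense layer with n inputs and m units holds n * m weights plus m biases. A minimal sketch recomputing the totals (plain Python, independent of Keras):

In [ ]:
# dense layer parameters = inputs * units + units (weights plus biases)
input_size, hidden_units, num_labels = 784, 256, 10
dense_1 = input_size * hidden_units + hidden_units    # 200,960
dense_2 = hidden_units * hidden_units + hidden_units  # 65,792
dense_3 = hidden_units * num_labels + num_labels      # 2,570
print(dense_1 + dense_2 + dense_3)                    # 269,322, matches model.summary()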
