MLP MNIST with L2 regularizer


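A note on what the regularizer does before the listing: l2(0.0001) adds a penalty of 0.0001 times the sum of squared kernel weights to the loss for each regularized layer, discouraging large weights. A minimal numpy sketch of that penalty for a single kernel (the random weight matrix here is illustrative only, not taken from the trained model):

In [ ]:
import numpy as np

# illustrative kernel; in the model this is a Dense layer's weight matrix
w = np.random.randn(256, 256).astype('float32')
# penalty that l2(0.0001) contributes to the total loss for this kernel
penalty = 0.0001 * np.sum(np.square(w))
print('l2 penalty: %.4f' % penalty)
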
In [9]:
'''
MLP network for MNIST digits classification with L2 regularization
Test accuracy: 95.3%
'''

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# numpy and keras packages
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.datasets import mnist
# only the l2 regularizer is used in this example
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical

# for reproducible results, seed both numpy and tensorflow
from numpy.random import seed
seed(12345)
import tensorflow as tf
tf.random.set_seed(12345)

# load mnist dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# compute the number of labels
num_labels = len(np.unique(y_train))

# convert to one-hot vector
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# image dimensions (assumed square)
image_size = x_train.shape[1]
input_size = image_size * image_size
# for an mlp, the input is a 1-d vector, so we reshape the images
x_train = np.reshape(x_train, [-1, input_size])
# we train the network on float data normalized to [0, 1]
x_train = x_train.astype('float32') / 255
x_test = np.reshape(x_test, [-1, input_size])
x_test = x_test.astype('float32') / 255

# network parameters
batch_size = 128
hidden_units = 256

# l2(0.0001) adds 0.0001 * sum of squared kernel weights to the loss
kernel_regularizer = l2(0.0001)
# this is a 3-layer MLP with ReLU and an l2 kernel regularizer
model = Sequential()
model.add(Dense(hidden_units,
                kernel_regularizer=kernel_regularizer,
                input_dim=input_size))
model.add(Activation('relu'))
model.add(Dense(hidden_units,
                kernel_regularizer=kernel_regularizer))
model.add(Activation('relu'))
model.add(Dense(num_labels))
# this is the output layer for the one-hot vector
model.add(Activation('softmax'))
model.summary()

# loss function for one-hot vector
# use the sgd optimizer
# accuracy is a good metric for classification tasks
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
# train the network
model.fit(x_train, y_train, epochs=20, batch_size=batch_size)

# validate the model on test dataset to determine generalization
score = model.evaluate(x_test,
                       y_test,
                       batch_size=batch_size,
                       verbose=0)
print("\nTest accuracy: %.1f%%" % (100.0 * score[1]))


Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_21 (Dense)             (None, 256)               200960    
_________________________________________________________________
activation_21 (Activation)   (None, 256)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 256)               65792     
_________________________________________________________________
activation_22 (Activation)   (None, 256)               0         
_________________________________________________________________
dense_23 (Dense)             (None, 10)                2570      
_________________________________________________________________
activation_23 (Activation)   (None, 10)                0         
=================================================================
Total params: 269,322
Trainable params: 269,322
Non-trainable params: 0
_________________________________________________________________
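
The parameter counts follow from the layer shapes: 784 × 256 + 256 = 200,960 for the first Dense layer, 256 × 256 + 256 = 65,792 for the second, and 256 × 10 + 10 = 2,570 for the output layer, giving 269,322 in total.
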
Train on 60000 samples
Epoch 1/20
60000/60000 [==============================] - 4s 70us/sample - loss: 1.2198 - accuracy: 0.7331
Epoch 2/20
60000/60000 [==============================] - 3s 58us/sample - loss: 0.5291 - accuracy: 0.8793
Epoch 3/20
60000/60000 [==============================] - 3s 51us/sample - loss: 0.4349 - accuracy: 0.8981
Epoch 4/20
60000/60000 [==============================] - 3s 51us/sample - loss: 0.3943 - accuracy: 0.9078
Epoch 5/20
60000/60000 [==============================] - 3s 53us/sample - loss: 0.3688 - accuracy: 0.9144
Epoch 6/20
60000/60000 [==============================] - 4s 59us/sample - loss: 0.3495 - accuracy: 0.9197
Epoch 7/20
60000/60000 [==============================] - 3s 53us/sample - loss: 0.3337 - accuracy: 0.9245
Epoch 8/20
60000/60000 [==============================] - 3s 53us/sample - loss: 0.3205 - accuracy: 0.9277
Epoch 9/20
60000/60000 [==============================] - 3s 53us/sample - loss: 0.3083 - accuracy: 0.9312
Epoch 10/20
60000/60000 [==============================] - 4s 63us/sample - loss: 0.2979 - accuracy: 0.9344
Epoch 11/20
60000/60000 [==============================] - 4s 62us/sample - loss: 0.2879 - accuracy: 0.9374
Epoch 12/20
60000/60000 [==============================] - 4s 62us/sample - loss: 0.2792 - accuracy: 0.9403
Epoch 13/20
60000/60000 [==============================] - 4s 63us/sample - loss: 0.2708 - accuracy: 0.9425
Epoch 14/20
60000/60000 [==============================] - 4s 59us/sample - loss: 0.2631 - accuracy: 0.9447
Epoch 15/20
60000/60000 [==============================] - 3s 54us/sample - loss: 0.2556 - accuracy: 0.9465
Epoch 16/20
60000/60000 [==============================] - 3s 55us/sample - loss: 0.2491 - accuracy: 0.9482
Epoch 17/20
60000/60000 [==============================] - 3s 54us/sample - loss: 0.2427 - accuracy: 0.9499
Epoch 18/20
60000/60000 [==============================] - 3s 55us/sample - loss: 0.2365 - accuracy: 0.9512
Epoch 19/20
60000/60000 [==============================] - 3s 58us/sample - loss: 0.2311 - accuracy: 0.9531
Epoch 20/20
60000/60000 [==============================] - 4s 65us/sample - loss: 0.2255 - accuracy: 0.9546

Test accuracy: 95.3%
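
Since the regularizer folds its penalties into the training loss, the loss reported per epoch is slightly higher than the cross-entropy alone. One way to inspect the penalty terms (a sketch, assuming the TensorFlow 2 eager mode used above) is to sum model.losses, which Keras populates with the per-layer regularization losses:

In [ ]:
# total l2 penalty currently added to the training loss
reg_loss = tf.add_n(model.losses)
print('l2 penalty: %.4f' % reg_loss.numpy())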
