MLP MNIST with dropout


In [5]:
'''
MLP network for MNIST digits classification with Dropout
Test accuracy: 95.5%

'''

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# numpy package
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Seed both the NumPy and the TensorFlow global random generators with the
# same value; a single variable keeps the two calls in sync.
seed_value = 12345

from numpy.random import seed
seed(seed_value)
import tensorflow as tf
tf.random.set_seed(seed_value)

# load the MNIST train and test splits (images plus integer digit labels)
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# number of distinct classes present in the training labels
num_labels = len(np.unique(y_train))

# convert the integer labels to one-hot vectors.
# The width is pinned to num_labels instead of being inferred separately from
# each split, so the train and test targets always have the same number of
# columns as the network's final Dense(num_labels) layer, even if one split
# happened to be missing the highest class.
y_train = to_categorical(y_train, num_classes=num_labels)
y_test = to_categorical(y_test, num_classes=num_labels)

# image dimensions (assumed square): one side length, then pixels per image
image_size = x_train.shape[1]
input_size = image_size * image_size

# an MLP takes a flat vector per sample, so each image is reshaped to
# input_size values; pixels are cast to float32 and divided by 255 to scale
# them down before training
x_train = np.reshape(x_train, [-1, input_size])
x_train = x_train.astype('float32') / 255
x_test = np.reshape(x_test, [-1, input_size])
x_test = x_test.astype('float32') / 255

# network parameters
batch_size = 128      # samples per gradient step (also used for evaluation)
hidden_units = 256    # width of each hidden Dense layer
dropout = 0.2         # dropout rate applied after each hidden activation

# 3-layer MLP declared as a single layer list:
#   Dense -> ReLU -> Dropout, twice, followed by a Dense layer with one unit
#   per class and a softmax that matches the one-hot encoded targets.
model = Sequential([
    Dense(hidden_units, input_dim=input_size),
    Activation('relu'),
    Dropout(dropout),
    Dense(hidden_units),
    Activation('relu'),
    Dropout(dropout),
    Dense(num_labels),
    Activation('softmax'),
])
model.summary()

# categorical cross-entropy is the loss for one-hot targets;
# the optimizer is plain SGD and accuracy is tracked as the metric
model.compile(
    optimizer='sgd',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# train the network on the training split
model.fit(x=x_train, y=y_train, batch_size=batch_size, epochs=20)

# evaluate on the held-out test split to gauge generalization
score = model.evaluate(
    x=x_test,
    y=y_test,
    batch_size=batch_size,
    verbose=False,
)

# score[1] is the value reported as accuracy; print it as a percentage
test_accuracy = score[1]
print("\nTest accuracy: %.1f%%" % (100.0 * test_accuracy))


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_9 (Dense)              (None, 256)               200960    
_________________________________________________________________
activation_9 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 256)               65792     
_________________________________________________________________
activation_10 (Activation)   (None, 256)               0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 10)                2570      
_________________________________________________________________
activation_11 (Activation)   (None, 10)                0         
=================================================================
Total params: 269,322
Trainable params: 269,322
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples
Epoch 1/20
60000/60000 [==============================] - 6s 99us/sample - loss: 1.3041 - accuracy: 0.6369
Epoch 2/20
60000/60000 [==============================] - 5s 81us/sample - loss: 0.5944 - accuracy: 0.8287
Epoch 3/20
60000/60000 [==============================] - 6s 99us/sample - loss: 0.4671 - accuracy: 0.8633
Epoch 4/20
60000/60000 [==============================] - 7s 111us/sample - loss: 0.4123 - accuracy: 0.8804
Epoch 5/20
60000/60000 [==============================] - 6s 107us/sample - loss: 0.3771 - accuracy: 0.8901
Epoch 6/20
60000/60000 [==============================] - 7s 109us/sample - loss: 0.3495 - accuracy: 0.8982
Epoch 7/20
60000/60000 [==============================] - 8s 132us/sample - loss: 0.3260 - accuracy: 0.9049
Epoch 8/20
60000/60000 [==============================] - 7s 122us/sample - loss: 0.3079 - accuracy: 0.9109
Epoch 9/20
60000/60000 [==============================] - 7s 119us/sample - loss: 0.2911 - accuracy: 0.9148
Epoch 10/20
60000/60000 [==============================] - 7s 123us/sample - loss: 0.2788 - accuracy: 0.9187
Epoch 11/20
60000/60000 [==============================] - 6s 108us/sample - loss: 0.2672 - accuracy: 0.9230
Epoch 12/20
60000/60000 [==============================] - 7s 113us/sample - loss: 0.2549 - accuracy: 0.9258
Epoch 13/20
60000/60000 [==============================] - 7s 120us/sample - loss: 0.2447 - accuracy: 0.9282
Epoch 14/20
60000/60000 [==============================] - 7s 119us/sample - loss: 0.2344 - accuracy: 0.9317
Epoch 15/20
60000/60000 [==============================] - 7s 116us/sample - loss: 0.2270 - accuracy: 0.9339
Epoch 16/20
60000/60000 [==============================] - 6s 105us/sample - loss: 0.2187 - accuracy: 0.9368
Epoch 17/20
60000/60000 [==============================] - 6s 105us/sample - loss: 0.2104 - accuracy: 0.9384
Epoch 18/20
60000/60000 [==============================] - 6s 97us/sample - loss: 0.2045 - accuracy: 0.9409
Epoch 19/20
60000/60000 [==============================] - 4s 75us/sample - loss: 0.2000 - accuracy: 0.9414
Epoch 20/20
60000/60000 [==============================] - 4s 70us/sample - loss: 0.1921 - accuracy: 0.9434

Test accuracy: 95.5%

In [ ]: