Classification of the MNIST dataset using Convolutional Neural Networks (CNN) and TensorFlow
So far, for classifying the MNIST dataset, we have seen a single-layer perceptron reach a test accuracy of ~0.92.
A multilayer network with 2 hidden layers got us a test accuracy of ~0.95 (neither is good enough for real-world problems).
Now we are going to understand how to tie "convolution", "pooling", "ReLU", "fc" (fully-connected) and "softmax" layers together.
This model reaches a test accuracy of ~0.99.
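Layer by layer, the tensor shapes will flow as follows (a summary of the cells below): input [batch, 784] → reshape [batch, 28, 28, 1] → conv1 + ReLU + max-pool [batch, 14, 14, 32] → conv2 + ReLU + max-pool [batch, 7, 7, 64] → fc + ReLU + dropout [batch, 1024] → softmax [batch, 10].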
In [1]:
# import
import tensorflow as tf
import numpy as np
seed = 21
np.random.seed(seed)
# import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data/MNIST", one_hot=True)
In [2]:
# params
NUM_CLASS = 10
INPUT_SIZE = 28
INPUT_SIZE_FLAT = INPUT_SIZE * INPUT_SIZE
# network params
LEARNING_RATE = 0.001
DROPOUT = 0.6            # keep probability fed to tf.nn.dropout during training
BATCH_SIZE = 100
TRAINING_ITERS = 200000  # total number of training examples to process (not epochs)
DISPLAY_STEP = 100
# input placeholders
images = tf.placeholder(tf.float32, [None, INPUT_SIZE_FLAT], name="images")
labels = tf.placeholder(tf.float32, [None, NUM_CLASS], name="labels")
keep_prob = tf.placeholder(tf.float32, name='dropout')
# network params
weights = {
    # 5x5 conv filter size, 1 in_channel, 32 out_channels
    'conv1_w': tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1, seed=seed), name="conv1_w"),
    # 5x5 conv filter size, 32 in_channels, 64 out_channels
    'conv2_w': tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=seed), name="conv2_w"),
    # fully connected layer, 7*7*64 inputs, 1024 outputs
    'fc_w': tf.Variable(tf.truncated_normal([7*7*64, 1024], stddev=0.1, seed=seed), name="fc_w"),
    # output layer (softmax layer), 1024 inputs, 10 outputs [NUM_CLASS]
    'soft_w': tf.Variable(tf.truncated_normal([1024, NUM_CLASS], stddev=0.1, seed=seed), name="softmax_w")
}
biases = {
    # number of biases for conv1 layer = out_channels
    'conv1_b': tf.Variable(tf.random_normal([32], stddev=0.1, seed=seed), name="conv1_b"),
    # number of biases for conv2 layer = out_channels
    'conv2_b': tf.Variable(tf.random_normal([64], stddev=0.1, seed=seed), name="conv2_b"),
    # number of biases for fc layer = outputs
    'fc_b': tf.Variable(tf.random_normal([1024], stddev=0.1, seed=seed), name="fc_b"),
    # number of biases for softmax (output) layer
    'soft_b': tf.Variable(tf.random_normal([10], stddev=0.1, seed=seed), name="soft_b")
}
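As a quick sanity check on the sizes above, the number of trainable parameters can be tallied by hand (plain Python arithmetic, just a sketch, not needed for training):
conv1_params = 5*5*1*32 + 32               # 832
conv2_params = 5*5*32*64 + 64              # 51,264
fc_params    = 7*7*64*1024 + 1024          # 3,212,288
soft_params  = 1024*NUM_CLASS + NUM_CLASS  # 10,250
print(conv1_params + conv2_params + fc_params + soft_params)   # 3,274,634 trainable parameters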
In [3]:
####### convolutional layer1 #######
# reshape the flat input [batch, 784] into a 4-D tensor [batch, in_height, in_width, in_channels]
images_reshaped = tf.reshape(images, shape=[-1, 28, 28, 1], name="reshape_input")
# conv
conv1 = tf.nn.conv2d(input=images_reshaped, filter=weights["conv1_w"], strides=[1,1,1,1], padding='SAME')
# relu
conv1 = tf.nn.relu(features=(conv1+biases["conv1_b"]))
# pool
conv1 = tf.nn.max_pool(value=conv1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name="output_of_conv1")
# output dimension => BATCH_SIZE x 14 x 14 x 32
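Why 14 x 14 x 32: with 'SAME' padding and stride 1 the convolution keeps the 28x28 spatial size, the 2x2 max pool with stride 2 halves each spatial dimension (28 / 2 = 14), and the 32 channels come from the filter's out_channels. An optional sanity check (a sketch, not required by the model):
print(conv1.get_shape().as_list())   # expected: [None, 14, 14, 32]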
In [4]:
####### convolutional layer2 #######
# conv
conv2 = tf.nn.conv2d(input=conv1, filter=weights["conv2_w"], strides=[1,1,1,1], padding='SAME')
# relu
conv2 = tf.nn.relu(features=(conv2+biases["conv2_b"]))
# pool
conv2 = tf.nn.max_pool(value=conv2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name="output_of_conv2")
# output dimension => BATCH_SIZE x 7 x 7 x 64
In [5]:
####### fully connected layer #######
# reshape the output of conv2 (4-D) to 2-D so it can be fed to the fully connected layer
fc = tf.reshape(tensor=conv2, shape=[-1, weights["fc_w"].get_shape().as_list()[0]], name="fc_reshape")
# fc
fc = tf.add(tf.matmul(fc,weights["fc_w"]), biases["fc_b"])
# relu
fc = tf.nn.relu(fc)
# dropout
fc = tf.nn.dropout(fc, keep_prob=keep_prob, seed=seed, name="output_of_fc")
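tf.nn.dropout keeps each activation with probability keep_prob and scales the survivors by 1/keep_prob, so the expected sum stays the same; that is why we feed keep_prob = 1.0 at evaluation time to disable it. A tiny standalone illustration (a sketch, separate from the model graph; the demo names are made up here):
drop_demo = tf.nn.dropout(tf.ones([4]), keep_prob=0.5, seed=seed)
with tf.Session() as demo_sess:
    print(demo_sess.run(drop_demo))   # surviving elements become 2.0, dropped ones 0.0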
In [6]:
####### output layer (softmax layer) #######
# this produces raw logits; the softmax itself is applied inside the loss below
logits = tf.add(tf.matmul(fc, weights["soft_w"]), biases["soft_b"])
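If per-class probabilities are wanted at inference time, an explicit softmax op can be added (a sketch; prediction is a name introduced here and not used elsewhere in this notebook):
prediction = tf.nn.softmax(logits, name="prediction")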
In [7]:
# cross entropy
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
# loss
loss = tf.reduce_mean(cross_entropy)
# optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE,name='Adam').minimize(loss)
# accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
In [8]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    step = 1
    while step * BATCH_SIZE < TRAINING_ITERS:
        batch_images, batch_labels = mnist.train.next_batch(BATCH_SIZE)
        # run optimization op (backprop)
        sess.run(fetches=optimizer, feed_dict={images: batch_images, labels: batch_labels, keep_prob: DROPOUT})
        if step % DISPLAY_STEP == 0:
            # calculate batch loss and accuracy
            los, acc = sess.run([loss, accuracy], feed_dict={images: batch_images, labels: batch_labels, keep_prob: 1.0})
            print("iter " + str(step*BATCH_SIZE) + ", batch_loss " + "{:.4f}".format(los) + ", acc " + "{:.4f}".format(acc))
        step += 1
    print("optimization finished")
    # time to calculate test accuracy
    print("Testing accuracy ", sess.run(fetches=accuracy, feed_dict={images: mnist.test.images,
                                                                     labels: mnist.test.labels,
                                                                     keep_prob: 1.0}))
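As a usage sketch, individual predictions can be read out the same way as the test accuracy; to run it, this snippet would need to sit inside the with-block above while sess is still open:
# classify the first 5 test images and compare with the true labels (a sketch)
sample_logits = sess.run(logits, feed_dict={images: mnist.test.images[:5], keep_prob: 1.0})
print("predicted:", np.argmax(sample_logits, axis=1))
print("actual   :", np.argmax(mnist.test.labels[:5], axis=1))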
tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=None, data_format=None, name=None)
This computes a 2-D convolution given 4-D input and filter tensors.
input: a 4-D tensor of shape [batch, in_height, in_width, in_channels]
filter: a 4-D tensor of shape [filter_height, filter_width, in_channels, out_channels]
strides: typically strides[0] = strides[3] = 1, i.e. strides = [1, stride, stride, 1]
padding: 'SAME' or 'VALID'. The padding algorithm.

tf.nn.max_pool(value, ksize, strides, padding, data_format='NHWC', name=None)
This performs max pooling on the input.
value: a 4-D Tensor with shape [batch, height, width, channels] and type tf.float32
padding: 'VALID' or 'SAME'. The padding algorithm.

tf.nn.dropout(x, keep_prob, noise_shape=None, seed=None, name=None)
This computes dropout.
x: a Tensor
keep_prob: the probability that each element is kept
noise_shape: a 1-D Tensor of type int32, representing the shape for randomly generated keep/drop flags
returns: a Tensor with the same type as x
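A minimal, self-contained sketch (toy tensors, separate from the MNIST model above) showing how the shapes in these three signatures fit together:
import tensorflow as tf
x = tf.zeros([8, 28, 28, 1])    # [batch, in_height, in_width, in_channels]
w = tf.zeros([5, 5, 1, 32])     # [filter_height, filter_width, in_channels, out_channels]
y = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')                     # -> (8, 28, 28, 32)
p = tf.nn.max_pool(y, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')  # -> (8, 14, 14, 32)
d = tf.nn.dropout(p, keep_prob=0.5)     # same shape; survivors are rescaled by 1/keep_prob
print(y.get_shape(), p.get_shape(), d.get_shape())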