Classification of the handwritten-digit MNIST dataset using a 2-hidden-layer network with ReLU as the activation function

Accuracy ~ 95%


In [1]:
# import 
import tensorflow as tf

# import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("data/MNIST", one_hot=True)


Extracting data/MNIST\train-images-idx3-ubyte.gz
Extracting data/MNIST\train-labels-idx1-ubyte.gz
Extracting data/MNIST\t10k-images-idx3-ubyte.gz
Extracting data/MNIST\t10k-labels-idx1-ubyte.gz
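As a quick sanity check (not part of the original notebook), the loaded splits can be inspected; the shapes below assume the loader's standard 55,000/5,000/10,000 train/validation/test split.

# optional: inspect the shapes of the loaded splits
print(mnist.train.images.shape)   # (55000, 784), flattened 28x28 images
print(mnist.train.labels.shape)   # (55000, 10), one-hot labels because one_hot=True
print(mnist.test.images.shape)    # (10000, 784)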

In [2]:
# parameters
num_classes = 10
img_size = 28 
img_size_flat = img_size * img_size # 784
batch_size = 100
learning_rate = 0.01
training_epochs = 1000 # number of training steps; each step trains on one batch of 100 images

# hidden layers parameters
hidden_1 = 2048 # number of features in hidden layer 1
hidden_2 = 2048 # number of features in hidden layer 2

''' "matrix_multiplication" layer by layer explained-->

# input_layer  ==>             
x_data [batch_size,784]

# hidden_layer_1  ==>  
x_data[batch_size,784]  'mat_mul'  w1[784,2048]  +  b1[2048]  ==> output_of_hidden_layer_1[batch_size, 2048]

# hidden_layer_2  ==>  
output_of_hidden_layer_1[batch_size,2048]  'mat_mul'  w2[2048,2048]  +  b2[2048] ==> output_of_hidden_layer_1[batch_size,2048]

#output_layer(final) ==>
output_of_hidden_layer_1[batch_size,2048] 'mat_mul'  out_w[2048,10]  +  out_b[10]  ==>  final prediction[batch_size, 10]
'''
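# A minimal shape check of the chain above (not in the original notebook), using NumPy
# zero arrays as stand-ins for the tensors; batch_size of 100 matches the parameter set earlier.
import numpy as np
_x = np.zeros((100, 784))
_h1 = _x.dot(np.zeros((784, 2048))) + np.zeros(2048)      # (100, 2048)
_h2 = _h1.dot(np.zeros((2048, 2048))) + np.zeros(2048)    # (100, 2048)
_out = _h2.dot(np.zeros((2048, 10))) + np.zeros(10)       # (100, 10)
print(_x.shape, _h1.shape, _h2.shape, _out.shape)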

# input data x
x_data = tf.placeholder(tf.float32, [None, img_size_flat], name="x_data") # shape = (batch_size, 784)

# input data labels 
x_labels = tf.placeholder(tf.float32, [None, num_classes], name="x_labels") # shape = (batch_size, 10)

# random weights and biases
# they are tf.Variable because they start from random values and are then updated during training
weights = {'w1': tf.Variable(tf.random_normal([img_size_flat,hidden_1]), name="weights1"),
          'w2': tf.Variable(tf.random_normal([hidden_1, hidden_2]), name="weights2"),
          'out': tf.Variable(tf.random_normal([hidden_2, num_classes]), name="weights_out")}

biases = {'b1': tf.Variable(tf.random_normal([hidden_1]),name="biases1"),
         'b2': tf.Variable(tf.random_normal([hidden_2]),name="biases2"),
         'out': tf.Variable(tf.random_normal([num_classes]),name="biases_out")}
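# Design note (not from the original notebook): tf.random_normal defaults to stddev=1.0,
# which is fairly large for 784- and 2048-wide layers. A scaled initialization such as the
# commented sketch below is a common alternative; the stddev value 0.1 is an untuned assumption.
# weights = {'w1': tf.Variable(tf.random_normal([img_size_flat, hidden_1], stddev=0.1), name="weights1"),
#            'w2': tf.Variable(tf.random_normal([hidden_1, hidden_2], stddev=0.1), name="weights2"),
#            'out': tf.Variable(tf.random_normal([hidden_2, num_classes], stddev=0.1), name="weights_out")}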


# creating the network with 2 hidden layers and 'ReLU' as the activation function

# input layer:
# data will be fed in batches

# layer_1:
layer_1 = tf.add(tf.matmul(x_data, weights['w1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1, name="layer_1")

# layer_2:
layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
layer_2 = tf.nn.relu(layer_2, name="layer_2")

# output layer:
# we don't apply any activation here; the softmax is applied inside the loss below
logits = tf.add(tf.matmul(layer_2, weights['out']) , biases['out'], name="logits")


# from here on, it is the same as the single-layer network

# apply softmax to the logits and compute the per-example cross-entropy
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=x_labels, logits=logits, name="cross_entropy")

# the mean of the per-example cross-entropy is the cost of our model
cost = tf.reduce_mean(cross_entropy, name="cost")

#optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
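# If plain gradient descent converges slowly, a common drop-in alternative (not used in this
# run) is the Adam optimizer; the 0.001 learning rate below is an assumption, not tuned here.
# optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)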

### Now that the computation graph is ready, we need to run it using tf.Session()

# initialize the variables
init = tf.global_variables_initializer()

# define a session with default graph
sess = tf.Session()
sess.run(init)

# training loop: each iteration fetches one batch and runs one optimizer step
for _ in range(training_epochs):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    sess.run(fetches=optimizer, feed_dict={x_data:batch_x, x_labels:batch_y})
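# A sketch of the same loop that also reports the batch cost every 100 steps (the interval
# is an arbitrary choice); it would replace the loop above rather than run in addition to it.
# for step in range(training_epochs):
#     batch_x, batch_y = mnist.train.next_batch(batch_size)
#     _, batch_cost = sess.run([optimizer, cost], feed_dict={x_data: batch_x, x_labels: batch_y})
#     if step % 100 == 0:
#         print("step", step, "batch cost", batch_cost)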
    
# compare the predicted class (argmax of the logits) with the true class
how_many_correct = tf.equal(tf.argmax(x_labels, axis=1), tf.argmax(logits, axis=1), name="howmanycorrect")

# accuracy
accuracy = tf.reduce_mean(tf.cast(how_many_correct, tf.float32), name="accuracy")

# print final accuracy on test data
print("accuracy ", sess.run(fetches=accuracy, feed_dict={x_data:mnist.test.images, x_labels:mnist.test.labels}))


accuracy  0.9519
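As a small follow-up, not in the original notebook, the trained graph can also classify a single test image; the index 0 below is arbitrary.

# predict the class of one test image with the trained network
single_image = mnist.test.images[0:1]   # shape (1, 784)
predicted = sess.run(tf.argmax(logits, axis=1), feed_dict={x_data: single_image})
print("predicted digit:", predicted[0])

# release the session's resources when done
sess.close()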