In [1]:
# import
import tensorflow as tf
# import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST dataset object from the local data directory.
# one_hot=True requests one-hot encoded labels, matching the
# [batch, num_classes] label placeholder used by the model.
mnist_data_dir = "data/MNIST"
mnist = input_data.read_data_sets(mnist_data_dir, one_hot=True)
In [2]:
# ---- configuration ----------------------------------------------------------
# Data dimensions
img_size = 28                    # side length of one square input image, in pixels
img_size_flat = img_size ** 2    # length of one flattened image vector (784)
num_classes = 10                 # width of the label vector and of the output layer

# Optimisation settings
learning_rate = 0.01             # step size handed to the gradient-descent optimizer
batch_size = 100                 # number of examples requested per training step
# NOTE: despite its name, this value is used as the number of mini-batch steps
# in the training loop (one next_batch() call per iteration), not as a count
# of full passes over the training set.
training_epochs = 1000

# Hidden layer widths
hidden_1, hidden_2 = 2048, 2048  # units in hidden layer 1 and hidden layer 2
''' "matrix_multiplication" layer by layer explained-->
# input_layer ==>
x_data [batch_size,784]
# hidden_layer_1 ==>
x_data[batch_size,784] 'mat_mul' w1[784,2048] + b1[2048] ==> output_of_hidden_layer_1[batch_size, 2048]
# hidden_layer_2 ==>
output_of_hidden_layer_1[batch_size,2048] 'mat_mul' w2[2048,2048] + b2[2048] ==> output_of_hidden_layer_2[batch_size,2048]
#output_layer(final) ==>
output_of_hidden_layer_2[batch_size,2048] 'mat_mul' out_w[2048,10] + out_b[10] ==> final prediction[batch_size, 10]
'''
# Pin the graph-level random seed *before* any random op is created, so the
# initial weights and biases below are the same on every fresh run of the
# notebook. This only covers TensorFlow's random ops; the order in which
# mini-batches are drawn during training is not controlled here.
random_seed = 42
tf.set_random_seed(random_seed)

# Placeholders: their values are supplied through feed_dict when the graph runs.
# The leading None dimension lets the same graph accept any batch size.
x_data = tf.placeholder(tf.float32, [None, img_size_flat], name="x_data")    # flattened images, (batch, 784)
x_labels = tf.placeholder(tf.float32, [None, num_classes], name="x_labels")  # label vectors, (batch, 10)

# Trainable parameters. They are tf.Variables because they start from random
# values and are then updated by the optimizer during training.
# Shapes follow the layer chain: 784 -> hidden_1 -> hidden_2 -> num_classes.
weights = {
    'w1': tf.Variable(tf.random_normal([img_size_flat, hidden_1]), name="weights1"),
    'w2': tf.Variable(tf.random_normal([hidden_1, hidden_2]), name="weights2"),
    'out': tf.Variable(tf.random_normal([hidden_2, num_classes]), name="weights_out"),
}
biases = {
    'b1': tf.Variable(tf.random_normal([hidden_1]), name="biases1"),
    'b2': tf.Variable(tf.random_normal([hidden_2]), name="biases2"),
    'out': tf.Variable(tf.random_normal([num_classes]), name="biases_out"),
}
# ---- model graph -------------------------------------------------------------
# Forward pass: two fully connected ReLU layers feeding a linear output layer.
# Inputs arrive in mini-batches through the x_data placeholder.

# hidden layer 1: relu(x_data . w1 + b1)
layer_1 = tf.nn.relu(
    tf.add(tf.matmul(x_data, weights['w1']), biases['b1']),
    name="layer_1")

# hidden layer 2: relu(layer_1 . w2 + b2)
layer_2 = tf.nn.relu(
    tf.add(tf.matmul(layer_1, weights['w2']), biases['b2']),
    name="layer_2")

# output layer: raw class scores with no activation here; the softmax is
# applied inside the loss op below.
logits = tf.add(tf.matmul(layer_2, weights['out']), biases['out'], name="logits")

# Per-example softmax cross-entropy between the labels and the logits.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=x_labels, name="cross_entropy")

# Scalar cost of the model: the mean of the per-example cross-entropies.
cost = tf.reduce_mean(cross_entropy, name="cost")

# Plain gradient descent on `cost`. Note that `optimizer` holds the result of
# minimize(), i.e. the thing that is run once per training step.
gradient_descent = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
optimizer = gradient_descent.minimize(cost)
# ---- run the graph: initialise the variables, then train --------------------
# Op that initialises all variables defined so far.
init = tf.global_variables_initializer()

# Session on the default graph. It is deliberately left open after training
# because the accuracy evaluation further down reuses it.
sess = tf.Session()
sess.run(init)

# Run `training_epochs` iterations; each one pulls the next mini-batch of
# `batch_size` examples and runs the training op on it once.
for step in range(training_epochs):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    train_feed = {x_data: batch_x, x_labels: batch_y}
    sess.run(fetches=optimizer, feed_dict=train_feed)
# ---- evaluation --------------------------------------------------------------
# Per-example boolean: does the arg-max of the label row match the arg-max of
# the corresponding logits row?
how_many_correct = tf.equal(
    tf.argmax(x_labels, axis=1),
    tf.argmax(logits, axis=1),
    name="howmanycorrect")

# Accuracy is the mean of those booleans after casting them to float32.
accuracy = tf.reduce_mean(tf.cast(how_many_correct, tf.float32), name="accuracy")

# Evaluate once on the whole test split with the trained session and print it.
test_feed = {x_data: mnist.test.images, x_labels: mnist.test.labels}
test_accuracy = sess.run(fetches=accuracy, feed_dict=test_feed)
print("accuracy ", test_accuracy)