Import all the required libraries.
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
Print the TensorFlow version.
Initialize the filter size and number of filters for both convolutional layers. Also, initialize the number of neurons in the fully connected layer.
# Convolutional layer 1: square filter edge length and number of filters.
filter_size1, num_filters1 = 5, 16
# Convolutional layer 2: square filter edge length and number of filters.
filter_size2, num_filters2 = 5, 36
# Number of outputs of the first fully connected layer.
fc_size = 128
Download and extract the training and testing images and labels sets from the MNIST dataset.
from tensorflow.examples.tutorials.mnist import input_data
data = input_data.read_data_sets('data/MNIST/', one_hot=True)
Display the number of labels in each subset (training, test and validation) of the MNIST data.
print("Size of:")
print("- Training-set:\t\t{}".format(len(data.train.labels)))
print("- Test-set:\t\t{}".format(len(data.test.labels)))
print("- Validation-set:\t{}".format(len(data.validation.labels)))
Calculate the class number for each label in the test set.
data.test.cls = np.argmax(data.test.labels, axis=1)
Initialize the dimensions of the image and we know that image size is 28 pixels for the MNIST images.
# Edge length, in pixels, of one (square) image.
img_size = 28
# 2-D shape used to turn a flat image vector back into an image for plotting.
img_shape = (img_size, img_size)
# Length of one image when stored as a flat vector.
img_size_flat = img_shape[0] * img_shape[1]
# Number of channels per image.
num_channels = 1
# Number of label classes.
num_classes = 10
Create a method for displaying images.
def display_images(images, cls_true, cls_pred=None):
    """Plot exactly 16 images in a 4x4 grid, labelled with their classes.

    Args:
        images: sequence of 16 image arrays; each one is reshaped to
            ``img_shape`` before being drawn.
        cls_true: sequence of 16 true class numbers.
        cls_pred: optional sequence of predicted class numbers. When given,
            each label shows both the true and the predicted class.

    Raises:
        AssertionError: if ``images`` and ``cls_true`` do not both have
            length 16 (the grid below has exactly 16 cells).
    """
    assert len(images) == len(cls_true) == 16

    fig, axes = plt.subplots(4, 4)
    fig.subplots_adjust(hspace=0.3, wspace=0.3)

    for i, ax in enumerate(axes.flat):
        ax.imshow(images[i].reshape(img_shape), cmap='binary')

        # Only mention the prediction when one was supplied; indexing
        # cls_pred while it is None would raise a TypeError.
        if cls_pred is None:
            xlabel = "True: {0}".format(cls_true[i])
        else:
            xlabel = "True: {0}, Pred: {1}".format(cls_true[i], cls_pred[i])

        # Attach the label to the sub-plot and hide the axis ticks, which
        # carry no information for an image.
        ax.set_xlabel(xlabel)
        ax.set_xticks([])
        ax.set_yticks([])

    plt.show()
Plot a few sample images.
images = data.test.images[0:16]
cls_true = data.test.cls[0:16]
display_images(images=images, cls_true=cls_true)
Create helper functions that build new TensorFlow variables: weights initialized with random values, and biases initialized to a constant.
def new_weights(shape):
    """Return a new tf.Variable of the given shape.

    The initial values are drawn with tf.truncated_normal using a standard
    deviation of 0.05.
    """
    initial_values = tf.truncated_normal(shape, stddev=0.05)
    return tf.Variable(initial_values)
def new_biases(length):
    """Return a new one-dimensional tf.Variable of `length` entries, all 0.05."""
    initial_values = tf.constant(0.05, shape=[length])
    return tf.Variable(initial_values)
Create a convolutional layer method in the computational graph for TensorFlow by using, input, number of channels, filter size, number of filters. Input is assumed as four-dimensional tensor with these (Image number, X-axis of image, Y-axis of image, Channels of each image) dimensions.
def new_conv_layer(input, num_input_channels, filter_size, num_filters, use_pooling=True):
    """Add a convolutional layer to the graph.

    Args:
        input: output tensor of the previous layer.
        num_input_channels: number of channels in `input`.
        filter_size: width and height of each (square) filter.
        num_filters: number of filters in this layer.
        use_pooling: when True, apply 2x2 max-pooling with stride 2.

    Returns:
        A tuple ``(layer, weights)``: the layer's output tensor and the
        filter-weights variable.
    """
    kernel_shape = [filter_size, filter_size, num_input_channels, num_filters]
    weights = new_weights(shape=kernel_shape)
    biases = new_biases(length=num_filters)

    # Stride 1 in every dimension with 'SAME' padding.
    convolved = tf.nn.conv2d(input=input,
                             filter=weights,
                             strides=[1, 1, 1, 1],
                             padding='SAME')
    layer = convolved + biases

    if use_pooling:
        layer = tf.nn.max_pool(value=layer,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')

    # ReLU is applied after the optional pooling step.
    return tf.nn.relu(layer), weights
The output of the convolutional layer is a 4-dimensional tensor, so it needs to be reduced to a lower dimension. Create a flatten-layer method which reduces the 4-dimensional tensor to 2 dimensions so that the output can be used as input to the fully connected layer. Reshape the input from [image number, image height, image width, number of channels] to [image number, number of features], where the number of features is the product of image height, image width and number of channels.
def flatten_layer(layer):
    """Reshape `layer` to two dimensions, keeping the first dimension.

    Returns:
        A tuple ``(layer_flat, num_features)`` where ``num_features`` is the
        number of elements in dimensions 1 to 3 of the input shape.
    """
    feature_dims = layer.get_shape()[1:4]
    num_features = feature_dims.num_elements()

    # -1 lets TensorFlow work out the size of the first dimension.
    return tf.reshape(layer, [-1, num_features]), num_features
Now create a fully connected layer method that takes a 2-dimensional tensor as input. After creating the weights and biases, compute the layer output as input * weights + biases (a matrix multiplication), optionally followed by ReLU.
def new_fc_layer(input, num_inputs, num_outputs, use_relu=True):
    """Add a fully connected layer to the graph.

    Args:
        input: output tensor of the previous layer.
        num_inputs: number of input features.
        num_outputs: number of outputs of this layer.
        use_relu: when True, apply ReLU to the result.

    Returns:
        The layer's output tensor.
    """
    weights = new_weights(shape=[num_inputs, num_outputs])
    biases = new_biases(length=num_outputs)

    pre_activation = tf.matmul(input, weights) + biases
    return tf.nn.relu(pre_activation) if use_relu else pre_activation
Define a placeholder variable for the input image.
x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='x')
Reshape the variable x to 4-dimension tensor because the input to the convolutional layer is 4-dimension tensor.
x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])
Similarly, create a placeholder variable for true labels.
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
Also, compute the true class number from the one-hot labels using the argmax method. (This is a tensor derived from y_true, not a placeholder.)
# Index of the largest entry along axis 1 of the one-hot label tensor.
# `axis` replaces the deprecated `dimension` keyword of tf.argmax.
y_true_cls = tf.argmax(y_true, axis=1)
Now create a first convolutional layer by calling convolutional layer method as defined before and use pooling to down sample the image resolution by 2.
# First convolutional layer: reads the reshaped input images and applies
# 2x2 max-pooling. The original call was cut off after `filter_size`.
layer_conv1, weights_conv1 = new_conv_layer(
    input=x_image,
    num_input_channels=num_channels,
    filter_size=filter_size1,
    num_filters=num_filters1,
    use_pooling=True,
)
Verify the dimension or shape of the tensor from the output of the first convolutional layer.
Create a second convolutional layer by again calling the convolutional layer method in which the output from the 1st convolutional layer is given as input to the second convolutional layer.
# Second convolutional layer: its input is the first layer's output, so its
# input-channel count equals the first layer's number of filters.
layer_conv2, weights_conv2 = new_conv_layer(
    input=layer_conv1,
    num_input_channels=num_filters1,
    filter_size=filter_size2,
    num_filters=num_filters2,
    use_pooling=True,
)
Verify the dimension or shape of the tensor from the output of the second convolutional layer.
Flatten the output of the 2nd convolutional layer from 4-dimensional tensor to 2-dimensional tensor.
layer_flat, num_features = flatten_layer(layer_conv2)
Verify the dimension of the flattened layer and calculate the number of features.
Implement the fully connected layer by passing flattened layer as input and also pass number of features.
layer_fc1 = new_fc_layer(input=layer_flat,num_inputs=num_features,num_outputs=fc_size,use_relu=True)
Now add another fully connected layer that outputs vectors each of length 10.
layer_fc2 = new_fc_layer(input=layer_fc1,num_inputs=fc_size,num_outputs=num_classes,use_relu=False)
Normalize the output of the second fully connected layer by using softmax method.
y_pred = tf.nn.softmax(layer_fc2)
Also, calculate the class number which is the index of the largest element.
# Index of the largest softmax output along axis 1.
# `axis` replaces the deprecated `dimension` keyword of tf.argmax.
y_pred_cls = tf.argmax(y_pred, axis=1)
Now, calculate the cross-entropy for the output of the second fully connected layer using softmax.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2,labels=y_true)
Find the average cross-entropy over all the image classifications.
cost = tf.reduce_mean(cross_entropy)
Create an AdamOptimizer which is an advanced form of Gradient Descent.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
Create a boolean vector where the prediction class equals the true class.
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
Calculate the classification accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
Create a TensorFlow session to execute a TensorFlow graph.
session = tf.Session()
# The weight and bias variables (and the optimizer's internal variables) must
# be initialized before the graph can be run; without this the first
# session.run on the optimizer fails on uninitialized variables.
session.run(tf.global_variables_initializer())
Set the number of training images used in each optimization batch.
train_batch_size = 64
Create an optimize method to optimize the variables of the network layers and for each iteration, a new batch of data is selected from the training data.
# Running count of optimization iterations performed so far, across calls.
total_iterations = 0


def optimize(num_iterations):
    """Run `num_iterations` optimization steps and print progress.

    Each step fetches a new batch of `train_batch_size` examples from the
    training set and runs the optimizer on it. Whenever the global iteration
    index is a multiple of 100, the accuracy on that same training batch is
    printed. The global `total_iterations` counter is advanced by
    `num_iterations`, and the elapsed wall-clock time is printed at the end.

    Args:
        num_iterations: number of optimization steps to run.
    """
    global total_iterations

    start_time = time.time()

    # Continue numbering from where the previous call stopped.
    for i in range(total_iterations, total_iterations + num_iterations):
        x_batch, y_true_batch = data.train.next_batch(train_batch_size)
        feed_dict_train = {x: x_batch, y_true: y_true_batch}

        # One optimization step on this batch.
        session.run(optimizer, feed_dict=feed_dict_train)

        if i % 100 == 0:
            # Accuracy is measured on the batch that was just trained on.
            acc = session.run(accuracy, feed_dict=feed_dict_train)
            msg = "Optimization Iteration: {0:>6}, Training Accuracy: {1:>6.1%}"
            print(msg.format(i + 1, acc))

    total_iterations += num_iterations

    end_time = time.time()
    time_dif = end_time - start_time
    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
Create method to plot images from the test set that are wrongly classified.
def display_errors(cls_pred, correct):
    """Plot the first 16 misclassified test-set images.

    Args:
        cls_pred: array of predicted class numbers, one per test image.
        correct: boolean array, True where the prediction matches the true
            class of the corresponding test image.
    """
    # Boolean mask selecting the misclassified test images.
    incorrect = np.logical_not(correct)

    images = data.test.images[incorrect]
    cls_pred = cls_pred[incorrect]
    cls_true = data.test.cls[incorrect]

    # display_images() requires exactly 16 images, so there is nothing
    # sensible to plot when fewer than 16 were misclassified.
    if len(images) < 16:
        print("Fewer than 16 misclassified images; nothing to display.")
        return

    display_images(images=images[0:16],
                   cls_true=cls_true[0:16],
                   cls_pred=cls_pred[0:16])
Create a method to compute the classification accuracy on the test set. Here, the test set is split into smaller batches.
# Number of test images pushed through the network per session.run call.
test_batch_size = 256


def print_test_accuracy(show_example_errors=False):
    """Classify the whole test set in batches and print the accuracy.

    Args:
        show_example_errors: when True, also plot examples of misclassified
            images via display_errors().
    """
    num_test = len(data.test.images)

    # Predicted class per test image, filled in batch by batch. int64 matches
    # the default output type of tf.argmax.
    cls_pred = np.zeros(shape=num_test, dtype=np.int64)

    for i in range(0, num_test, test_batch_size):
        # The last batch may be shorter than test_batch_size.
        j = min(i + test_batch_size, num_test)

        images = data.test.images[i:j, :]
        labels = data.test.labels[i:j, :]
        feed_dict = {x: images, y_true: labels}

        cls_pred[i:j] = session.run(y_pred_cls, feed_dict=feed_dict)

    cls_true = data.test.cls
    correct = (cls_true == cls_pred)

    # Summing a boolean array counts its True entries.
    correct_sum = correct.sum()
    acc = float(correct_sum) / num_test

    msg = "Accuracy on Test-Set: {0:.1%} ({1} / {2})"
    print(msg.format(acc, correct_sum, num_test))

    if show_example_errors:
        display_errors(cls_pred=cls_pred, correct=correct)
# NOTE(review): the original comment said "We performed 100 iterations above",
# but no earlier optimize() call is visible in this file — confirm whether a
# preceding run was dropped.
optimize(num_iterations=900)
