Author: Philip Reschke (http://www.philipreschke.com)
Project: https://github.com/PhilipReschke/TensorFlow-Code-Examples
I will build a deep CNN using a combination of convolution, max pooling, and fully connected layers. The test accuracy should reach 99%+ after 15+ epochs! I only ran the workbook below for 2 epochs - go ahead and run more on your own GPU!
In [1]:
import tensorflow as tf
I use the MNIST database of handwritten digits found at http://yann.lecun.com/exdb/mnist/.
I am not reshaping the images because I need them in their original dimensions in order to apply convolution and pooling.
In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist_data = input_data.read_data_sets("MNIST_data/", one_hot=True, reshape=False)
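As a quick sanity check (a small sketch; the counts assume the loader's standard 55,000/5,000/10,000 split), with reshape=False the images keep their 28x28x1 layout, which is exactly what the convolution layers below expect:
print(mnist_data.train.images.shape)       # expected: (55000, 28, 28, 1)
print(mnist_data.validation.images.shape)  # expected: (5000, 28, 28, 1)
print(mnist_data.test.images.shape)        # expected: (10000, 28, 28, 1)
print(mnist_data.train.labels.shape)       # expected: (55000, 10), one-hot labels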
In this section, I am defining the training hyperparameters, the network parameters, and the input placeholders.
In [3]:
# Hyperparameters
training_epochs = 2
learning_rate = 0.005
batch_size = 256
# Network parameters
n_classes = 10
keep_probability = 0.70
valid_size = 256
test_size = 4096
print_loss_for_each_batch = 10
# Network placeholders
x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1])
y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])
keep_prob = tf.placeholder(dtype=tf.float32)
Before I can build my graph, I need to define the layers that I want to use. I am building a function for each of the following layers: convolution (with batch normalization and ReLU activation), max pooling, flatten, fully connected, and output.
In [4]:
def layer_conv2d(x_tensor, num_outputs, k_size, strides, padding='SAME'):
"""
Apply convolution to x_tensor
:param x_tensor: TensorFlow Tensor
:param num_outputs: Number of outputs for the convolutional layer
    :param k_size: kernel size 2-D Tuple for the convolutional layer
:param strides: Stride 2-D Tuple for convolution
:param padding: Desired padding - 'SAME' or 'VALID'
: return: A tensor that represents convolution of x_tensor
"""
# Define weight and bias
W = tf.Variable(tf.truncated_normal(shape=[*k_size, x_tensor.get_shape().as_list()[3], num_outputs],
stddev=0.1) * tf.sqrt(2.0/x_tensor.get_shape().as_list()[3]))
b = tf.Variable(tf.truncated_normal(shape=[num_outputs],
stddev=0.1))
# Calculate Conv Layer
conv_layer = tf.nn.conv2d(input=x_tensor,
filter=W,
strides=[1, *strides, 1],
padding=padding)
conv_layer = tf.nn.bias_add(conv_layer, b)
# Apply batch normalization before activation
batch_mean, batch_var = tf.nn.moments(conv_layer, [0])
scale = tf.Variable(tf.ones([num_outputs]))
beta = tf.Variable(tf.zeros([num_outputs]))
conv_layer = tf.nn.batch_normalization(conv_layer, batch_mean, batch_var, beta, scale, 0.000001)
    # Conv activation using ReLU
conv_layer = tf.nn.relu(conv_layer)
return conv_layer
def layer_max_pool(x_tensor, k_size, strides, padding='SAME'):
"""
Apply max pooling to x_tensor
:param x_tensor: TensorFlow Tensor
    :param k_size: kernel size 2-D Tuple for pool
:param strides: Stride 2-D Tuple for pool
:param padding: Desired padding - 'SAME' or 'VALID'
: return: A tensor that represents max pooling of x_tensor
"""
# Apply max pooling
conv_layer = tf.nn.max_pool(value=x_tensor,
ksize=[1, *k_size, 1],
strides=[1, *strides, 1],
padding=padding)
return conv_layer
def layer_flatten(x_tensor):
"""
Flatten x_tensor to (Batch Size, Flattened Image Size)
: x_tensor: A tensor of size (Batch Size, ...), where ... are the image dimensions.
: return: A tensor of size (Batch Size, Flattened Image Size).
"""
# Calculate image size
img_size = x_tensor.get_shape().as_list()[1] * x_tensor.get_shape().as_list()[2] * x_tensor.get_shape().as_list()[3]
flat_layer = tf.reshape(tensor=x_tensor, shape=[-1, img_size])
return flat_layer
def layer_fully_connected(x_tensor, num_outputs, relu=True):
"""
Apply a Fully Connected layer to x_tensor
:param x_tensor: Flattened TensorFlow Tensor
:param num_outputs: Number of outputs for the fully connected layer
    :param relu: Boolean turning ReLU activation on or off
: return: A x_tensor to which a fully connected layer has been applied
"""
# Define weight and bias
W = tf.Variable(tf.truncated_normal(shape=[x_tensor.get_shape().as_list()[1], num_outputs],
stddev=0.1) * tf.sqrt(2.0/x_tensor.get_shape().as_list()[1]))
b = tf.Variable(tf.truncated_normal(shape=[num_outputs],
stddev=0.1))
# Calculate fully connected layer
fc_layer = tf.add(tf.matmul(x_tensor, W), b)
# Apply batch normalization before activation
batch_mean, batch_var = tf.nn.moments(fc_layer, [0])
scale = tf.Variable(tf.ones([num_outputs]))
beta = tf.Variable(tf.zeros([num_outputs]))
fc_layer = tf.nn.batch_normalization(fc_layer, batch_mean, batch_var, beta, scale, 0.000001)
    # Fully connected layer activation using ReLU
if relu:
fc_layer = tf.nn.relu(fc_layer)
return fc_layer
def layer_output(x_tensor, num_outputs):
"""
    Apply an output layer to x_tensor using weight and bias
    : x_tensor: A 2-D tensor where the first dimension is batch size.
    : num_outputs: The number of outputs that the new tensor should have.
: return: A 2-D tensor where the second dimension is num_outputs.
"""
# Define weight and bias
W = tf.Variable(tf.truncated_normal(shape=[x_tensor.get_shape().as_list()[1], num_outputs],
stddev=0.1) * tf.sqrt(2.0/x_tensor.get_shape().as_list()[1]))
b = tf.Variable(tf.truncated_normal(shape=[num_outputs],
stddev=0.1))
# Calculate output layer
output_layer = tf.add(tf.matmul(x_tensor, W), b)
return output_layer
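A minimal shape check for these helpers (a sketch using a hypothetical 32-filter layer that is not part of the model built below, so it only adds throwaway nodes to the graph):
demo = layer_conv2d(x_tensor=x, num_outputs=32, k_size=(3, 3), strides=(1, 1))  # (?, 28, 28, 32): 'SAME' padding keeps 28x28
demo = layer_max_pool(x_tensor=demo, k_size=(2, 2), strides=(2, 2))             # (?, 14, 14, 32): 2x2 pool at stride 2 halves it
demo = layer_flatten(demo)                                                      # (?, 6272): 14 * 14 * 32
print(demo.get_shape())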
To improve the prediction accuracy of the ConvNet, I have decided on a fairly deep network that mixes convolution, pooling, and fully connected layers.
In [5]:
def cov_network(x, keep_prob):
"""
Create a convolutional neural network model
: x: Placeholder tensor that holds image data.
    : keep_prob: Placeholder tensor that holds the dropout keep probability.
: return: Tensor that represents logits
"""
# Layer: Convolution #1-1
net = layer_conv2d(x_tensor=x,
num_outputs=64,
k_size=(3,3),
strides=(1,1),
padding='SAME')
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Convolution #1-2
net = layer_conv2d(x_tensor=net,
num_outputs=64,
k_size=(3,3),
strides=(1,1),
padding='SAME')
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Max Pool #1-1
net = layer_max_pool(x_tensor=net,
k_size=(2, 2),
strides=(2, 2),
padding='SAME')
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Convolution #2-1
net = layer_conv2d(x_tensor=net,
num_outputs=128,
k_size=(3,3),
strides=(1,1),
padding='SAME')
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Convolution #2-2
net = layer_conv2d(x_tensor=net,
num_outputs=128,
k_size=(3,3),
strides=(1,1),
padding='SAME')
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Max Pool #2-1
net = layer_max_pool(x_tensor=net,
k_size=(2, 2),
strides=(2, 2),
padding='SAME')
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Convolution #3-1
net = layer_conv2d(x_tensor=net,
num_outputs=256,
k_size=(3,3),
strides=(1,1),
padding='SAME')
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Convolution #3-2
net = layer_conv2d(x_tensor=net,
num_outputs=256,
k_size=(3,3),
strides=(1,1),
padding='SAME')
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Max Pool #3-1
net = layer_max_pool(x_tensor=net,
k_size=(2, 2),
strides=(2, 2),
padding='SAME')
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Flatten
net = layer_flatten(net)
# Layer: Fully Connected #1
net = layer_fully_connected(x_tensor=net, num_outputs=1024, relu=True)
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Fully Connected #2
net = layer_fully_connected(x_tensor=net, num_outputs=1024, relu=True)
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Fully Connected #3
net = layer_fully_connected(x_tensor=net, num_outputs=512, relu=False)
net = tf.nn.dropout(x=net, keep_prob=keep_prob)
# Layer: Output
net = layer_output(x_tensor=net, num_outputs=10)
# Return Output Layer
return net
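As a quick check on the spatial arithmetic (a sketch of the bookkeeping only, not executed by the model): the stride-1 'SAME' convolutions keep the 28x28 size, and each 2x2 max pool with stride 2 halves it with ceiling rounding, so the flatten layer sees 4 x 4 x 256 = 4096 features.
import math
size = 28
for _ in range(3):               # three 2x2/2 max-pool layers with 'SAME' padding
    size = math.ceil(size / 2)   # 28 -> 14 -> 7 -> 4
print(size * size * 256)         # 4096 features feeding the first fully connected layer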
Finally, I define the loss, optimizer, and accuracy operations and run the training session, printing the training and validation accuracy along the way and the test accuracy at the end.
In [6]:
# Calculate logits
logits = cov_network(x, keep_prob)
# Calculate loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,
labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss=cost)
# Calculate prediction accuracy
correct_prediction = tf.equal(tf.argmax(logits, axis=1), tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
# Define global variable initializer
init = tf.global_variables_initializer()
# Launch session
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epochs):
for batch in range(mnist_data.train.num_examples//batch_size):
# Get x and y values for the given batch
batch_x, batch_y = mnist_data.train.next_batch(batch_size)
sess.run(optimizer, feed_dict={x: batch_x,
y: batch_y,
keep_prob: keep_probability})
# Calculate batch loss and accuracy
loss, training_accuracy = sess.run([cost, accuracy], feed_dict={x: batch_x,
y: batch_y,
keep_prob: 1.})
            # Evaluate accuracy on the validation data
validation_accuracy = sess.run(accuracy, feed_dict={
x: mnist_data.validation.images[:valid_size],
y: mnist_data.validation.labels[:valid_size],
keep_prob: 1.})
# Display logs per batch step
if batch % print_loss_for_each_batch == 0:
print('Epoch {:>2}, Batches {:>3}, Loss: {:>.4f}, Train Accuracy: {:.4f}, Val Accuracy: {:.4f}'.format(
epoch + 1, # epoch starts at 0
batch + 1, # batch starts at 0
loss,
training_accuracy,
validation_accuracy))
# Calculate Test Accuracy
test_acc = sess.run(accuracy, feed_dict={
x: mnist_data.test.images[:test_size],
y: mnist_data.test.labels[:test_size],
keep_prob: 1.})
print('Testing Accuracy: {}'.format(test_acc))
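Since test_size limits the evaluation above to the first 4,096 test images, a sketch for scoring the full test set in memory-friendly chunks could look like the following (it assumes it is placed inside the same tf.Session block, before the session closes):
full_test_acc = 0.0
n_test = mnist_data.test.num_examples
for start in range(0, n_test, valid_size):
    end = min(start + valid_size, n_test)
    chunk_acc = sess.run(accuracy, feed_dict={
        x: mnist_data.test.images[start:end],
        y: mnist_data.test.labels[start:end],
        keep_prob: 1.})
    full_test_acc += chunk_acc * (end - start)   # weight each chunk by its size
print('Full Test Accuracy: {:.4f}'.format(full_test_acc / n_test))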