Try not to peek at the solutions when you go through the exercises. ;-)
First let's make sure this notebook works well in both Python 2 and Python 3:
In [ ]:
from __future__ import absolute_import, division, print_function, unicode_literals
In [ ]:
import tensorflow as tf  # this notebook uses the TensorFlow 1.x API
tf.__version__
In [ ]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")
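read_data_sets() splits MNIST into training, validation, and test sets (by default 55,000, 5,000, and 10,000 images); we will use the validation set for early stopping later on. A quick check:
In [ ]:
print(mnist.train.num_examples, mnist.validation.num_examples, mnist.test.num_examples)  # 55000 5000 10000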
In [ ]:
def get_model_params():
    # Snapshot the current value of every global variable, keyed by variable name
    gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    return {gvar.op.name: value for gvar, value in zip(gvars, tf.get_default_session().run(gvars))}
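This helper snapshots the current value of every global variable into a dict keyed by variable name; together with restore_model_params() (defined further down), it lets us roll back to the best model found during early stopping without writing checkpoints to disk. A quick round trip on a throwaway graph (a sketch; the names are made up for illustration):
In [ ]:
demo_graph = tf.Graph()
with demo_graph.as_default(), tf.Session() as demo_sess:
    v = tf.Variable(1.0, name="v")
    demo_sess.run(tf.global_variables_initializer())
    print(get_model_params())  # {'v': 1.0}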
Load the demo image:
In [ ]:
from scipy.misc import imread  # note: imread was removed in SciPy 1.2+; matplotlib.pyplot.imread works on newer versions
In [ ]:
china = imread("./images/china.png")
In [ ]:
china.shape
In [ ]:
def plot_image(image):
    cmap = "gray" if len(image.shape) == 2 else None  # 2D arrays are grayscale images
    plt.imshow(image, cmap=cmap, interpolation="nearest")
    plt.axis("off")
In [ ]:
plt.figure(figsize=(10,7))
plot_image(china)
Crop it and convert it to grayscale:
In [ ]:
image = china[150:220, 130:250].mean(axis=2).astype(np.float32)  # crop, then average the RGB channels to get grayscale
image.shape
In [ ]:
height, width = image.shape
channels = 1 # grayscale
In [ ]:
plt.figure(figsize=(10,6))
plot_image(image)
In [ ]:
basic_filters = np.zeros(shape=(7, 7, 1, 2), dtype=np.float32) # height, width, in channels, out channels
basic_filters[:, 3, 0, 0] = 1  # filter 0: vertical line (middle column set to 1)
basic_filters[3, :, 0, 1] = 1  # filter 1: horizontal line (middle row set to 1)
plot_image(basic_filters[:, :, 0, 0])
plt.show()
plot_image(basic_filters[:, :, 0, 1])
plt.show()
In [ ]:
graph = tf.Graph()
with graph.as_default():
    X = tf.placeholder(tf.float32, shape=(None, height, width, channels))
    filters = tf.constant(basic_filters)
    convolution = tf.nn.conv2d(X, filters, strides=[1,1,1,1], padding="SAME")
In [ ]:
with tf.Session(graph=graph) as sess:
    X_batch = image.reshape(1, height, width, 1)  # mini-batch containing a single image
    output = convolution.eval(feed_dict={X: X_batch})
In [ ]:
plt.figure(figsize=(10,6))
plot_image(output[0, :, :, 0])
In [ ]:
plt.figure(figsize=(10,6))
plot_image(output[0, :, :, 1])
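With "SAME" padding, TensorFlow zero-pads the input so that the output size is ceil(input size / stride); with stride 1, each feature map keeps the input's 70×120 shape. A quick sanity check on the convolution output:
In [ ]:
# SAME padding: output size = ceil(input size / stride), so stride 1 preserves the spatial shape
print(output.shape)  # (1, 70, 120, 2): one image, two feature maps (vertical and horizontal lines)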
Now let's add a max pooling layer:
In [ ]:
graph = tf.Graph()
with graph.as_default():
    X = tf.placeholder(tf.float32, shape=(None, height, width, channels))
    filters = tf.constant(basic_filters)
    convolution = tf.nn.conv2d(X, filters, strides=[1,1,1,1], padding="SAME")
    max_pool = tf.nn.max_pool(convolution, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
In [ ]:
with tf.Session(graph=graph) as sess:
    X_batch = image.reshape(1, height, width, 1)
    output = max_pool.eval(feed_dict={X: X_batch})
In [ ]:
plt.figure(figsize=(5,3))
plot_image(output[0, :, :, 0])
In [ ]:
plt.figure(figsize=(5,3))
plot_image(output[0, :, :, 1])
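With "VALID" padding, no zero-padding is added, so the output size is floor((input size − kernel size) / stride) + 1: here the 2×2 pool with stride 2 shrinks the 70×120 maps down to 35×60. A quick check:
In [ ]:
print(output.shape)  # (1, 35, 60, 2)
print((height - 2) // 2 + 1, (width - 2) // 2 + 1)  # VALID: floor((size - kernel) / stride) + 1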
In this final exercise, you will tackle MNIST and reach over 99% accuracy using most of what you learned in this course:
In [ ]:
In [ ]:
In [ ]:
Try not to peek at the solution below before you have done the exercise! :)
In [ ]:
height = 28
width = 28
channels = 1
conv1_fmaps = 32
conv1_ksize = 3
conv1_stride = 1
conv1_pad = "SAME"
conv2_fmaps = 64
conv2_ksize = 3
conv2_stride = 1
conv2_pad = "SAME"
conv2_dropout_rate = 0.25
pool3_fmaps = conv2_fmaps
n_fc1 = 128
fc1_dropout_rate = 0.5
n_inputs = 28 * 28
n_outputs = 10
graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
        y = tf.placeholder(tf.int32, shape=[None], name="y")
        training = tf.placeholder_with_default(False, shape=[], name='training')

    conv1 = tf.layers.conv2d(X_reshaped, conv1_fmaps, kernel_size=conv1_ksize,
                             strides=conv1_stride, padding=conv1_pad,
                             activation=tf.nn.relu, name="conv1")
    conv2 = tf.layers.conv2d(conv1, conv2_fmaps, kernel_size=conv2_ksize,
                             strides=conv2_stride, padding=conv2_pad,
                             activation=tf.nn.relu, name="conv2")

    with tf.name_scope("pool3"):
        pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
        pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 14 * 14])  # 28x28 maps pooled down to 14x14
        pool3_flat_drop = tf.layers.dropout(pool3_flat, conv2_dropout_rate, training=training)

    with tf.name_scope("fc1"):
        fc1 = tf.layers.dense(pool3_flat_drop, n_fc1, activation=tf.nn.relu, name="fc1")
        fc1_drop = tf.layers.dropout(fc1, fc1_dropout_rate, training=training)

    with tf.name_scope("output"):
        logits = tf.layers.dense(fc1_drop, n_outputs, name="output")
        Y_proba = tf.nn.softmax(logits, name="Y_proba")

    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
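The flattened size fed to the fully connected layer follows from the pooling arithmetic: both convolutions use "SAME" padding with stride 1, so the feature maps stay 28×28, and the 2×2 max pool with stride 2 halves them to 14×14, hence 64 × 14 × 14 = 12,544 inputs. A quick sanity check on the static shapes:
In [ ]:
print(pool3.shape)       # (?, 14, 14, 64)
print(pool3_flat.shape)  # (?, 12544), i.e. 64 * 14 * 14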
Now let's train the model, using early stopping: every 100 training iterations, the model is evaluated on the validation set; if it performs better than the best model found so far, its parameters are saved to RAM; if there is no progress for 100 evaluations in a row, training is interrupted and the best parameters are restored. Training is quite slow on a CPU but much faster on a GPU. This model reaches over 99% accuracy on the test set.
In [ ]:
def restore_model_params(model_params):
    # Each tf.Variable has an op named "<name>/Assign" whose second input is the value to assign.
    # Feeding that input and running the Assign ops writes the saved values back into the variables,
    # which is much faster than round-tripping through disk with a Saver.
    gvar_names = list(model_params.keys())
    assign_ops = {gvar_name: tf.get_default_graph().get_operation_by_name(gvar_name + "/Assign")
                  for gvar_name in gvar_names}
    init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
    feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
    tf.get_default_session().run(assign_ops, feed_dict=feed_dict)
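To see why this works, here is a minimal demonstration of the Assign-op trick on a throwaway variable (a sketch; the names are made up for illustration):
In [ ]:
toy_graph = tf.Graph()
with toy_graph.as_default():
    w = tf.Variable(0.0, name="w")
    assign_op = toy_graph.get_operation_by_name("w/Assign")
    with tf.Session() as toy_sess:
        toy_sess.run(w.initializer)
        # Feed the Assign op's value input to overwrite the variable in place:
        toy_sess.run(assign_op, feed_dict={assign_op.inputs[1]: 3.14})
        print(w.eval())  # 3.14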
In [ ]:
n_epochs = 1000
batch_size = 50
best_acc_val = 0
check_interval = 100
checks_since_last_progress = 0
max_checks_without_progress = 100
best_model_params = None
with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
            if iteration % check_interval == 0:
                # Evaluate on part of the validation set every check_interval iterations
                acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[:2000],
                                                   y: mnist.validation.labels[:2000]})
                if acc_val > best_acc_val:
                    best_acc_val = acc_val
                    checks_since_last_progress = 0
                    best_model_params = get_model_params()  # snapshot the best model so far (in memory)
                else:
                    checks_since_last_progress += 1
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[2000:],
                                           y: mnist.validation.labels[2000:]})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val,
              "Best validation accuracy:", best_acc_val)
        if checks_since_last_progress > max_checks_without_progress:
            print("Early stopping!")
            break

    if best_model_params:
        restore_model_params(best_model_params)  # roll back to the best snapshot
    acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
    print("Final accuracy on test set:", acc_test)
    save_path = saver.save(sess, "./my_mnist_model")
In [ ]:
with tf.Session(graph=graph) as sess:
    saver.restore(sess, "./my_mnist_model")  # restore sets all variables, so no init needed
    Y_proba_val = Y_proba.eval(feed_dict={X: mnist.test.images[2000:2400]})
In [ ]:
# Display the misclassified test images, along with the predicted class probabilities
for image, y_label, y_proba in zip(mnist.test.images[2000:2400], mnist.test.labels[2000:2400], Y_proba_val):
    y_pred = np.argmax(y_proba)
    if y_pred != y_label:
        print("Label: {}, Prediction: {}, Probabilities: {}".format(
            y_label, y_pred,
            " ".join(["{}={:.1f}%".format(n, 100 * p)
                      for n, p in enumerate(y_proba) if p > 0.01])))
        plt.imshow(image.reshape(28, 28), cmap="binary")
        plt.axis("off")
        plt.show()
I hope you enjoyed this course!