Try not to peek at the solutions when you go through the exercises. ;-)
First, let's make sure this notebook works well in both Python 2 and Python 3:
In [ ]:
from __future__ import absolute_import, division, print_function, unicode_literals
In [ ]:
import tensorflow as tf
tf.__version__
In [ ]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
Visit the TensorFlow Playground (https://playground.tensorflow.org).
In [ ]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("tmp/data/")
In [ ]:
batch_size = 3
X_batch, y_batch = mnist.train.next_batch(batch_size)
X_batch.shape
In [ ]:
for image_data in X_batch:
    plt.imshow(image_data.reshape([28, 28]), cmap="binary", interpolation="nearest")
    plt.show()
In [ ]:
y_batch
8.1) Take a close look at the following neural network model and make sure you understand every line. Next, add an extra hidden layer composed of 100 neurons.
In [ ]:
n_inputs = 28 * 28
n_hidden1 = 100
n_outputs = 10

graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        y = tf.placeholder(tf.int32, shape=[None], name="y")

    with tf.name_scope("hidden1"):
        b1 = tf.Variable(tf.zeros([n_hidden1]), name="b1")
        W1 = tf.Variable(tf.random_uniform([n_inputs, n_hidden1], -1.0, 1.0), name="W1")
        hidden1 = tf.nn.relu(tf.matmul(X, W1) + b1)

    with tf.name_scope("output"):
        b2 = tf.Variable(tf.zeros([n_outputs]), name="b2")
        W2 = tf.Variable(tf.random_uniform([n_hidden1, n_outputs], -1.0, 1.0), name="W2")
        logits = tf.matmul(hidden1, W2) + b2
        Y_proba = tf.nn.softmax(logits, name="Y_proba")

    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
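Before you modify the model, it can help to verify its structure. One quick sanity check (just a sketch, not part of the exercise) is to list the trainable variables and their shapes:
In [ ]:
with graph.as_default():
    # Each tf.Variable created above shows up here; the shapes should match
    # [n_inputs, n_hidden1], [n_hidden1], [n_hidden1, n_outputs], [n_outputs].
    for var in tf.trainable_variables():
        print(var.name, var.shape)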
8.2) Write the training code, and train the model for about 20 epochs (i.e. enough training iterations to go through the training set 20 times). Evaluate it on the test set: you should get over 95% accuracy.
Hint: you should open a session, initialize the variables, then write the main training loop. Inside it, use mnist.train.next_batch(batch_size) to get the next training batch (say, with batch_size=50), then run the training_op, feeding it the training batch (don't forget to feed both X and y). Every few hundred iterations, evaluate the model's accuracy on the validation set (mnist.validation.images and mnist.validation.labels) and print the result. At the end of training, save the model.
In [ ]:
In [ ]:
In [ ]:
8.3) Bonus question: load the model you just trained and saved, and use it to make predictions on the first 200 images of the test set (mnist.test). Display the images the model got wrong, along with the class probabilities it estimated for each. Notice that while some of the misclassified images are genuinely poorly written, others are obvious to us humans. We will see that Convolutional Neural Networks can do a much better job and reach human performance on this task.
In [ ]:
In [ ]:
In [ ]:
Try not to peek at the solution below before you have done the exercise! :)
8.1)
In [ ]:
n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10

graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        y = tf.placeholder(tf.int32, shape=[None], name="y")

    with tf.name_scope("hidden1"):
        b1 = tf.Variable(tf.zeros([n_hidden1]), name="b1")
        W1 = tf.Variable(tf.random_uniform([n_inputs, n_hidden1], -1.0, 1.0), name="W1")
        hidden1 = tf.nn.relu(tf.matmul(X, W1) + b1)

    with tf.name_scope("hidden2"):
        b2 = tf.Variable(tf.zeros([n_hidden2]), name="b2")
        W2 = tf.Variable(tf.random_uniform([n_hidden1, n_hidden2], -1.0, 1.0), name="W2")
        hidden2 = tf.nn.relu(tf.matmul(hidden1, W2) + b2)

    with tf.name_scope("output"):
        b3 = tf.Variable(tf.zeros([n_outputs]), name="b3")
        W3 = tf.Variable(tf.random_uniform([n_hidden2, n_outputs], -1.0, 1.0), name="W3")
        logits = tf.matmul(hidden2, W3) + b3
        Y_proba = tf.nn.softmax(logits, name="Y_proba")

    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
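Side note: initializing the weights with random_uniform(-1.0, 1.0) is quite crude, and with deeper networks it can slow down or destabilize training. On TensorFlow 1.x, the same architecture can be written more concisely with tf.layers.dense(), which defaults to Glorot (Xavier) initialization. This is just an alternative sketch, not the exercise's required solution:
In [ ]:
graph = tf.Graph()
with graph.as_default():
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    y = tf.placeholder(tf.int32, shape=[None], name="y")
    # tf.layers.dense creates and initializes W and b for us
    # (Glorot uniform weight initialization by default).
    hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    logits = tf.layers.dense(hidden2, n_outputs, name="output")
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)
    training_op = tf.train.AdamOptimizer().minimize(loss)
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
If you do run this cell, note that the rest of the solution assumes the graph defined above (in particular the inputs/X:0 and output/Y_proba:0 tensors used in 8.3).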
8.2)
In [ ]:
n_epochs = 20
batch_size = 50

with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images,
                                           y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val)

    save_path = saver.save(sess, "./my_mnist_model")
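The exercise also asks for the accuracy on the test set (you should get over 95%). A minimal way to check it, reusing the graph, variables and checkpoint from above:
In [ ]:
with tf.Session(graph=graph) as sess:
    # Restore the parameters saved at the end of training.
    saver.restore(sess, "./my_mnist_model")
    acc_test = accuracy.eval(feed_dict={X: mnist.test.images,
                                        y: mnist.test.labels})
    print("Test accuracy:", acc_test)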
8.3)
In [ ]:
graph = tf.Graph()
with tf.Session(graph=graph) as sess:
    saver = tf.train.import_meta_graph("./my_mnist_model.meta")
    saver.restore(sess, "./my_mnist_model")
    X = graph.get_tensor_by_name("inputs/X:0")
    Y_proba = graph.get_tensor_by_name("output/Y_proba:0")
    Y_proba_val = Y_proba.eval(feed_dict={X: mnist.test.images})
In [ ]:
for example_index in range(200):
    y_proba = Y_proba_val[example_index]
    y_pred = np.argmax(y_proba)
    y_label = mnist.test.labels[example_index]
    if y_pred != y_label:
        print("Actual class: {}, Predicted class: {}, Main probabilities: {}".format(
            y_label,
            y_pred,
            ", ".join(["{}: {:.1f}%".format(n, 100 * p)
                       for n, p in enumerate(y_proba) if p > 0.01])))
        plt.imshow(mnist.test.images[example_index].reshape([28, 28]),
                   cmap="binary", interpolation="nearest")
        plt.show()
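Side note: the cell above computes probabilities for the entire test set but only examines the first 200 images. A slightly leaner variant (just a sketch) finds the misclassified indices with NumPy in one shot:
In [ ]:
# Predicted class = index of the highest probability for each image.
y_pred = np.argmax(Y_proba_val[:200], axis=1)
bad_indices = np.where(y_pred != mnist.test.labels[:200])[0]
print("Misclassified among the first 200:", bad_indices)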
In [ ]: