Try not to peek at the solutions when you go through the exercises. ;-)
First let's make sure this notebook works well in both Python 2 and Python 3:
In [ ]:
from __future__ import absolute_import, division, print_function, unicode_literals
In [ ]:
import tensorflow as tf
tf.__version__
In [ ]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("tmp/data/")
Using He initialization and the ELU activation function (with the help of a partial()):
In [ ]:
from functools import partial
n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10
graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        y = tf.placeholder(tf.int32, shape=[None], name="y")

    he_init = tf.contrib.layers.variance_scaling_initializer()
    dense_layer = partial(tf.layers.dense,
                          kernel_initializer=he_init,
                          activation=tf.nn.elu)

    hidden1 = dense_layer(X, n_hidden1, name="hidden1")
    hidden2 = dense_layer(hidden1, n_hidden2, name="hidden2")
    logits = dense_layer(hidden2, n_outputs, activation=None, name="output")
    Y_proba = tf.nn.softmax(logits)

    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
In [ ]:
n_epochs = 20
batch_size = 50
with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val)

    save_path = saver.save(sess, "./my_mnist_model")
In this exercise, you will add dropout with a 50% drop rate to the neural network model below.
9.1) Add a training placeholder of type tf.bool.
Tip: you can use tf.placeholder_with_default() to make it False by default.
9.2) Add a dropout layer between the input layer and the first hidden layer, using tf.layers.dropout(). If you are unsure how these two functions behave, see the standalone sketch right after these instructions.
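Here is a minimal, self-contained sketch of how these two functions interact, built in its own throwaway graph (all the demo_* names are just for illustration, separate from the exercise model): the dropout layer is a no-op unless the Boolean flag is fed as True.
In [ ]:
# Standalone illustration, not the exercise solution.
demo_graph = tf.Graph()
with demo_graph.as_default():
    demo_training = tf.placeholder_with_default(False, shape=[], name="demo_training")
    demo_inputs = tf.ones([1, 10])
    # Drops ~50% of the values (and scales the rest by 2) only when demo_training is True.
    demo_drop = tf.layers.dropout(demo_inputs, rate=0.5, training=demo_training)

with tf.Session(graph=demo_graph) as sess:
    print(sess.run(demo_drop))                                   # flag left at its default (False): output == input
    print(sess.run(demo_drop, feed_dict={demo_training: True}))  # about half the values are zeroed out
Because the flag defaults to False, evaluation code can simply leave it out of the feed_dict.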
In [ ]:
n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10
graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        y = tf.placeholder(tf.int32, shape=[None], name="y")

    he_init = tf.contrib.layers.variance_scaling_initializer()
    dense_layer = partial(tf.layers.dense,
                          kernel_initializer=he_init,
                          activation=tf.nn.elu)

    hidden1 = dense_layer(X, n_hidden1, name="hidden1")
    hidden2 = dense_layer(hidden1, n_hidden2, name="hidden2")
    logits = dense_layer(hidden2, n_outputs, activation=None, name="output")
    Y_proba = tf.nn.softmax(logits)

    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
In [ ]:
In [ ]:
In [ ]:
9.3) Update the following training code to feed the value of the training placeholder, where appropriate, then run the code and see if the model performs better than without dropout.
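Tip: dropout should only be active while training, so feed the flag as True for the training operation and leave it out of the evaluation feed_dicts, where it will keep its default value of False.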
In [ ]:
n_epochs = 20
batch_size = 50
with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val)

    save_path = saver.save(sess, "./my_mnist_model")
In [ ]:
In [ ]:
In [ ]:
Try not to peek at the solution below before you have done the exercise! :)
9.1-2)
In [ ]:
n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10
dropout_rate = 0.5 # <= CHANGED
graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        y = tf.placeholder(tf.int32, shape=[None], name="y")
        training = tf.placeholder_with_default(False, shape=[], name='training')  # <= CHANGED

    X_drop = tf.layers.dropout(X, dropout_rate, training=training)  # <= CHANGED

    he_init = tf.contrib.layers.variance_scaling_initializer()
    dense_layer = partial(tf.layers.dense,
                          kernel_initializer=he_init,
                          activation=tf.nn.elu)

    hidden1 = dense_layer(X_drop, n_hidden1, name="hidden1")  # <= CHANGED
    hidden2 = dense_layer(hidden1, n_hidden2, name="hidden2")
    logits = dense_layer(hidden2, n_outputs, activation=None, name="output")
    Y_proba = tf.nn.softmax(logits)

    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()
9.3)
In [ ]:
n_epochs = 20
batch_size = 50
with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})  # <= CHANGED
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val)

    save_path = saver.save(sess, "./my_mnist_model")
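Bonus: the next cell adds early stopping on top of this model. Every 100 training iterations it evaluates the model on the first 2,000 validation images; whenever validation accuracy improves it saves the model to disk, and after 100 consecutive checks without progress it interrupts training and restores the best model found so far.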
In [ ]:
n_epochs = 1000
batch_size = 50
best_acc_val = 0
check_interval = 100
checks_since_last_progress = 0
max_checks_without_progress = 100
with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
            if iteration % check_interval == 0:
                acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[:2000], y: mnist.validation.labels[:2000]})
                if acc_val > best_acc_val:
                    best_acc_val = acc_val
                    checks_since_last_progress = 0
                    saver.save(sess, "./my_best_model_so_far")
                else:
                    checks_since_last_progress += 1
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[2000:], y: mnist.validation.labels[2000:]})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val, "Best validation accuracy:", best_acc_val)
        if checks_since_last_progress > max_checks_without_progress:
            print("Early stopping!")
            saver.restore(sess, "./my_best_model_so_far")
            break

    acc_test = accuracy.eval(feed_dict={X: mnist.test.images[2000:], y: mnist.test.labels[2000:]})
    print("Final accuracy on test set:", acc_test)
    save_path = saver.save(sess, "./my_mnist_model")
Saving the model to disk so often slows down training. Let's save to RAM instead:
In [ ]:
def get_model_params():
    # Fetch the current value of every global variable, keyed by variable name.
    gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    return {gvar.op.name: value for gvar, value in zip(gvars, tf.get_default_session().run(gvars))}

def restore_model_params(model_params):
    # Assign the saved values back by reusing each variable's existing Assign op
    # and feeding the value through that op's second input (the assigned value).
    gvar_names = list(model_params.keys())
    assign_ops = {gvar_name: tf.get_default_graph().get_operation_by_name(gvar_name + "/Assign")
                  for gvar_name in gvar_names}
    init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
    feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
    tf.get_default_session().run(assign_ops, feed_dict=feed_dict)
In [ ]:
n_epochs = 1000
batch_size = 50
best_acc_val = 0
check_interval = 100
checks_since_last_progress = 0
max_checks_without_progress = 100
best_model_params = None
with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
            if iteration % check_interval == 0:
                acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[:2000], y: mnist.validation.labels[:2000]})
                if acc_val > best_acc_val:
                    best_acc_val = acc_val
                    checks_since_last_progress = 0
                    best_model_params = get_model_params()
                else:
                    checks_since_last_progress += 1
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[2000:], y: mnist.validation.labels[2000:]})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val, "Best validation accuracy:", best_acc_val)
        if checks_since_last_progress > max_checks_without_progress:
            print("Early stopping!")
            break

    if best_model_params:
        restore_model_params(best_model_params)
    acc_test = accuracy.eval(feed_dict={X: mnist.test.images[2000:], y: mnist.test.labels[2000:]})
    print("Final accuracy on test set:", acc_test)
    save_path = saver.save(sess, "./my_mnist_model")