Try not to peek at the solutions when you go through the exercises. ;-)

First let's make sure this notebook works well in both Python 2 and Python 3:


In [ ]:
from __future__ import absolute_import, division, print_function, unicode_literals

In [ ]:
import tensorflow as tf
tf.__version__

In [ ]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("tmp/data/")

Techniques for Training Deep Nets

Using He initialization and the ELU activation function (with the help of a partial()):


In [ ]:
from functools import partial

n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10

graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        y = tf.placeholder(tf.int32, shape=[None], name="y")

    he_init = tf.contrib.layers.variance_scaling_initializer()
    
    dense_layer = partial(tf.layers.dense,
                          kernel_initializer=he_init,
                          activation=tf.nn.elu)
    hidden1 = dense_layer(X, n_hidden1, name="hidden1")
    hidden2 = dense_layer(hidden1, n_hidden2, name="hidden2")
    logits = dense_layer(hidden2, n_outputs, activation=None, name="output")
    Y_proba = tf.nn.softmax(logits)
    
    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

In [ ]:
n_epochs = 20
batch_size = 50

with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val)

    save_path = saver.save(sess, "./my_mnist_model")

Exercise 9

In this exercise, you will add dropout (with a 50% dropout rate) to the neural network model below.

9.1) Add a training placeholder of type tf.bool.

Tip: you can use tf.placeholder_with_default() to make this False by default.
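For reference, here is a minimal, self-contained sketch of how tf.placeholder_with_default() behaves (the demo_flag graph is hypothetical and separate from the exercise model): the tensor evaluates to its default value unless you feed a different one.


In [ ]:
demo_graph = tf.Graph()
with demo_graph.as_default():
    # A boolean tensor that defaults to False but can be overridden through feed_dict
    demo_flag = tf.placeholder_with_default(False, shape=[], name="demo_flag")

with tf.Session(graph=demo_graph) as demo_sess:
    print(demo_sess.run(demo_flag))                               # prints False (the default)
    print(demo_sess.run(demo_flag, feed_dict={demo_flag: True}))  # prints True (the fed value)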

9.2) Add a dropout layer between the input layer and the first hidden layer, using tf.layers.dropout().


In [ ]:
n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10

graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        y = tf.placeholder(tf.int32, shape=[None], name="y")

    he_init = tf.contrib.layers.variance_scaling_initializer()

    dense_layer = partial(tf.layers.dense,
                          kernel_initializer=he_init,
                          activation=tf.nn.elu)
    hidden1 = dense_layer(X, n_hidden1, name="hidden1")
    hidden2 = dense_layer(hidden1, n_hidden2, name="hidden2")
    logits = dense_layer(hidden2, n_outputs, activation=None, name="output")
    Y_proba = tf.nn.softmax(logits)
    
    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

In [ ]:


In [ ]:


In [ ]:

9.3) Update the training code below to feed the value of the training placeholder where appropriate, then run it and see whether the model performs better than it did without dropout.


In [ ]:
n_epochs = 20
batch_size = 50

with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val)

    save_path = saver.save(sess, "./my_mnist_model")

In [ ]:


In [ ]:


In [ ]:

Try not to peek at the solution below before you have done the exercise! :)

Exercise 9 - Solution

9.1-2)


In [ ]:
n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10

dropout_rate = 0.5                                                               # <= CHANGED

graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        y = tf.placeholder(tf.int32, shape=[None], name="y")
        training = tf.placeholder_with_default(False, shape=[], name='training') # <= CHANGED
        X_drop = tf.layers.dropout(X, dropout_rate, training=training)           # <= CHANGED

    he_init = tf.contrib.layers.variance_scaling_initializer()

    dense_layer = partial(tf.layers.dense,
                          kernel_initializer=he_init,
                          activation=tf.nn.elu)
    hidden1 = dense_layer(X_drop, n_hidden1, name="hidden1")                     # <= CHANGED
    hidden2 = dense_layer(hidden1, n_hidden2, name="hidden2")
    logits = dense_layer(hidden2, n_outputs, activation=None, name="output")
    Y_proba = tf.nn.softmax(logits)
    
    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

9.3)


In [ ]:
n_epochs = 20
batch_size = 50

with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True}) # <= CHANGED
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val)

    save_path = saver.save(sess, "./my_mnist_model")

Early Stopping

The code below implements early stopping: every 100 training iterations it checks the validation accuracy (on the first 2,000 validation images), saves the model whenever that accuracy improves, and interrupts training after 100 consecutive checks without progress, restoring the best model found so far.


In [ ]:
n_epochs = 1000
batch_size = 50

best_acc_val = 0
check_interval = 100
checks_since_last_progress = 0
max_checks_without_progress = 100

with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
            if iteration % check_interval == 0:
                acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[:2000], y: mnist.validation.labels[:2000]})
                if acc_val > best_acc_val:
                    best_acc_val = acc_val
                    checks_since_last_progress = 0
                    saver.save(sess, "./my_best_model_so_far")
                else:
                    checks_since_last_progress += 1                    
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[2000:], y: mnist.validation.labels[2000:]})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val, "Best validation accuracy:", best_acc_val)
        if checks_since_last_progress > max_checks_without_progress:
            print("Early stopping!")
            saver.restore(sess, "./my_best_model_so_far")
            break

    acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
    print("Final accuracy on test set:", acc_test)
    save_path = saver.save(sess, "./my_mnist_model")

Saving the model to disk this often slows down training. Instead, let's keep the best parameters in RAM: the first function below reads the current value of every global variable, and the second writes saved values back by running each variable's assignment op.


In [ ]:
def get_model_params():
    # Read the current value of every global variable, keyed by variable name
    gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    return {gvar.op.name: value for gvar, value in zip(gvars, tf.get_default_session().run(gvars))}

def restore_model_params(model_params):
    # Write the saved values back by running each variable's Assign op,
    # feeding the saved value in place of the op's second input (its initialization value)
    gvar_names = list(model_params.keys())
    assign_ops = {gvar_name: tf.get_default_graph().get_operation_by_name(gvar_name + "/Assign")
                  for gvar_name in gvar_names}
    init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
    feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
    tf.get_default_session().run(assign_ops, feed_dict=feed_dict)

In [ ]:
n_epochs = 1000
batch_size = 50

best_acc_val = 0
check_interval = 100
checks_since_last_progress = 0
max_checks_without_progress = 100
best_model_params = None

with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch, training: True})
            if iteration % check_interval == 0:
                acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[:2000], y: mnist.validation.labels[:2000]})
                if acc_val > best_acc_val:
                    best_acc_val = acc_val
                    checks_since_last_progress = 0
                    best_model_params = get_model_params()
                else:
                    checks_since_last_progress += 1
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images[2000:], y: mnist.validation.labels[2000:]})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val, "Best validation accuracy:", best_acc_val)
        if checks_since_last_progress > max_checks_without_progress:
            print("Early stopping!")
            break

    if best_model_params:
        restore_model_params(best_model_params)
    acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
    print("Final accuracy on test set:", acc_test)
    save_path = saver.save(sess, "./my_mnist_model")
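As a quick sanity check, here is a sketch of how the saved "./my_mnist_model" checkpoint could be reloaded for inference; it reuses the graph, saver, X and Y_proba defined above, and dropout stays off because the training placeholder defaults to False.


In [ ]:
with tf.Session(graph=graph) as sess:
    saver.restore(sess, "./my_mnist_model")
    # Predict classes for a few test images using the restored parameters
    X_new = mnist.test.images[:5]
    probas = Y_proba.eval(feed_dict={X: X_new})
    y_pred = probas.argmax(axis=1)
    print("Predicted classes:", y_pred)
    print("Actual classes:   ", mnist.test.labels[:5])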