Try not to peek at the solutions when you go through the exercises. ;-)

First let's make sure this notebook works well in both Python 2 and Python 3:


In [ ]:
from __future__ import absolute_import, division, print_function, unicode_literals

In [ ]:
import tensorflow as tf
tf.__version__

In [ ]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")

Organizing Your Code


In [ ]:
def neural_net_layer(inputs, n_neurons, activation=None, seed=None):
    n_inputs = int(inputs.get_shape()[1])   # number of input features
    b = tf.Variable(tf.zeros([n_neurons]), name="b")  # biases, initialized to zero
    W = tf.Variable(tf.random_uniform([n_inputs, n_neurons], -1.0, 1.0, seed=seed), name="W")  # random weights
    logits = tf.matmul(inputs, W) + b        # linear combination
    if activation:
        return activation(logits)            # e.g. tf.nn.relu
    else:
        return logits                        # no activation for the output layer

Let's simplify our code by using neural_net_layer():


In [ ]:
n_inputs = 28 * 28
n_hidden1 = 100
n_outputs = 10

graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        y = tf.placeholder(tf.int32, shape=[None], name="y")

#########################################################################
# This section is simplified (the rest is unchanged)
#
    with tf.name_scope("hidden1"):
        hidden1 = neural_net_layer(X, n_hidden1, activation=tf.nn.relu) # <= CHANGED

    with tf.name_scope("output"):
        logits = neural_net_layer(hidden1, n_outputs)                   # <= CHANGED
        Y_proba = tf.nn.softmax(logits, name="Y_proba")
#
#
#########################################################################
    
    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

In [ ]:
[var.op.name for var in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]
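
Since tf.Variable() picks up the enclosing tf.name_scope(), the list above should contain one W and one b per layer, each prefixed by its layer's scope. Here is a minimal sanity check, a sketch that assumes TF 1.x naming rules:


In [ ]:
# Sanity check (assumes TF 1.x naming: tf.Variable() names are prefixed
# by the enclosing tf.name_scope()).
trainable = {var.op.name for var in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)}
assert trainable == {"hidden1/W", "hidden1/b", "output/W", "output/b"}, trainable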

Let's check that training still works:


In [ ]:
n_epochs = 20
batch_size = 50

with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val)

    save_path = saver.save(sess, "./my_mnist_model")

Now let's use tf.layers.dense() instead:


In [ ]:
n_inputs = 28 * 28
n_hidden1 = 100
n_outputs = 10

graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        y = tf.placeholder(tf.int32, shape=[None], name="y")

    with tf.name_scope("hidden1"):
        hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1") # <= CHANGED

    with tf.name_scope("output"):
        logits = tf.layers.dense(hidden1, n_outputs, name="output")                    # <= CHANGED
        Y_proba = tf.nn.softmax(logits)
    
    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

In [ ]:
[var.op.name for var in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]
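
tf.layers.dense() creates its variables through a variable scope named after the layer, so the weights and biases now show up as kernel and bias. A quick check, again a sketch assuming TF 1.x naming conventions:


In [ ]:
# tf.layers.dense() stores its weights as "<name>/kernel" and its biases
# as "<name>/bias" (assumed TF 1.x behavior).
trainable = {var.op.name for var in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)}
assert trainable == {"hidden1/kernel", "hidden1/bias", "output/kernel", "output/bias"}, trainable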

Let's check that training still works:


In [ ]:
n_epochs = 20
batch_size = 50

with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val)

    save_path = saver.save(sess, "./my_mnist_model")

Now suppose you want to add two more hidden layers that share the same weights & biases. Let's use variable scopes for this:


In [ ]:
n_inputs = 28 * 28
n_hidden = 100
n_outputs = 10

graph = tf.Graph()
with graph.as_default():
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        y = tf.placeholder(tf.int32, shape=[None], name="y")

    hidden1 = tf.layers.dense(X, n_hidden, activation=tf.nn.relu, name="hidden1")                    # <= CHANGED
    hidden2 = tf.layers.dense(hidden1, n_hidden, activation=tf.nn.relu, name="hidden23")             # <= CHANGED
    hidden3 = tf.layers.dense(hidden2, n_hidden, activation=tf.nn.relu, name="hidden23", reuse=True) # <= CHANGED

    with tf.name_scope("output"):
        logits = tf.layers.dense(hidden3, n_outputs, name="output")
        Y_proba = tf.nn.softmax(logits, name="Y_proba")
    
    with tf.name_scope("train"):
        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
        loss = tf.reduce_mean(xentropy)
        optimizer = tf.train.AdamOptimizer()
        training_op = optimizer.minimize(loss)

    with tf.name_scope("eval"):
        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.name_scope("init_and_save"):
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

In [ ]:
[var.op.name for var in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]
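
Since hidden2 and hidden3 were both built under the name "hidden23" and the second call passed reuse=True, they share the same kernel and bias, so the list above should show six trainable variables rather than eight. A quick check, assuming TF 1.x naming:


In [ ]:
# hidden2 and hidden3 reuse "hidden23/kernel" and "hidden23/bias", so there
# are only 6 trainable variables instead of 8 (assumed TF 1.x naming).
trainable = {var.op.name for var in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)}
assert trainable == {"hidden1/kernel", "hidden1/bias",
                     "hidden23/kernel", "hidden23/bias",
                     "output/kernel", "output/bias"}, trainable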

Check that training works well:


In [ ]:
n_epochs = 20
batch_size = 50

with tf.Session(graph=graph) as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict={X: mnist.validation.images, y: mnist.validation.labels})
        print(epoch, "Train accuracy:", acc_train, "Validation accuracy:", acc_val)

    save_path = saver.save(sess, "./my_mnist_model")

How would we implement variable sharing in neural_net_layer()?


In [ ]:
def neural_net_layer(inputs, n_neurons, activation=None, name=None, reuse=None, seed=None):
    # A variable scope lets get_variable() create the weights and biases on the
    # first call, and reuse them on later calls when reuse=True.
    with tf.variable_scope(name, default_name="layer", reuse=reuse):
        n_inputs = int(inputs.get_shape()[1])
        # Custom initializer with the (shape, dtype, partition_info) signature
        # expected by get_variable().
        rnd_init = lambda shape, dtype, partition_info: tf.random_uniform(shape, -1.0, 1.0, dtype=dtype, seed=seed)
        b = tf.get_variable("biases", shape=[n_neurons], initializer=rnd_init)
        W = tf.get_variable("weights", shape=[n_inputs, n_neurons], initializer=rnd_init)
        logits = tf.matmul(inputs, W) + b
        if activation:
            return activation(logits)
        else:
            return logits

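For example, the two shared hidden layers from the previous graph could now be built with this new neural_net_layer(). The snippet below is only a sketch: it builds a separate throwaway graph and reuses the n_inputs / n_hidden values defined above.


In [ ]:
# Sketch: sharing weights & biases between two layers via neural_net_layer().
sharing_graph = tf.Graph()
with sharing_graph.as_default():
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    hidden1 = neural_net_layer(X, n_hidden, activation=tf.nn.relu, name="hidden1")
    hidden2 = neural_net_layer(hidden1, n_hidden, activation=tf.nn.relu, name="hidden23")
    hidden3 = neural_net_layer(hidden2, n_hidden, activation=tf.nn.relu, name="hidden23", reuse=True)

# Only one set of weights/biases should exist for the two shared layers.
[var.op.name for var in sharing_graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)]
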
In [ ]:
graph = tf.Graph()
with graph.as_default():
    with tf.variable_scope("foo"): 
        a = tf.constant(1., name="a")
        with tf.name_scope("bar"): 
            b = tf.constant(2., name="b")
            with tf.name_scope("baz"):
                c = tf.get_variable("c", shape=[], initializer=tf.constant_initializer(2))
                s = tf.add_n([a,b,c], name="s")

In [ ]:
a.name

In [ ]:
b.name

In [ ]:
c.name

In [ ]:
s.name
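
If you run the four cells above, you should see that tf.get_variable() ignores tf.name_scope() and only uses the enclosing tf.variable_scope(), while constants and regular ops pick up the full scope path. The expected output under TF 1.x scoping rules is summarized below as comments:


In [ ]:
# Expected names (assuming TF 1.x scoping rules):
#   a.name == "foo/a:0"          # the variable scope "foo" also opens a name scope
#   b.name == "foo/bar/b:0"      # constant inside the nested name scope "bar"
#   c.name == "foo/c:0"          # tf.get_variable() ignores the name scopes "bar" and "baz"
#   s.name == "foo/bar/baz/s:0"  # regular op: full name-scope path
print(a.name, b.name, c.name, s.name)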