In [ ]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import numpy as np
import tensorflow as tf

Boilerplate for graph visualization


In [ ]:
# This is for graph visualization.

from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def.

    Copies the graph and replaces the raw bytes of any Const node whose
    tensor content exceeds max_const_size with a short placeholder, so the
    serialized graph stays small enough to embed in the visualization HTML.

    Args:
        graph_def: A tf.GraphDef proto to copy and strip.
        max_const_size: Constants larger than this many bytes are stripped.

    Returns:
        A new tf.GraphDef with oversized constant contents replaced.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # BUG FIX: tensor_content is a protobuf *bytes* field; assigning
                # a str raises TypeError under Python 3. Encode the placeholder.
                tensor.tensor_content = ("<stripped %d bytes>" % size).encode('utf-8')
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Render a TensorFlow graph inline using the TensorBoard graph widget."""
    # Accept either a tf.Graph or a GraphDef.
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    stripped = strip_consts(graph_def, max_const_size=max_const_size)
    # Random element id so multiple graphs can coexist in one notebook.
    element_id = 'graph' + str(np.random.rand())
    code = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(stripped)), id=element_id)

    # Escape double quotes so the document can be embedded via srcdoc.
    escaped_code = code.replace('"', '&quot;')
    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(escaped_code)
    display(HTML(iframe))

Load the data

Run 00_download_data.ipynb if you haven't already


In [ ]:
DATA_DIR = '../data/'
data_filename = os.path.join(DATA_DIR, "zoo.npz")
# BUG FIX: np.load(open(path)) opens the file in *text* mode, which fails for
# binary .npz archives under Python 3 and leaks the file handle. Pass the
# path and let numpy open (and close) the file itself.
data = np.load(data_filename)

train_data = data['arr_0']
train_labels = data['arr_1']
test_data = data['arr_2']
test_labels = data['arr_3']
del data  # free the archive handle; arrays above are already materialized
print("Data shapes: ", test_data.shape, test_labels.shape, train_data.shape, train_labels.shape)

Create a simple classifier with low-level TF Ops


In [ ]:
tf.reset_default_graph()

input_dimension = train_data.shape[1]   # 784 = 28*28 pixels
output_dimension = train_labels.shape[1]  # number of classes (6 for this dataset)

batch_size = 32
hidden1_units = 128

# Placeholders: fed each step with a batch of flattened images / one-hot labels.
data_batch =  tf.placeholder("float", shape=[None, input_dimension], name="data")
label_batch = tf.placeholder("float", shape=[None, output_dimension], name="labels")

# First-layer weights, scaled by 1/sqrt(fan-in) to keep activations stable.
weights_1 = tf.Variable(
    tf.truncated_normal(
        [input_dimension, hidden1_units],
        stddev=1.0 / np.sqrt(float(input_dimension))),
    name='weights_1')

# Task: Add Bias to first layer
# Task: Use Cross-Entropy instead of Squared Loss

# SOLUTION: Create biases variable.
biases_1 = tf.Variable(
    tf.truncated_normal(
        [hidden1_units],
        stddev=1.0 / np.sqrt(float(hidden1_units))),
    name='biases_1')

weights_2 = tf.Variable(
    tf.truncated_normal(
        [hidden1_units, output_dimension],
        stddev=1.0 / np.sqrt(float(hidden1_units))),
    name='weights_2')

# SOLUTION: Add the bias term to the first layer
wx_b = tf.add(tf.matmul(data_batch, weights_1), biases_1)
hidden_activations = tf.nn.relu(wx_b)
# BUG FIX: the original wrapped this matmul in tf.nn.tanh before feeding it
# to softmax_cross_entropy_with_logits. That op expects raw, unnormalized
# logits; squashing them into [-1, 1] severely limits the loss and slows
# training. (tanh is monotonic, so argmax-based predictions are unaffected.)
output_activations = tf.matmul(hidden_activations, weights_2)

# SOLUTION: Replace the l2 loss with softmax cross entropy.
with tf.name_scope("loss"):
    # loss = tf.nn.l2_loss(label_batch - output_activations)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=label_batch,
            logits=output_activations))

show_graph(tf.get_default_graph().as_graph_def())

We can run this graph by feeding in batches of examples using a feed_dict. The keys of the feed_dict are placeholders we've defined previously. The first argument of session.run is the tensor that we're computing. Only parts of the graph required to produce this value will be executed.


In [ ]:
# Run the graph without any optimizer: this only computes the loss per batch,
# no variables are updated yet.
with tf.Session() as sess:
    # Variables must be initialized before any op that reads them.
    init = tf.global_variables_initializer()
    sess.run(init)
  
    # Shuffle once, then walk through the training set in fixed-size batches.
    random_indices = np.random.permutation(train_data.shape[0])
    for i in range(1000):
        # Wrap around when we reach the end of the (shuffled) training data.
        batch_start_idx = (i % (train_data.shape[0] // batch_size)) * batch_size
        batch_indices = random_indices[batch_start_idx:batch_start_idx + batch_size]
        # Only the subgraph needed to produce `loss` is executed.
        batch_loss = sess.run(
            loss, 
            feed_dict = {
                data_batch : train_data[batch_indices,:],
                label_batch : train_labels[batch_indices,:]
            })
        if (i + 1) % 100 == 0:
            print("Loss at iteration {}: {}".format(i+1, batch_loss))

No learning yet, but we do get the loss for each batch. We need to add an optimizer to the graph so the variables actually get updated.


In [ ]:
# Task: Replace GradientDescentOptimizer with AdagradOptimizer and a 0.1 learning rate.
# learning_rate = 0.005
# updates = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

# SOLUTION: Replace GradientDescentOptimizer
learning_rate = 0.1
# minimize() adds both the gradient computation and the variable updates to the graph.
updates = tf.train.AdagradOptimizer(learning_rate).minimize(loss)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    
    random_indices = np.random.permutation(train_data.shape[0])
    n_epochs = 10  # how often to go through the training data
    max_steps = train_data.shape[0]*n_epochs // batch_size
    for i in range(max_steps):
        # Wrap around the shuffled index list to form each batch.
        batch_start_idx = (i % (train_data.shape[0] // batch_size)) * batch_size
        batch_indices = random_indices[batch_start_idx:batch_start_idx+batch_size]
        # Fetch the loss AND run the update op in a single step.
        batch_loss, _ = sess.run(
            [loss, updates], 
            feed_dict = {
                data_batch : train_data[batch_indices,:],
                label_batch : train_labels[batch_indices,:]
            })

        if i % 200 == 0 or i == max_steps - 1:
            # Reshuffle periodically so batch composition varies across epochs.
            random_indices = np.random.permutation(train_data.shape[0])
            print("Batch-Loss at iteration {}: {}".format(i, batch_loss))

            # Evaluate on the full test set (argmax of logits = predicted class).
            test_predictions = sess.run(
                output_activations, 
                feed_dict = {
                    data_batch : test_data,
                    label_batch : test_labels
                })
            wins = np.argmax(test_predictions, axis=1) == np.argmax(test_labels, axis=1)
            print("Accuracy on test: {}%".format(100*np.mean(wins)))

Loss going down, Accuracy going up! \o/

Notice how batch loss differs between batches.

Model wrapped in a custom estimator

In TensorFlow, we can make it easier to experiment with different models when we separately define a model_fn and an input_fn.


In [ ]:
tf.reset_default_graph()

# Model parameters (hyperparameters shared by input_fn and model_fn below).
batch_size = 32
hidden1_units = 128
learning_rate = 0.005
input_dimension = train_data.shape[1]   # 784 = 28*28 pixels
output_dimension = train_labels.shape[1]  # 6 classes
n_epochs = 10  # how often to go through the training data


def input_fn(data, labels):
    """Build a queue-based input pipeline yielding (features, labels) batches.

    Wraps the full numpy arrays as constants, slices out one example at a
    time for n_epochs, and batches the slices back together.
    """
    images_const = tf.constant(data, shape=data.shape, verify_shape=True, dtype=tf.float32)
    labels_const = tf.constant(labels, shape=labels.shape, verify_shape=True, dtype=tf.float32)
    single_image, single_label = tf.train.slice_input_producer(
        [images_const, labels_const],
        num_epochs=n_epochs)
    batched = tf.train.batch(
        dict(images=single_image, labels=single_label),
        batch_size, allow_smaller_final_batch=True)
    # Estimators expect (features_dict, labels), so pull the labels back out.
    label_tensor = batched.pop('labels')
    return batched, label_tensor


def model_fn(features, targets, mode, params):
    """Custom model_fn for tf.contrib.learn.Estimator.

    Builds the same one-hidden-layer network as the low-level version above,
    but packaged so the Estimator framework drives training/eval/prediction.
    """
    # 1. Configure the model via TensorFlow operations (same as above)
    weights_1 = tf.Variable(
            tf.truncated_normal(
                    [input_dimension, hidden1_units],
                    stddev=1.0 / np.sqrt(float(input_dimension))))
    weights_2 = tf.Variable(
            tf.truncated_normal(
                    [hidden1_units, output_dimension],
                    stddev=1.0 / np.sqrt(float(hidden1_units))))
    hidden_activations = tf.nn.relu(tf.matmul(features['images'], weights_1))
    output_activations = tf.matmul(hidden_activations, weights_2)
    
    # 2. Define the loss function for training/evaluation
    # NOTE(review): tf.nn.l2_loss already returns a scalar, so the outer
    # reduce_mean is a no-op here — harmless, but worth confirming intent.
    loss = tf.reduce_mean(tf.nn.l2_loss(targets - output_activations))
    
    # 3. Define the training operation/optimizer
    train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=learning_rate,
            optimizer="SGD")
    
    # 4. Generate predictions
    predictions_dict = {
        "classes":       tf.argmax(input=output_activations, axis=1),
        "probabilities": tf.nn.softmax(output_activations, name="softmax_tensor"), 
        "logits":        output_activations,
    }
    
    # Optional: Define eval metric ops; here we add an accuracy metric.
    # NOTE(review): contrib.learn eval_metric_ops conventionally take
    # (value, update_op) tuples (e.g. tf.metrics.accuracy, as used in the
    # tf.layers model below); a bare tensor is used here — verify this is
    # accepted by the Estimator version in use.
    is_correct = tf.equal(tf.argmax(input=targets, axis=1),
                                                tf.argmax(input=output_activations, axis=1))
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
    eval_metric_ops = { "accuracy":  accuracy}

    # 5. Return predictions/loss/train_op/eval_metric_ops in ModelFnOps object
    return tf.contrib.learn.ModelFnOps(
            mode=mode,
            predictions=predictions_dict,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops)


# Wrap the model_fn in an Estimator; it manages sessions/checkpoints for us.
custom_model = tf.contrib.learn.Estimator(model_fn=model_fn)

# Train and evaluate the model.
def evaluate_model(model, input_fn):
    """Alternate training and evaluation for 6 rounds, printing eval metrics."""
    for i in range(6):
        # One round = n_epochs passes over the training data.
        max_steps = train_data.shape[0]*n_epochs // batch_size
        model.fit(input_fn=lambda: input_fn(train_data, train_labels), steps=max_steps)
        print(model.evaluate(input_fn=lambda: input_fn(test_data, test_labels),
                                                 steps=150))


evaluate_model(custom_model, input_fn)

Custom model, simplified with tf.layers

Instead of doing the matrix multiplications and everything ourselves, we can use tf.layers to simplify the definition.


In [ ]:
tf.reset_default_graph()

# Model parameters (same hyperparameters as the hand-rolled estimator above).
batch_size = 32
hidden1_units = 128
learning_rate = 0.005
input_dimension = train_data.shape[1]   # 784 = 28*28 pixels
output_dimension = train_labels.shape[1]  # 6 classes

def layers_custom_model_fn(features, targets, mode, params):
    """Custom model_fn built with tf.layers instead of hand-rolled matmuls.

    Same network shape as model_fn above, but tf.layers.dense creates and
    tracks the weight/bias variables for us.
    """
    # 1. Configure the model via TensorFlow operations (using tf.layers). Note how
    #    much simpler this is compared to defining the weight matrices and matrix
    #    multiplications by hand.
    hidden_layer = tf.layers.dense(inputs=features['images'], units=hidden1_units, activation=tf.nn.relu)
    # BUG FIX: the output layer must be linear (activation=None). The original
    # used tf.nn.relu here, which zeroes out every negative logit — that
    # corrupts the "logits" prediction entry, skews the softmax probabilities,
    # and makes argmax ties at 0 common. A linear output also matches the
    # hand-rolled model_fn above.
    output_layer = tf.layers.dense(inputs=hidden_layer, units=output_dimension, activation=None)
    
    # 2. Define the loss function for training/evaluation
    loss = tf.losses.mean_squared_error(labels=targets, predictions=output_layer)
    
    # 3. Define the training operation/optimizer
    train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            learning_rate=learning_rate,
            optimizer="SGD")
    
    # 4. Generate predictions
    predictions_dict = {
        "classes":       tf.argmax(input=output_layer, axis=1),
        "probabilities": tf.nn.softmax(output_layer, name="softmax_tensor"), 
        "logits":        output_layer,
    }
    
    # Define eval metric ops; we can also use a pre-defined function here.
    # tf.metrics.accuracy returns the (value, update_op) tuple the framework expects.
    accuracy = tf.metrics.accuracy(
        labels=tf.argmax(input=targets, axis=1),
        predictions=tf.argmax(input=output_layer, axis=1))
    eval_metric_ops = {"accuracy":  accuracy}

    # 5. Return predictions/loss/train_op/eval_metric_ops in ModelFnOps object
    return tf.contrib.learn.ModelFnOps(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)


# Same Estimator wrapper as before, just with the tf.layers-based model_fn.
layers_custom_model = tf.contrib.learn.Estimator(
        model_fn=layers_custom_model_fn)

# Train and evaluate the model.
evaluate_model(layers_custom_model, input_fn)

Model using canned estimators

Instead of defining our own DNN classifier, TensorFlow supplies a number of canned estimators that can save a lot of work.


In [ ]:
tf.reset_default_graph()

# Model parameters.
hidden1_units = 128
learning_rate = 0.005
input_dimension = train_data.shape[1]   # 784 = 28*28 pixels
output_dimension = train_labels.shape[1]  # 6 classes

# Our model can be defined using just three simple lines...
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
# Feature column tells the canned estimator which feature-dict key holds the pixels.
images_column = tf.contrib.layers.real_valued_column("images")
# Task: Use the DNNClassifier Estimator to create the model in 1 line.
# SOLUTION: DNNClassifier can be used to efficiently (in lines of code) create the model.
canned_model = tf.contrib.learn.DNNClassifier(
    feature_columns=[images_column],
    hidden_units=[hidden1_units],  # one hidden layer; add entries for deeper nets
    n_classes=output_dimension,
    activation_fn=tf.nn.relu,
    optimizer=optimizer)

# Potential exercises: play with model parameters, e.g. add dropout

# Potential exercises: play with model parameters, e.g. add dropout

In [ ]:
# We need to change the input_fn so that it returns integers representing the classes instead of one-hot vectors.
def class_input_fn(data, labels):
    """Input pipeline like input_fn, but yields integer class ids.

    DNNClassifier wants sparse class labels, so one-hot rows are converted
    to their argmax index before being fed into the queue.
    """
    images_const = tf.constant(
        data, shape=data.shape, verify_shape=True, dtype=tf.float32)
    # The next two lines are different: collapse one-hot vectors to class ids.
    label_ids = np.argmax(labels, axis=1)
    labels_const = tf.constant(
        label_ids, shape=label_ids.shape, verify_shape=True, dtype=tf.int32)
    single_image, single_label = tf.train.slice_input_producer(
        [images_const, labels_const], num_epochs=n_epochs)
    batched = tf.train.batch(
        dict(images=single_image, labels=single_label),
        batch_size, allow_smaller_final_batch=True)
    label_tensor = batched.pop('labels')
    return batched, label_tensor

In [ ]:
# Train and evaluate the model.
# Uses class_input_fn because DNNClassifier expects integer class labels.
evaluate_model(canned_model, class_input_fn)

Using Convolutions


In [ ]:
import tensorflow as tf
tf.reset_default_graph()

input_dimension = train_data.shape[1]   # 784 = 28*28 pixels
output_dimension = train_labels.shape[1]  # 6 classes
batch_size = 32

# Placeholders for a batch of flattened images and their one-hot labels.
data_batch =  tf.placeholder("float", shape=[None, input_dimension])
label_batch = tf.placeholder("float", shape=[None, output_dimension])

def weight_variable(shape):
    """Create a trainable weight tensor initialized with small Gaussian noise."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a trainable bias tensor initialized to a small positive constant."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """2-D convolution with stride 1 and SAME padding (output keeps x's spatial size)."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    """2x2 max pooling with stride 2 — halves height and width."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

# Task: convert the batch_size x num_pixels (784) input to batch_size, height (28), width(28), channels
# SOLUTION: reshape the input. We only have a single color channel.
# NOTE(review): 28x28 is hard-coded here and in the 7*7 flatten below — assumes
# input_dimension == 784; confirm for other datasets.
image_batch = tf.reshape(data_batch, [-1, 28, 28, 1])

# Conv layer 1: 5x5 kernels, 1 input channel -> 32 feature maps.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

h_conv1 = tf.nn.relu(conv2d(image_batch, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)  # 28x28 -> 14x14

# Conv layer 2: 5x5 kernels, 32 -> 48 feature maps.
W_conv2 = weight_variable([5, 5, 32, 48])
b_conv2 = bias_variable([48])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)  # 14x14 -> 7x7

# Fully connected layer on the flattened 7x7x48 feature maps.
W_fc1 = weight_variable([7 * 7 * 48, 256])
b_fc1 = bias_variable([256])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*48])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Task: add dropout to fully connected layer. Add a variable to turn dropout off in eval.
# SOLUTION: add placeholder variable to deactivate dropout (keep_prob=1.0) in eval.
keep_prob = tf.placeholder(tf.float32)
# SOLUTION: add dropout to fully connected layer.
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Final linear layer producing per-class logits.
W_fc2 = weight_variable([256, output_dimension])
b_fc2 = bias_variable([output_dimension])

output_activations = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

loss = tf.reduce_mean(
           tf.nn.softmax_cross_entropy_with_logits(labels=label_batch, 
                                                   logits=output_activations))

# Solution: Switch from GradientDescentOptimizer to AdamOptimizer
# learning_rate = 0.001
# updates = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
learning_rate = 0.001
updates = tf.train.AdamOptimizer(learning_rate).minimize(loss)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    
    random_indices = np.random.permutation(train_data.shape[0])
    n_epochs = 5  # how often to go through the training data
    max_steps = train_data.shape[0]*n_epochs // batch_size
    for i in range(max_steps):
        # Wrap around the shuffled index list to form each batch.
        batch_start_idx = (i % (train_data.shape[0] // batch_size)) * batch_size
        batch_indices = random_indices[batch_start_idx:batch_start_idx+batch_size]
        batch_loss, _ = sess.run(
            [loss, updates], 
            feed_dict = {
                data_batch : train_data[batch_indices,:],
                label_batch : train_labels[batch_indices,:],
                # SOLUTION: Dropout active during training
                keep_prob : 0.5})
        if i % 100 == 0 or i == max_steps - 1:
            # Reshuffle periodically so batch composition varies across epochs.
            random_indices = np.random.permutation(train_data.shape[0])
            print("Batch-Loss at iteration {}/{}: {}".format(i, max_steps-1, batch_loss))
    
            # Evaluate on the full test set; argmax of logits gives the class.
            test_predictions = sess.run(
                output_activations,
                feed_dict = {
                    data_batch : test_data,
                    label_batch : test_labels,
                    # SOLUTION: No dropout during eval
                    keep_prob : 1.0
                })
            wins = np.argmax(test_predictions, axis=1) == np.argmax(test_labels, axis=1)
            print("Accuracy on test: {}%".format(100*np.mean(wins)))

In [ ]: