In [ ]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import tensorflow as tf
In [ ]:
# This is for graph visualization.
from IPython.display import clear_output, Image, display, HTML
def strip_consts(graph_def, max_const_size=32):
    """Return a copy of `graph_def` with large Const values stripped out.

    Args:
        graph_def: a `tf.GraphDef` protocol buffer.
        max_const_size: constants whose serialized `tensor_content` exceeds
            this many bytes are replaced by a short placeholder.

    Returns:
        A new `tf.GraphDef` small enough for inline visualization.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # BUG FIX: tensor_content is a protobuf *bytes* field;
                # assigning a unicode str fails under Python 3, so encode
                # the placeholder explicitly (a no-op str on Python 2).
                tensor.tensor_content = ("<stripped %d bytes>" % size).encode("utf-8")
    return strip_def
def show_graph(graph_def, max_const_size=32):
    """Visualize a TensorFlow graph inline using the TensorBoard widget.

    Args:
        graph_def: a `tf.GraphDef`, or anything exposing `as_graph_def()`
            (e.g. a `tf.Graph`).
        max_const_size: forwarded to `strip_consts` to drop big constants.
    """
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
<script>
function load() {{
document.getElementById("{id}").pbtxt = {data};
}}
</script>
<link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
<div style="height:600px">
<tf-graph-basic id="{id}"></tf-graph-basic>
</div>
""".format(data=repr(str(strip_def)), id='graph' + str(np.random.rand()))
    # BUG FIX: the inner document is embedded in the iframe's srcdoc
    # *attribute*, so its double quotes must be HTML-escaped. The previous
    # replace('"', '"') was a no-op (the &quot; entity was lost when the
    # notebook was exported), producing broken HTML.
    iframe = """
<iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
""".format(code.replace('"', '&quot;'))
    display(HTML(iframe))
In [ ]:
DATA_DIR = '../data/'
data_filename = os.path.join(DATA_DIR, "zoo.npz")
# BUG FIX: pass the path straight to np.load instead of np.load(open(path)).
# The bare open() used text mode (corrupting the zip payload on some
# platforms under Python 3) and leaked the file handle; np.load opens the
# file itself in binary mode and the returned NpzFile manages the handle.
data = np.load(data_filename)
train_data = data['arr_0']
train_labels = data['arr_1']
test_data = data['arr_2']
test_labels = data['arr_3']
del data  # drop the NpzFile so its underlying file handle can be released
print("Data shapes: ", test_data.shape, test_labels.shape, train_data.shape, train_labels.shape)
In [ ]:
tf.reset_default_graph()

input_dimension = train_data.shape[1]     # 784 = 28*28 pixels
output_dimension = train_labels.shape[1]  # number of label classes
batch_size = 32
hidden1_units = 128

# Placeholders fed one batch at a time via feed_dict.
data_batch = tf.placeholder("float", shape=[None, input_dimension], name="data")
label_batch = tf.placeholder("float", shape=[None, output_dimension], name="labels")

weights_1 = tf.Variable(
    tf.truncated_normal(
        [input_dimension, hidden1_units],
        stddev=1.0 / np.sqrt(float(input_dimension))),
    name='weights_1')
# Task: Add Bias to first layer
# Task: Use Cross-Entropy instead of Squared Loss
# SOLUTION: Create biases variable.
biases_1 = tf.Variable(
    tf.truncated_normal(
        [hidden1_units],
        stddev=1.0 / np.sqrt(float(hidden1_units))),
    name='biases_1')
weights_2 = tf.Variable(
    tf.truncated_normal(
        [hidden1_units, output_dimension],
        stddev=1.0 / np.sqrt(float(hidden1_units))),
    name='weights_2')

# SOLUTION: Add the bias term to the first layer
wx_b = tf.add(tf.matmul(data_batch, weights_1), biases_1)
hidden_activations = tf.nn.relu(wx_b)
# BUG FIX: softmax_cross_entropy_with_logits expects *unscaled* logits (it
# applies softmax internally). The previous tf.nn.tanh squashed the logits
# into [-1, 1], capping the achievable confidence and distorting gradients,
# so the output layer is now purely linear.
output_activations = tf.matmul(hidden_activations, weights_2)

# SOLUTION: Replace the l2 loss with softmax cross entropy.
with tf.name_scope("loss"):
    # loss = tf.nn.l2_loss(label_batch - output_activations)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=label_batch,
            logits=output_activations))

show_graph(tf.get_default_graph().as_graph_def())
We can run this graph by feeding in batches of examples using a feed_dict. The keys of the feed_dict are placeholders we've defined previously. The first argument of session.run is the tensor that we're computing. Only parts of the graph required to produce this value will be executed.
In [ ]:
with tf.Session() as sess:
    # Initialize all variables before running any ops.
    sess.run(tf.global_variables_initializer())
    shuffled = np.random.permutation(train_data.shape[0])
    batches_per_epoch = train_data.shape[0] // batch_size
    for step in range(1000):
        # Walk through the shuffled data one batch at a time, wrapping around.
        start = (step % batches_per_epoch) * batch_size
        idx = shuffled[start:start + batch_size]
        batch_loss = sess.run(
            loss,
            feed_dict={data_batch: train_data[idx, :],
                       label_batch: train_labels[idx, :]})
        if (step + 1) % 100 == 0:
            print("Loss at iteration {}: {}".format(step + 1, batch_loss))
No learning happens yet, but we do get the loss for each batch. To actually train, we need to add an optimizer to the graph.
In [ ]:
# Task: Replace GradientDescentOptimizer with AdagradOptimizer and a 0.1 learning rate.
# SOLUTION: Adagrad with learning rate 0.1. The SGD version it replaces was:
#   updates = tf.train.GradientDescentOptimizer(0.005).minimize(loss)
learning_rate = 0.1
updates = tf.train.AdagradOptimizer(learning_rate).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    shuffled = np.random.permutation(train_data.shape[0])
    n_epochs = 10  # number of passes over the training data
    max_steps = train_data.shape[0] * n_epochs // batch_size
    batches_per_epoch = train_data.shape[0] // batch_size
    for step in range(max_steps):
        start = (step % batches_per_epoch) * batch_size
        idx = shuffled[start:start + batch_size]
        # Run the optimizer and fetch the loss for this batch in one call.
        batch_loss, _ = sess.run(
            [loss, updates],
            feed_dict={data_batch: train_data[idx, :],
                       label_batch: train_labels[idx, :]})
        if step % 200 == 0 or step == max_steps - 1:
            # Periodically reshuffle the data and report progress.
            shuffled = np.random.permutation(train_data.shape[0])
            print("Batch-Loss at iteration {}: {}".format(step, batch_loss))
    # Evaluate on the held-out test set inside the same session.
    test_predictions = sess.run(
        output_activations,
        feed_dict={data_batch: test_data,
                   label_batch: test_labels})
    wins = np.argmax(test_predictions, axis=1) == np.argmax(test_labels, axis=1)
    print("Accuracy on test: {}%".format(100 * np.mean(wins)))
Loss going down, Accuracy going up! \o/
Notice how batch loss differs between batches.
In [ ]:
tf.reset_default_graph()

# Hyperparameters for the Estimator-based model.
batch_size = 32
hidden1_units = 128
learning_rate = 0.005
n_epochs = 10  # number of passes over the training data
input_dimension = train_data.shape[1]     # 784 = 28*28 pixels
output_dimension = train_labels.shape[1]  # 6 classes
def input_fn(data, labels):
    """Build a queue-based input pipeline yielding (features, labels) batches.

    `data`/`labels` are embedded as constants, sliced row-by-row for
    `n_epochs` passes, and regrouped into batches of `batch_size`.
    """
    images_const = tf.constant(
        data, shape=data.shape, verify_shape=True, dtype=tf.float32)
    labels_const = tf.constant(
        labels, shape=labels.shape, verify_shape=True, dtype=tf.float32)
    image, label = tf.train.slice_input_producer(
        [images_const, labels_const], num_epochs=n_epochs)
    batch = tf.train.batch(
        dict(images=image, labels=label), batch_size,
        allow_smaller_final_batch=True)
    # Split labels out of the feature dict, as the Estimator API expects.
    labels_batch = batch.pop('labels')
    return batch, labels_batch
def model_fn(features, targets, mode, params):
    """Model function for tf.contrib.learn.Estimator: 1-hidden-layer MLP.

    Args:
        features: dict holding an 'images' float tensor, shape [batch, input_dimension].
        targets: one-hot label tensor, shape [batch, output_dimension].
        mode: a tf.contrib.learn.ModeKeys value (train/eval/infer).
        params: hyperparameter dict (unused here).

    Returns:
        tf.contrib.learn.ModelFnOps bundling predictions, loss, train_op
        and eval metrics.
    """
    # 1. Configure the model via TensorFlow operations (same as above)
    weights_1 = tf.Variable(
        tf.truncated_normal(
            [input_dimension, hidden1_units],
            stddev=1.0 / np.sqrt(float(input_dimension))))
    weights_2 = tf.Variable(
        tf.truncated_normal(
            [hidden1_units, output_dimension],
            stddev=1.0 / np.sqrt(float(hidden1_units))))
    hidden_activations = tf.nn.relu(tf.matmul(features['images'], weights_1))
    output_activations = tf.matmul(hidden_activations, weights_2)
    # 2. Define the loss function for training/evaluation
    loss = tf.reduce_mean(tf.nn.l2_loss(targets - output_activations))
    # 3. Define the training operation/optimizer
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=learning_rate,
        optimizer="SGD")
    # 4. Generate predictions
    predictions_dict = {
        "classes": tf.argmax(input=output_activations, axis=1),
        "probabilities": tf.nn.softmax(output_activations, name="softmax_tensor"),
        "logits": output_activations,
    }
    # FIX: use the streaming tf.metrics.accuracy — a (value, update_op)
    # pair, the form eval_metric_ops expects and the one the tf.layers
    # model below already uses. The previous hand-rolled reduce_mean tensor
    # only reflected a single batch rather than accumulating over the whole
    # evaluation run.
    accuracy = tf.metrics.accuracy(
        labels=tf.argmax(input=targets, axis=1),
        predictions=tf.argmax(input=output_activations, axis=1))
    eval_metric_ops = {"accuracy": accuracy}
    # 5. Return predictions/loss/train_op/eval_metric_ops in ModelFnOps object
    return tf.contrib.learn.ModelFnOps(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)
custom_model = tf.contrib.learn.Estimator(model_fn=model_fn)

# Train and evaluate the model.
def evaluate_model(model, input_fn):
    """Run 6 rounds of fit-then-evaluate, printing test metrics each round."""
    steps_per_round = train_data.shape[0] * n_epochs // batch_size
    for _ in range(6):
        model.fit(input_fn=lambda: input_fn(train_data, train_labels),
                  steps=steps_per_round)
        print(model.evaluate(input_fn=lambda: input_fn(test_data, test_labels),
                             steps=150))

evaluate_model(custom_model, input_fn)
In [ ]:
tf.reset_default_graph()

# Hyperparameters for the tf.layers-based model.
batch_size = 32
hidden1_units = 128
learning_rate = 0.005
input_dimension = train_data.shape[1]     # 784 = 28*28 pixels
output_dimension = train_labels.shape[1]  # 6 classes
def layers_custom_model_fn(features, targets, mode, params):
    """Model function built with tf.layers for tf.contrib.learn.Estimator.

    Args:
        features: dict holding an 'images' float tensor, shape [batch, input_dimension].
        targets: one-hot label tensor, shape [batch, output_dimension].
        mode: a tf.contrib.learn.ModeKeys value (train/eval/infer).
        params: hyperparameter dict (unused here).

    Returns:
        tf.contrib.learn.ModelFnOps bundling predictions, loss, train_op
        and eval metrics.
    """
    # 1. Configure the model via TensorFlow operations (using tf.layers). Note how
    # much simpler this is compared to defining the weight matrices and matrix
    # multiplications by hand.
    hidden_layer = tf.layers.dense(
        inputs=features['images'], units=hidden1_units, activation=tf.nn.relu)
    # BUG FIX: this layer's output is used as *logits* below (softmax for
    # "probabilities", argmax for "classes"), so it must be linear. The
    # previous relu activation zeroed every negative logit, distorting both
    # the predicted distribution and the loss.
    output_layer = tf.layers.dense(
        inputs=hidden_layer, units=output_dimension, activation=None)
    # 2. Define the loss function for training/evaluation
    loss = tf.losses.mean_squared_error(labels=targets, predictions=output_layer)
    # 3. Define the training operation/optimizer
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=learning_rate,
        optimizer="SGD")
    # 4. Generate predictions
    predictions_dict = {
        "classes": tf.argmax(input=output_layer, axis=1),
        "probabilities": tf.nn.softmax(output_layer, name="softmax_tensor"),
        "logits": output_layer,
    }
    # Define eval metric ops; we can also use a pre-defined function here.
    accuracy = tf.metrics.accuracy(
        labels=tf.argmax(input=targets, axis=1),
        predictions=tf.argmax(input=output_layer, axis=1))
    eval_metric_ops = {"accuracy": accuracy}
    # 5. Return predictions/loss/train_op/eval_metric_ops in ModelFnOps object
    return tf.contrib.learn.ModelFnOps(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)
# Build the Estimator from the tf.layers model function, then train/evaluate.
layers_custom_model = tf.contrib.learn.Estimator(model_fn=layers_custom_model_fn)
evaluate_model(layers_custom_model, input_fn)
In [ ]:
tf.reset_default_graph()

# Model parameters.
hidden1_units = 128
learning_rate = 0.005
input_dimension = train_data.shape[1]     # 784 = 28*28 pixels
output_dimension = train_labels.shape[1]  # 6 classes

# Task: Use the DNNClassifier Estimator to create the model in 1 line.
# SOLUTION: the canned DNNClassifier replaces the whole hand-written
# model_fn — just describe the feature column, layer sizes and optimizer.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
images_column = tf.contrib.layers.real_valued_column("images")
canned_model = tf.contrib.learn.DNNClassifier(
    feature_columns=[images_column],
    hidden_units=[hidden1_units],
    n_classes=output_dimension,
    activation_fn=tf.nn.relu,
    optimizer=optimizer)
# Potential exercises: play with model parameters, e.g. add dropout
In [ ]:
# We need to change the input_fn so that it returns integers representing the classes instead of one-hot vectors.
def class_input_fn(data, labels):
    """Like input_fn, but yields integer class ids instead of one-hot labels."""
    images_const = tf.constant(
        data, shape=data.shape, verify_shape=True, dtype=tf.float32)
    # The next two lines are different: collapse each one-hot row to its
    # class index and embed the result as an int32 constant.
    class_ids = np.argmax(labels, axis=1)
    labels_const = tf.constant(
        class_ids, shape=class_ids.shape, verify_shape=True, dtype=tf.int32)
    image, label = tf.train.slice_input_producer(
        [images_const, labels_const], num_epochs=n_epochs)
    batch = tf.train.batch(
        dict(images=image, labels=label), batch_size,
        allow_smaller_final_batch=True)
    labels_batch = batch.pop('labels')
    return batch, labels_batch
In [ ]:
# Train and evaluate the model.
# Uses class_input_fn because this classifier takes integer class labels
# rather than one-hot vectors (see the note above class_input_fn).
evaluate_model(canned_model, class_input_fn)
In [ ]:
import tensorflow as tf

tf.reset_default_graph()

# Convolutional model: dimensions and input placeholders.
input_dimension = train_data.shape[1]     # 784 = 28*28 pixels
output_dimension = train_labels.shape[1]  # 6 classes
batch_size = 32
data_batch = tf.placeholder("float", shape=[None, input_dimension])
label_batch = tf.placeholder("float", shape=[None, output_dimension])
def weight_variable(shape):
    """Return a trainable weight tensor drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a trainable bias tensor initialized to the constant 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """2-D convolution of x with kernel W, stride 1, 'SAME' padding."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
    """2x2 max-pooling with stride 2 and 'SAME' padding (halves H and W)."""
    return tf.nn.max_pool(
        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# Task: convert the batch_size x num_pixels (784) input to batch_size, height (28), width(28), channels
# SOLUTION: reshape the flat pixel rows into 28x28 single-channel images.
image_batch = tf.reshape(data_batch, [-1, 28, 28, 1])

# First conv block: 5x5 kernels, 1 -> 32 channels, relu, then 2x2 max-pool.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_pool1 = max_pool_2x2(tf.nn.relu(conv2d(image_batch, W_conv1) + b_conv1))

# Second conv block: 5x5 kernels, 32 -> 48 channels, relu, then 2x2 max-pool.
W_conv2 = weight_variable([5, 5, 32, 48])
b_conv2 = bias_variable([48])
h_pool2 = max_pool_2x2(tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2))

# Fully connected layer on the flattened 7x7x48 feature map.
W_fc1 = weight_variable([7 * 7 * 48, 256])
b_fc1 = bias_variable([256])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 48])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Task: add dropout to fully connected layer. Add a variable to turn dropout off in eval.
# SOLUTION: a keep_prob placeholder lets us disable dropout (keep_prob=1.0) in eval.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Linear readout producing the class logits.
W_fc2 = weight_variable([256, output_dimension])
b_fc2 = bias_variable([output_dimension])
output_activations = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        labels=label_batch, logits=output_activations))

# Solution: Switch from GradientDescentOptimizer to AdamOptimizer.
# (Previously: tf.train.GradientDescentOptimizer(learning_rate).minimize(loss))
learning_rate = 0.001
updates = tf.train.AdamOptimizer(learning_rate).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    shuffled = np.random.permutation(train_data.shape[0])
    n_epochs = 5  # how often to go through the training data
    max_steps = train_data.shape[0] * n_epochs // batch_size
    batches_per_epoch = train_data.shape[0] // batch_size
    for step in range(max_steps):
        start = (step % batches_per_epoch) * batch_size
        idx = shuffled[start:start + batch_size]
        batch_loss, _ = sess.run(
            [loss, updates],
            feed_dict={
                data_batch: train_data[idx, :],
                label_batch: train_labels[idx, :],
                # SOLUTION: Dropout active during training
                keep_prob: 0.5})
        if step % 100 == 0 or step == max_steps - 1:
            # Periodically reshuffle the data and report progress.
            shuffled = np.random.permutation(train_data.shape[0])
            print("Batch-Loss at iteration {}/{}: {}".format(step, max_steps - 1, batch_loss))
    # Evaluate on the held-out test set inside the same session.
    test_predictions = sess.run(
        output_activations,
        feed_dict={
            data_batch: test_data,
            label_batch: test_labels,
            # SOLUTION: No dropout during eval
            keep_prob: 1.0})
    wins = np.argmax(test_predictions, axis=1) == np.argmax(test_labels, axis=1)
    print("Accuracy on test: {}%".format(100 * np.mean(wins)))
In [ ]: