This notebook contains code to visualize a learned embedding using TensorBoard. The result is extremely cool. After you've run the notebook, start TensorBoard pointed at the log directory (tensorboard --logdir ./graphs), open the 'embeddings' tab, and select t-SNE.
For convenience, we've provided a sprite containing images for this dataset inside the data directory. If you'd like to create your own sprite, see the extras folder for an example.
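If you're curious, the cell below is a rough, self-contained sketch (using numpy and Pillow, with placeholder output names my_sprite.png and my_labels.tsv) of how a sprite and matching labels file could be built; it is not the exact script used to generate the provided files.
In [ ]:
# Rough sketch: tile the first 1,024 MNIST test images into a 32x32 sprite
# and write a matching labels file. Assumes numpy and Pillow are installed;
# the output file names are placeholders, not the provided data files.
import numpy as np
from PIL import Image
import tensorflow as tf

mnist_raw = tf.contrib.learn.datasets.mnist.read_data_sets('/tmp/data', one_hot=False)
thumbs = mnist_raw.test.images[:1024].reshape(1024, 28, 28)
digits = mnist_raw.test.labels[:1024]

# TensorBoard reads sprite thumbnails row-major (left to right, top to bottom),
# so tile the images into a 32x32 grid: 32 * 28 = 896 pixels per side.
grid = thumbs.reshape(32, 32, 28, 28).transpose(0, 2, 1, 3).reshape(896, 896)
grid = 1.0 - grid  # invert so digits are dark on a light background (cosmetic)
Image.fromarray((grid * 255).astype(np.uint8)).save('my_sprite.png')

# One label per line, in the same order as the embedded images; a single-column
# metadata file should have no header row.
with open('my_labels.tsv', 'w') as f:
    f.write('\n'.join(str(d) for d in digits))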
In [ ]:
import os
import tensorflow as tf
In [ ]:
LOGDIR = './graphs'
# Remember to clear LOGDIR before running this notebook.
# For convenience, you can uncomment the following two lines
# to delete LOGDIR. Please be *careful*, as this will delete
# the directory tree!
# import shutil
# if os.path.exists(LOGDIR): shutil.rmtree(LOGDIR)
In [ ]:
tf.reset_default_graph()
sess = tf.Session()
In [ ]:
mnist = tf.contrib.learn.datasets.mnist.read_data_sets('/tmp/data', one_hot=True)
In [ ]:
LEARNING_RATE = 1E-4
HIDDEN1_SIZE = 1024
HIDDEN2_SIZE = 1024
NUM_CLASSES = 10
NUM_PIXELS = 28 * 28
TRAIN_STEPS = 2001
BATCH_SIZE = 100
In [ ]:
# Function to create a fully connected layer
def fc_layer(input, size_out, name="fc", activation=None):
    with tf.name_scope(name):
        size_in = int(input.shape[1])
        w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="weights")
        b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="bias")
        wx_plus_b = tf.matmul(input, w) + b
        if activation: return activation(wx_plus_b)
        return wx_plus_b
In [ ]:
with tf.name_scope('input'):
    images = tf.placeholder(tf.float32, [None, NUM_PIXELS], name="pixels")
    labels = tf.placeholder(tf.float32, [None, NUM_CLASSES], name="labels")
In [ ]:
# the MNIST images loaded above are already flattened into 784-dimensional
# vectors, so they can be fed straight into the first fully connected layer
fc1 = fc_layer(images, HIDDEN1_SIZE, "fc1", activation=tf.nn.relu)
fc2 = fc_layer(fc1, HIDDEN2_SIZE, "fc2", activation=tf.nn.relu)
logits = fc_layer(fc2, NUM_CLASSES, name="output")
with tf.name_scope("loss"):
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels), name="loss")
tf.summary.scalar("loss", loss)
with tf.name_scope("train"):
train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar("accuracy", accuracy)
summ = tf.summary.merge_all()
# we'll use the output of the second fully connected layer (fc2) as the embedding;
# the first dimension is the number of test images we embed (1024, matching the
# provided sprite and labels files), not HIDDEN1_SIZE
embedding = tf.Variable(tf.zeros([1024, HIDDEN2_SIZE]), name="test_embedding")
assignment = embedding.assign(fc2)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
writer = tf.summary.FileWriter(LOGDIR)
writer.add_graph(sess.graph)
config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
embedding_config = config.embeddings.add()
embedding_config.tensor_name = embedding.name
# for convenience, we've provided these files
# although if you wanted, you could add code to produce them yourself
embedding_config.sprite.image_path = "../data/sprite_1024.png"
embedding_config.metadata_path = "../data/labels_1024.tsv"
# Specify the width and height of a single thumbnail.
embedding_config.sprite.single_image_dim.extend([28, 28])
tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)
for i in range(TRAIN_STEPS):
    batch = mnist.train.next_batch(BATCH_SIZE)
    if i % 100 == 0:
        [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={images: batch[0], labels: batch[1]})
        writer.add_summary(s, i)
        print(i, train_accuracy)
    if i % 500 == 0:
        # store the current fc2 activations for the first 1,024 test images in the
        # embedding variable, then checkpoint so the projector can read them
        sess.run(assignment, feed_dict={images: mnist.test.images[:1024], labels: mnist.test.labels[:1024]})
        saver.save(sess, os.path.join(LOGDIR, "model.ckpt"), i)
    sess.run(train_step, feed_dict={images: batch[0], labels: batch[1]})
writer.close()
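Optionally, as a quick sanity check (not part of the original notebook), you can read the saved embedding back from the most recent checkpoint and confirm it has one row per embedded test image.
In [ ]:
# Optional sanity check (a sketch): read the saved embedding back from the
# latest checkpoint and confirm its shape is (1024, HIDDEN2_SIZE).
reader = tf.train.NewCheckpointReader(tf.train.latest_checkpoint(LOGDIR))
print(reader.get_tensor("test_embedding").shape)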