In [ ]:
# Created 2016-04-03
# TensorFlow version: 0.7
In [1]:
import numpy as np
import tensorflow as tf
In [2]:
# ---- Snippet 1 ----
# Convert a vector of IDs into a matrix of sparse binary (one-hot) vectors.
#
# The IDs range over [0 .. (num_ids - 1)], and each resulting binary vector
# has size num_ids. With num_ids = 5 the valid IDs are [0 .. 4].
num_ids = 5

# The constant dictionary defines the vector that each ID maps to: row i is
# the vector returned for ID i. In this example it is an identity matrix,
# since we are converting the input IDs to binary vectors.
#
# [[ 1. 0. 0. 0. 0.]
#  [ 0. 1. 0. 0. 0.]
#  [ 0. 0. 1. 0. 0.]
#  [ 0. 0. 0. 1. 0.]
#  [ 0. 0. 0. 0. 1.]]
#
# In a real-world use case it can be an arbitrary matrix, and a variable
# instead of a constant.
dictionary = tf.constant(np.identity(num_ids))

# The input IDs. In the real world this is usually a variable or a
# placeholder of int32.
input_ids = tf.constant([0, 3, 0, 2, 4, 1, 1, 3])

# Apply embedding_lookup: one dictionary row is selected per input ID, so the
# result has one row per entry of input_ids.
converted_vectors = tf.nn.embedding_lookup(dictionary, input_ids)

# Evaluate the graph. The print call has to live inside the `with` body so
# that it runs while the session is still open.
with tf.Session() as sess:
    print(sess.run(converted_vectors))

# Expected Output:
# [[ 1. 0. 0. 0. 0.]
#  [ 0. 0. 0. 1. 0.]
#  [ 1. 0. 0. 0. 0.]
#  [ 0. 0. 1. 0. 0.]
#  [ 0. 0. 0. 0. 1.]
#  [ 0. 1. 0. 0. 0.]
#  [ 0. 1. 0. 0. 0.]
#  [ 0. 0. 0. 1. 0.]]
In [11]:
# ---- Snippet 2 ----
# When the input IDs are a matrix (a rank-2 tensor) instead of a vector.
#
# Now the dictionary holds 6 vectors, each of length 3; row i is the vector
# returned for ID i.
dictionary = tf.constant([[0.1, 0.1, 0.1],
                          [0.2, 0.2, 0.2],
                          [0.3, 0.3, 0.3],
                          [0.4, 0.4, 0.4],
                          [0.5, 0.5, 0.5],
                          [0.6, 0.6, 0.6]])

# The input IDs are now a matrix (tensor) of shape 2 * 4.
input_ids = tf.constant([[0, 1, 2, 0],
                         [1, 1, 2, 2]])

# Every ID is replaced by its length-3 dictionary row, so the converted
# tensor has shape 2 * 4 * 3.
converted = tf.nn.embedding_lookup(dictionary, input_ids)

# Evaluate the graph. The print call has to live inside the `with` body so
# that it runs while the session is still open.
with tf.Session() as sess:
    print(sess.run(converted))

# Expected Output (values; exact print spacing may differ):
# [[[ 0.1 0.1 0.1]
#   [ 0.2 0.2 0.2]
#   [ 0.3 0.3 0.3]
#   [ 0.1 0.1 0.1]]
#
#  [[ 0.2 0.2 0.2]
#   [ 0.2 0.2 0.2]
#   [ 0.3 0.3 0.3]
#   [ 0.3 0.3 0.3]]]