In [ ]:
# Created 2016-04-03
# TensorFlow version: 0.7

In [1]:
import numpy as np
import tensorflow as tf

In [2]:
# ---- Snippet 1 ----
# Turn a vector of integer IDs into a matrix of one-hot (binary) row vectors.
#
# Valid IDs lie in [0 .. num_ids - 1]; every resulting row has length num_ids.

# IDs range over [0 .. 4].
num_ids = 5

# The lookup table: row i is the vector that ID i maps to.
# Using the identity matrix makes every ID map to its one-hot encoding:
#
# [[ 1.  0.  0.  0.  0.]
#  [ 0.  1.  0.  0.  0.]
#  [ 0.  0.  1.  0.  0.]
#  [ 0.  0.  0.  1.  0.]
#  [ 0.  0.  0.  0.  1.]]
#
# In a real-world use case the table can be an arbitrary matrix, and a
# variable rather than a constant.
dictionary = tf.constant(np.eye(num_ids))

# The IDs to look up. In practice this is usually a variable or an int32 placeholder.
input_ids = tf.constant([0, 3, 0, 2, 4, 1, 1, 3])

# Pick one row of `dictionary` for each entry of `input_ids`.
converted_vectors = tf.nn.embedding_lookup(params=dictionary, ids=input_ids)

with tf.Session() as sess:
    print(sess.run(converted_vectors))

# Expected output (one row per input ID, in input order):
# [[ 1.  0.  0.  0.  0.]
#  [ 0.  0.  0.  1.  0.]
#  [ 1.  0.  0.  0.  0.]
#  [ 0.  0.  1.  0.  0.]
#  [ 0.  0.  0.  0.  1.]
#  [ 0.  1.  0.  0.  0.]
#  [ 0.  1.  0.  0.  0.]
#  [ 0.  0.  0.  1.  0.]]


[[ 1.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 1.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  0.  1.]
 [ 0.  1.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.]]

In [11]:
# ---- Snippet 2 ----
# Looking up a matrix of IDs (rank-2 tensor) instead of a flat vector of IDs.
#

# The dictionary now holds 6 vectors, each of length 3 (row i belongs to ID i).
dictionary = tf.constant([[0.1, 0.1, 0.1],
                          [0.2, 0.2, 0.2],
                          [0.3, 0.3, 0.3],
                          [0.4, 0.4, 0.4],
                          [0.5, 0.5, 0.5],
                          [0.6, 0.6, 0.6]])

# The IDs are arranged as a 2 x 4 matrix this time.
input_ids = tf.constant([[0, 1, 2, 0],
                         [1, 1, 2, 2]])

# Each ID is replaced by its length-3 dictionary row, so the result is 2 x 4 x 3.
converted = tf.nn.embedding_lookup(params=dictionary, ids=input_ids)

with tf.Session() as sess:
    print(sess.run(converted))

# Expected output (the Python float literals are stored as float32, tf.constant's
# default for them, which is why 0.3 is printed as 0.30000001):
# [[[ 0.1         0.1         0.1       ]
#   [ 0.2         0.2         0.2       ]
#   [ 0.30000001  0.30000001  0.30000001]
#   [ 0.1         0.1         0.1       ]]
#
#  [[ 0.2         0.2         0.2       ]
#   [ 0.2         0.2         0.2       ]
#   [ 0.30000001  0.30000001  0.30000001]
#   [ 0.30000001  0.30000001  0.30000001]]]


[[[ 0.1         0.1         0.1       ]
  [ 0.2         0.2         0.2       ]
  [ 0.30000001  0.30000001  0.30000001]
  [ 0.1         0.1         0.1       ]]

 [[ 0.2         0.2         0.2       ]
  [ 0.2         0.2         0.2       ]
  [ 0.30000001  0.30000001  0.30000001]
  [ 0.30000001  0.30000001  0.30000001]]]