In [37]:
import tensorflow as tf
import pandas as pd
import numpy as np
sess = tf.InteractiveSession()
In [86]:
# Initialize embedding and feature parameters.
word_dim = 2 # word embedding dimension
feat_dim = 1 # feature embedding dimension
num_feats = 2
win_len = 1
vocab_size = 5
feat_size = 10
seq_len = win_len * 2 + 1 # Total number of tokens that will be represented in the input
# For clarity, make the two sets of values easy to tell apart:
# word embedding values are fractions in [0, 0.5), while feature embedding
# values are ints in [vocab_size, vocab_size + feat_size)
word_embs_init = np.array([ [i / 10.] * word_dim for i in range(vocab_size) ])
feat_embs_init = np.array([ [i] * feat_dim for i in range(vocab_size, vocab_size + feat_size) ]) # shift IDs past vocab_size for easy viewing
# Total Input Dimension
input_dim = ( seq_len ) * ( word_dim + (num_feats * feat_dim)) # num tokens times dense vector dims
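# (Worked check, assuming the settings above: input_dim = 3 * (2 + 2 * 1) = 12,
#  i.e. 3 tokens, each contributing a 2-dim word vector plus two 1-dim feature vectors)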
In [87]:
# Now the word embedding values are clearly distinguishable from the feature embedding values
print 'all words dense vector representation'
print word_embs_init
print 'all feature dense vector representation'
print feat_embs_init
In [91]:
# Generate Random Integers as Token IDs for input
batch_size = 1
input_words = np.random.randint(0, vocab_size, (batch_size, seq_len) )
input_feats = np.random.randint(0, feat_size , (batch_size, seq_len, num_feats) )
print 'Input Word IDs'
print input_words
print 'Input Feature IDs'
print input_feats
In [92]:
# Create the input sequence by looking up dense vector representations of words
# and features, then concatenating them
# Build Lookup Layers
word_emb = tf.Variable(word_embs_init, dtype=tf.float32)
feat_emb = tf.Variable(feat_embs_init, dtype=tf.float32)
# Convert int token ids to dense vectors via lookup table
word_lookup = tf.nn.embedding_lookup(word_emb, input_words)
feat_lookup = tf.nn.embedding_lookup(feat_emb, input_feats)
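# (Aside, a minimal equivalent sketch: with a single dense embedding matrix,
#  embedding_lookup behaves like a gather along the first axis, e.g.
#  tf.gather(word_emb, input_words) yields the same (batch_size, seq_len, word_dim) tensor)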
# Concatenate dense vectors into a single input vector (batched)
# first flatten the per-token feature vectors so they can be joined
# with the word vectors along the last dimension
feat_flat = tf.reshape(feat_lookup, (batch_size, seq_len, -1))
word_feats = tf.concat([word_lookup, feat_flat], axis=2)
input_seq = tf.reshape(word_feats, [batch_size, -1])
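# (Shape bookkeeping under the settings above: word_lookup (1, 3, 2),
#  feat_lookup (1, 3, 2, 1), feat_flat (1, 3, 2), word_feats (1, 3, 4),
#  input_seq (1, 12) == (batch_size, input_dim))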
sess.run(tf.global_variables_initializer())
# Now we can see the token / feature id to embedding lookup
print 'word vectors'
print sess.run(word_lookup)
print 'feat vectors'
print sess.run(feat_lookup)
print 'reshaped features'
print sess.run(feat_flat)
print 'input seq after concatenation - Batched Input Vectors'
print sess.run(input_seq)
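In [ ]:
# (A minimal sanity-check sketch, not part of the original notebook run:
#  the width of the flattened input vector should equal the input_dim computed earlier.)
print sess.run(tf.shape(input_seq)) # expected: [ 1 12] == [batch_size, input_dim]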