In [37]:
import tensorflow as tf
import pandas as pd
import numpy as np
sess = tf.InteractiveSession()
In [86]:
# Initialize embedding and feature parameters.
word_dim = 2 # word embedding dimension
feat_dim = 1 # feature embedding dimension
num_feats = 2
win_len = 1
vocab_size = 5
feat_size = 10
seq_len = win_len * 2 + 1 # Total number of tokens that will be represented in the input
# For clarity, make the two sets of values easy to tell apart:
# word embedding values are fractions in [0, 0.5), while feature embedding
# values are ints in [vocab_size, vocab_size + feat_size)
word_embs_init = np.array([ [i / 10.] * word_dim for i in range(vocab_size) ])
feat_embs_init = np.array([ [i] * feat_dim for i in range(vocab_size, vocab_size + feat_size) ]) # shift IDs past vocab_size for easy viewing
# Total Input Dimension
input_dim = ( seq_len ) * ( word_dim + (num_feats * feat_dim)) # num tokens times dense vector dims
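# (Worked check, assuming the settings above: input_dim = 3 * (2 + 2 * 1) = 12,
#  i.e. 3 tokens, each contributing a 2-dim word vector plus two 1-dim feature vectors)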
In [87]:
# Now the word embedding values are clearly distinguishable from the feature embedding values
print 'all words dense vector representation'
print word_embs_init
print 'all feature dense vector representation'
print feat_embs_init
In [91]:
# Generate Random Integers as Token IDs for input
batch_size = 1
input_words = np.random.randint(0, vocab_size, (batch_size, seq_len) )
input_feats = np.random.randint(0, feat_size , (batch_size, seq_len, num_feats) )
print 'Input Word IDs'
print input_words
print 'Input Feature IDs'
print input_feats
In [92]:
# Create the input sequence by looking up dense vector representations of words
# and features, then concatenating them
# Build Lookup Layers
word_emb = tf.Variable(word_embs_init, dtype=tf.float32)
feat_emb = tf.Variable(feat_embs_init, dtype=tf.float32)
# Convert int token ids to dense vectors via lookup table
word_lookup = tf.nn.embedding_lookup(word_emb, input_words)
feat_lookup = tf.nn.embedding_lookup(feat_emb, input_feats)
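# (Aside, a minimal equivalent sketch: with a single dense embedding matrix,
#  embedding_lookup behaves like a gather along the first axis, e.g.
#  tf.gather(word_emb, input_words) yields the same (batch_size, seq_len, word_dim) tensor)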
# Concatenate dense vectors into a single input vector (batched)
# first flatten the per-token feature vectors so they can be joined
# with the word vectors along the last dimension
feat_flat = tf.reshape(feat_lookup, (batch_size, seq_len, -1))
word_feats = tf.concat([word_lookup, feat_flat], axis=2)
input_seq = tf.reshape(word_feats, [batch_size, -1])
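# (Shape bookkeeping under the settings above: word_lookup (1, 3, 2),
#  feat_lookup (1, 3, 2, 1), feat_flat (1, 3, 2), word_feats (1, 3, 4),
#  input_seq (1, 12) == (batch_size, input_dim))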
sess.run(tf.global_variables_initializer())
# Now we can see the token / feature id to embedding lookup
print 'word vectors'
print sess.run(word_lookup)
print 'feat vectors'
print sess.run(feat_lookup)
print 'reshaped features'
print sess.run(feat_flat)
print 'input seq after concatenation - Batched Input Vectors'
print sess.run(input_seq)
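In [ ]:
# (A minimal sanity-check sketch, not part of the original notebook run:
#  the width of the flattened input vector should equal the input_dim computed earlier.)
print sess.run(tf.shape(input_seq)) # expected: [ 1 12] == [batch_size, input_dim]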