In [ ]:
# Created 2016-04-06
# Tensorflow version: 0.7
# Like sparse_softmax_cross_entropy_with_logits,
# tf.nn.seq2seq.sequence_loss_by_example calculates the softmax cross entropy.
# (I am not sure why softmax/cross entropy is not mentioned in its name.)
#
# The difference is that the calculation is done on a sequence of logits,
# where the sequence is a Python list.
#
# In fact, one thing that helps in understanding it is that, in its simplest
# form, it just calls sparse_softmax_cross_entropy_with_logits on each
# element of the sequence. (A small NumPy sketch of this idea follows right
# after the imports.)
#
# It is mainly used for Recurrent Neural Networks.
In [2]:
import numpy as np
import tensorflow as tf
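In [ ]:
# Before the TensorFlow demo below, here is a minimal NumPy sketch of the
# idea (my own illustration, not TensorFlow's actual implementation):
# for each batch element, an unweighted, time-averaged
# sequence_loss_by_example is just the average over time steps of
# -log(softmax(logits)[label]).
def naive_sequence_loss(logits_seq, labels_seq):
    per_step_losses = []
    for step_logits, step_labels in zip(logits_seq, labels_seq):
        # numerically stable softmax over the class dimension
        shifted = step_logits - step_logits.max(axis=1, keepdims=True)
        probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
        # probability assigned to the ground truth class of each batch element
        picked = probs[np.arange(len(step_labels)), step_labels]
        per_step_losses.append(-np.log(picked))
    # average the per-step cross entropies across time steps
    return np.mean(per_step_losses, axis=0)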
In [9]:
# Snippet 1
# logits is a list of length 4, where 4 is called the sequence_length.
# Each element in the list is a matrix of shape: batch_size * classes.
# In this example batch_size = 2, classes = 3.
# This means we have 3 different integer labels: 0, 1, and 2.
logits = [np.array([[1.5, 1.5, 1.5],
                    [0.7, 0.9, 0.2]]),
          np.array([[0.5, 0.2, 1.7],
                    [0.1, 1.2, 0.5]]),
          np.array([[0.7, 0.3, 1.2],
                    [1.1, 0.2, 0.5]]),
          np.array([[0.2, 0.4, 0.1],
                    [0.8, 0.8, 0.5]])]
# labels is a list of length 4 (sequence_length), similar to logits.
# Each element in labels is an integer vector of length batch_size.
# Usually it represents the ground truth class label of each example in the
# batch at each time step of the sequence. Integer labels range from 0 to 2.
#
# This is similar to sparse_softmax_cross_entropy_with_logits.
labels = [np.array([0, 1], dtype=np.int32),
          np.array([2, 0], dtype=np.int32),
          np.array([0, 0], dtype=np.int32),
          np.array([2, 1], dtype=np.int32)]
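# For example, labels[0] = [0, 1] means that at the first time step the
# ground truth class of the first example in the batch is 0 and that of
# the second example is 1.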
# weights is a list of length 4 (sequence_length).
# Each element in weights is a 1-D vector of length batch_size.
#
# It is used to weight the cross entropy, and when set to all 1s,
# the cross entropy is not weighted.
weights = [np.array([1.0, 1.0]), np.array([1.0, 1.0]),
           np.array([1.0, 1.0]), np.array([1.0, 1.0])]
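# (Illustration only, not used in this demo: if, say, the second example in
# the batch were padding at the last time step, we could zero out its
# contribution by using
# weights = [np.array([1.0, 1.0]), np.array([1.0, 1.0]),
#            np.array([1.0, 1.0]), np.array([1.0, 0.0])]
# instead.)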
# Finally, call sequence_loss_by_example. Note that average_across_timesteps
# defaults to True (the name is self-explanatory).
loss = tf.nn.seq2seq.sequence_loss_by_example(logits, labels, weights)
# ---------- Expected Value ----------
#
# Now let's do what sequence_loss_by_example does, step by step.
# First, we calculate the cross entropies for each time step and each batch.
# The result is going to be a list of length 4 (sequence_length), where each
# element is a vector of size 2 (batch_size).
expected_cross_entropies = [
    tf.nn.sparse_softmax_cross_entropy_with_logits(single_logits, single_labels)
    for single_logits, single_labels in zip(logits, labels)]
# We then sum the cross entropies across time steps, and divide the result by 4 (sequence_length).
expected_cross_entropies_average_over_time_steps = tf.accumulate_n(expected_cross_entropies) / 4
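# Note that sequence_loss_by_example actually divides each batch element's
# summed cross entropy by the sum of its weights (at least in this version);
# since all weights here are 1.0, that sum equals the sequence_length 4,
# so dividing by 4 gives the same result.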
with tf.Session() as sess:
    print(sess.run(expected_cross_entropies))
    # We can see that the following two values are the same.
    print(sess.run(expected_cross_entropies_average_over_time_steps))
    print(sess.run(loss))
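In [ ]:
# Cross-check with the NumPy sketch defined after the imports. It should
# print the same per-batch averages as the last two values above.
print(naive_sequence_loss(logits, labels))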