In [ ]:
# Created 2016-04-05
# Tensorflow version: 0.7

In [2]:
import numpy as np
import tensorflow as tf

In [13]:
# Snippet 1
# Simple softmax of a size-2 vector.

# Note that tf.nn.softmax REQUIRES the input to be of rank 2, which means
# that np.array([0.5, 0.2]) won't work here.
#
# In practice, logits is expected to be a matrix of shape batch_size x logit_size.
logits_a = np.array([[0.5, 0.2]])

# Calculate the softmax based on logits_a. According to the softmax definition,
# the expected result is [exp(0.5), exp(0.2)] / (exp(0.5) + exp(0.2))
result_a = tf.nn.softmax(logits_a)
expected_result_a = np.array([[np.exp(0.5), np.exp(0.2)]]) / (np.exp(0.5) + np.exp(0.2))

with tf.Session() as sess:
    # They should evaluate to the same vector (tensor).
    print(expected_result_a)
    print(sess.run(result_a))


[[ 0.57444252  0.42555748]]
[[ 0.57444252  0.42555748]]
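
In [ ]:
# A minimal sketch (not part of the original notebook): a plain NumPy softmax
# helper for comparison, plus the reshape that turns a rank-1 vector into the
# rank-2 [batch_size, logit_size] form tf.nn.softmax expects. The helper name
# np_softmax is illustrative only.
def np_softmax(vec):
    # Subtract the max for numerical stability; this does not change the result.
    exps = np.exp(vec - np.max(vec))
    return exps / np.sum(exps)

# The rank-1 vector that tf.nn.softmax would reject, computed directly in NumPy.
print(np_softmax(np.array([0.5, 0.2])))           # ~[0.57444252  0.42555748]
# Reshaping to rank 2 makes it acceptable to tf.nn.softmax as well.
print(np.array([0.5, 0.2]).reshape(1, -1).shape)  # (1, 2)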

In [15]:
# Snippet 2
# Simple softmax of a bigger vector.

# Note that tf.nn.softmax REQUIRES the input to be of rank 2, and the logits
# below have a batch_size of 1 for illustration purposes.
logits_b = np.array([[0.77, 0.77, 0.77, 0.77, 0.77]])

# By the softmax definition, identical logits yield a uniform softmax.
# Softmax results are usually interpreted as probabilities and sum to 1.
# This means that the expected result will be [0.2, 0.2, 0.2, 0.2, 0.2]
result_b = tf.nn.softmax(logits_b)
expected_result_b = np.array([[0.2, 0.2, 0.2, 0.2, 0.2]])

with tf.Session() as sess:
    # They should evaluate to the same vector (tensor).
    print(expected_result_b)
    print(sess.run(result_b))


[[ 0.2  0.2  0.2  0.2  0.2]]
[[ 0.2  0.2  0.2  0.2  0.2]]
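
In [ ]:
# A minimal sketch (not part of the original notebook) of why identical logits
# produce a uniform softmax: softmax is invariant to adding the same constant
# to every logit, so [0.77, ..., 0.77] behaves exactly like [0, ..., 0].
logits_uniform = np.array([[0.0, 0.0, 0.0, 0.0, 0.0]])

with tf.Session() as sess:
    # Both should print the uniform distribution [0.2, 0.2, 0.2, 0.2, 0.2].
    print(sess.run(tf.nn.softmax(logits_uniform)))
    print(sess.run(tf.nn.softmax(logits_uniform + 0.77)))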

In [36]:
# Snippet 3
# Softmax and cross entropy.

# Note that these cross entropy ops also REQUIRE the logits to be of rank 2,
# and the logits below have a batch_size of 1 for illustration purposes.
logits = np.array([[0.95, 0.95]])
# The probability representation of the labels has the same shape as the logits.
# Unlike the logits, it is usually a batch of one-hot vectors, where each vector
# has 1.0 at the correct class index and 0.0 elsewhere.
labels = np.array([[1.0, 0.0]])
# Sometimes we choose to represent each label as its class index (an integer)
# instead of a probability vector. In our case the first (and only) label has
# 1.0 at index 0, so the sparse label representation will be "0".
sparse_labels = np.array([0], dtype=np.int32)

# Calculate the expected softmax and cross entropy.
#
# According to the definition of softmax, applying it to these equal logits
# yields [[0.5, 0.5]]
expected_softmax = np.array([[0.5, 0.5]])
# Calculate the cross entropy based on its definition. For details, see
# https://en.wikipedia.org/wiki/Cross_entropy
expected_cross_entropy = np.array([- 1.0 * np.log(0.5) - 0.0 * np.log(0.5)])

# Operation that produces the cross entropy from logits and labels.
cross_entropy_a = tf.nn.softmax_cross_entropy_with_logits(logits, labels)
# Operation that produces the cross entropy from logits and sparse labels.
cross_entropy_b = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, sparse_labels)

with tf.Session() as sess:
    # They should evaluate to the same vector (tensor).
    print(expected_cross_entropy)
    print(sess.run(cross_entropy_a))
    print(sess.run(cross_entropy_b))


[ 0.69314718]
[ 0.69314718]
[ 0.69314718]
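
In [ ]:
# A minimal sketch (not part of the original notebook): the same cross entropy
# computed by hand in NumPy, plus a conversion from sparse labels back to the
# one-hot (probability) representation. The names below are illustrative only.
softmax = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)

# Cross entropy per example: -sum_i labels[i] * log(softmax[i]).
manual_cross_entropy = -np.sum(labels * np.log(softmax), axis=1)
print(manual_cross_entropy)  # ~[0.69314718], i.e. log(2)

# Sparse labels to one-hot: put 1.0 at each example's class index.
one_hot = np.zeros_like(logits)
one_hot[np.arange(len(sparse_labels)), sparse_labels] = 1.0
print(one_hot)  # [[ 1.  0.]]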