In [ ]:
# Created 2016-04-05
# TensorFlow version: 0.7
In [2]:
import numpy as np
import tensorflow as tf
In [13]:
# Snippet 1
# Simple softmax of a size-2 vector.
# Note that tf.nn.softmax REQUIRES the input to be of rank 2, which means
# that np.array([0.5, 0.2]) won't work here.
#
# In practice, logits is expected to be a matrix of shape [batch_size, num_classes].
logits_a = np.array([[0.5, 0.2]])
# Calculate the softmax based on logits_a. According to the softmax definition,
# the expected result is [exp(0.5), exp(0.2)] / (exp(0.5) + exp(0.2))
result_a = tf.nn.softmax(logits_a)
expected_result_a = np.array([[np.exp(0.5), np.exp(0.2)]]) / (np.exp(0.5) + np.exp(0.2))
with tf.Session() as sess:
    # They should evaluate to the same vector (tensor).
    print(expected_result_a)
    print(sess.run(result_a))
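In [ ]:
# (Added sketch, not part of the original notebook.) A plain-numpy re-implementation
# of softmax to cross-check the TensorFlow result above. The helper name softmax_np
# is my own; only numpy is assumed here.
def softmax_np(x):
    # Subtract the row-wise max before exponentiating for numerical stability;
    # this does not change the result because softmax is shift-invariant.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)

# Should print True: the hand-rolled softmax matches the closed-form expectation.
print(np.allclose(softmax_np(logits_a), expected_result_a))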
In [15]:
# Snippet 2
# Simple softmax of a bigger vector.
# Note that tf.nn.softmax REQUIRES the input to be of rank 2; the logits
# below have a batch_size of 1 for illustration purposes.
logits_b = np.array([[0.77, 0.77, 0.77, 0.77, 0.77]])
# Based on the softmax definition, the above logits should have a uniform softmax.
# Softmax results are usually interpreted as probabilities and sum to 1.
# This means that the expected result will be [0.2, 0.2, 0.2, 0.2, 0.2].
result_b = tf.nn.softmax(logits_b)
expected_result_b = np.array([[0.2, 0.2, 0.2, 0.2, 0.2]])
with tf.Session() as sess:
    # They should evaluate to the same vector (tensor).
    print(expected_result_b)
    print(sess.run(result_b))
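In [ ]:
# (Added illustration, an assumption of mine rather than part of the original notebook.)
# Softmax is invariant to adding the same constant to every logit, which is why the
# uniform logits above map to 0.2 regardless of the shared value 0.77.
shifted_logits_b = logits_b + 100.0
with tf.Session() as sess:
    # Both lines should print [[0.2, 0.2, 0.2, 0.2, 0.2]].
    print(sess.run(tf.nn.softmax(logits_b)))
    print(sess.run(tf.nn.softmax(shifted_logits_b)))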
In [36]:
# Snippet 3
# Softmax and cross entropy.
# Note that the cross-entropy ops below, like tf.nn.softmax, REQUIRE the logits
# to be of rank 2; the logits below have a batch_size of 1 for illustration purposes.
logits = np.array([[0.95, 0.95]])
# The probability representation of the labels has the same shape as logits.
# Unlike logits, it is usually a batch of one-hot vectors, where each vector
# has 1 at the correct class index and 0 everywhere else.
labels = np.array([[1.0, 0.0]])
# Sometimes we choose to represent each label as its class index (an integer)
# instead of a probability vector. In our case the first (and only) label has
# 1.0 at index 0, so the sparse label representation is "0".
sparse_labels = np.array([0], dtype=np.int32)
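# (Added note, not in the original cell.) The two label representations are
# interchangeable; the one-hot matrix can be recovered from the sparse indices:
labels_from_sparse = np.eye(2)[sparse_labels]  # -> [[1., 0.]], same as `labels`.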
# Calculate the expected softmax and cross entropy.
#
# According to the definition of softmax, applying it to these logits (which are
# equal) will yield [[0.5, 0.5]].
expected_softmax = np.array([[0.5, 0.5]])
# Calculate the cross entropy based on its definition. For details, see
# https://en.wikipedia.org/wiki/Cross_entropy
expected_cross_entropy = np.array([- 1.0 * np.log(0.5) - 0.0 * np.log(0.5)])
# Operation that produces the cross entropy from logits and labels.
cross_entropy_a = tf.nn.softmax_cross_entropy_with_logits(logits, labels)
# Operation that produces the same cross entropy from logits and sparse (index) labels.
cross_entropy_b = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, sparse_labels)
with tf.Session() as sess:
    # They should evaluate to the same vector (tensor).
    print(expected_cross_entropy)
    print(sess.run(cross_entropy_a))
    print(sess.run(cross_entropy_b))
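In [ ]:
# (Added cross-check, not from the original notebook; plain numpy only.) For a one-hot
# label, the softmax cross entropy reduces to -log(softmax probability of the true
# class), which is what the fused TensorFlow ops above compute in one step.
probs = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
manual_cross_entropy = -np.sum(labels * np.log(probs), axis=1)
# Both should print [0.69314718], i.e. log(2).
print(manual_cross_entropy)
print(expected_cross_entropy)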