In [1]:
# http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/
# http://learningtensorflow.com/index.html
# http://suriyadeepan.github.io/2016-12-31-practical-seq2seq/

import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
import pprint
pp = pprint.PrettyPrinter(indent=4)
sess = tf.InteractiveSession()

In [2]:
# One hot encoding for each char in 'hello'
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]
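
For longer vocabularies the same one-hot rows can be generated with np.eye instead of typed by hand; a minimal sketch (the h, e, l, o index order is just this notebook's convention):

idx2char = ['h', 'e', 'l', 'o']
one_hot = np.eye(len(idx2char), dtype=np.float32)   # row i is the one-hot vector for idx2char[i]
print(one_hot[0])   # [ 1.  0.  0.  0.]  -> 'h'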


In [3]:
with tf.variable_scope('one_cell') as scope:
    # One cell RNN input_dim (4) -> output_dim (2)
    hidden_size = 2
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    print(cell.output_size, cell.state_size)

    x_data = np.array([[h]], dtype=np.float32) # x_data = [[[1,0,0,0]]]
    pp.pprint(x_data)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)

    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


2 2
array([[[ 1.,  0.,  0.,  0.]]], dtype=float32)
array([[[-0.64275026,  0.24673119]]], dtype=float32)
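
BasicRNNCell computes essentially the plain recurrent update h_t = tanh(x_t W_x + h_prev W_h + b). A minimal numpy sketch of one such step follows; the weights here are random stand-ins, so the values will not match the TensorFlow output above:

W_x = np.random.randn(4, 2).astype(np.float32)   # input_dim -> hidden_size
W_h = np.random.randn(2, 2).astype(np.float32)   # hidden_size -> hidden_size
b = np.zeros(2, dtype=np.float32)
h_prev = np.zeros(2, dtype=np.float32)           # initial state
x_t = np.array(h, dtype=np.float32)              # the one-hot 'h' defined above
h_t = np.tanh(x_t.dot(W_x) + h_prev.dot(W_h) + b)
print(h_t)                                       # shape (2,): one output/state vector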


In [4]:
with tf.variable_scope('two_sequences') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5
    hidden_size = 2
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    x_data = np.array([[h, e, l, l, o]], dtype=np.float32)
    print(x_data.shape)
    pp.pprint(x_data)
    outputs, states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


(1, 5, 4)
array([[[ 1.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  1.]]], dtype=float32)
array([[[-0.63120317,  0.41186932],
        [ 0.01558458, -0.45617309],
        [ 0.44469985, -0.59367669],
        [ 0.64881748, -0.41516042],
        [ 0.83822179,  0.37093368]]], dtype=float32)
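
dynamic_rnn returns outputs shaped [batch_size, sequence_length, hidden_size], so the array above is (1, 5, 2); a quick check on the tensor itself:

print(outputs.get_shape())   # (1, 5, 2): batch, time, hidden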


In [5]:
with tf.variable_scope('3_batches') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3
    # 3 batches 'hello', 'eolll', 'lleel'
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    
    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(
        cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


array([[[ 1.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  1.]],

       [[ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  1.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.]],

       [[ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.]]], dtype=float32)
array([[[-0.03546662, -0.06584486],
        [-0.16727528,  0.06242408],
        [ 0.00654588,  0.08693196],
        [ 0.09833847,  0.10082317],
        [-0.00898898,  0.16917825]],

       [[-0.13631152,  0.11714432],
        [-0.11994348,  0.19006969],
        [-0.02463737,  0.14005762],
        [ 0.07547557,  0.13548863],
        [ 0.13291949,  0.13163276]],

       [[ 0.10307095,  0.04621947],
        [ 0.15578267,  0.07512111],
        [-0.01867282,  0.17094727],
        [-0.15131146,  0.24143013],
        [-0.00291315,  0.21415649]]], dtype=float32)
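
With state_is_tuple=True the second return value is an LSTMStateTuple(c, h) holding the final cell state and hidden state per batch entry; a quick look (the values depend on the random initialization):

pp.pprint(sess.run(_states))   # c and h each have shape (3, 2)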

In [6]:
with tf.variable_scope('3_batches_dynamic_length') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch: 3
    # 3 batches 'hello', 'eolll', 'lleel'
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    
    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(
        cell, x_data, sequence_length=[5,3,4], dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


array([[[ 1.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  1.]],

       [[ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  1.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.]],

       [[ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.]]], dtype=float32)
array([[[-0.03968918,  0.11553776],
        [-0.09938851,  0.11129049],
        [-0.14957708,  0.11721011],
        [-0.17505002,  0.12350345],
        [-0.10960916,  0.22753865]],

       [[-0.08773316,  0.04811305],
        [-0.07149508,  0.15299585],
        [-0.1218669 ,  0.12461264],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ]],

       [[-0.08248876,  0.05933138],
        [-0.13340995,  0.09111218],
        [-0.17076224,  0.12116605],
        [-0.20993207,  0.13754749],
        [ 0.        ,  0.        ]]], dtype=float32)
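
dynamic_rnn zeroes the outputs past each entry's given length ([5, 3, 4] here), which is the behaviour you want for padded batches. If padding steps are all-zero vectors, the lengths can also be derived from the data; a small illustrative helper (not part of the original notebook):

def seq_lengths(batch):
    # count the time steps that contain any non-zero feature
    used = np.sign(np.abs(batch).sum(axis=2))
    return used.sum(axis=1).astype(np.int32)

print(seq_lengths(x_data))   # [5 5 5] -- this toy batch has no zero padding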

In [7]:
with tf.variable_scope('initial_state') as scope:
    batch_size = 3
    x_data = np.array([[h, e, l, l, o],
                      [e, o, l, l, l],
                      [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch: 3
    hidden_size=2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


array([[[ 1.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  1.]],

       [[ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  1.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.]],

       [[ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.]]], dtype=float32)
array([[[ 0.00923393, -0.13979502],
        [ 0.07680964, -0.11229528],
        [ 0.02873478, -0.10772899],
        [-0.05031542, -0.09425307],
        [-0.09281293, -0.16921911]],

       [[ 0.08408841, -0.05797429],
        [ 0.03247163, -0.1436279 ],
        [-0.04775349, -0.11127697],
        [-0.1245755 , -0.10845563],
        [-0.19524372, -0.11676301]],

       [[-0.05309285, -0.02686353],
        [-0.11044604, -0.05279329],
        [ 0.00974518, -0.0891296 ],
        [ 0.08241539, -0.10400318],
        [ 0.04044458, -0.09862207]]], dtype=float32)
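
cell.zero_state simply builds an all-zero LSTMStateTuple of shape [batch_size, hidden_size] per part, which is also what dynamic_rnn uses when no initial_state is passed; a quick check:

pp.pprint(sess.run(initial_state))   # c and h are both zeros of shape (3, 2)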

In [8]:
# Create input data
batch_size=3
sequence_length=5
input_dim=3

x_data = np.arange(45, dtype=np.float32).reshape(batch_size, sequence_length, input_dim)
pp.pprint(x_data)  # batch, sequence_length, input_dim


array([[[  0.,   1.,   2.],
        [  3.,   4.,   5.],
        [  6.,   7.,   8.],
        [  9.,  10.,  11.],
        [ 12.,  13.,  14.]],

       [[ 15.,  16.,  17.],
        [ 18.,  19.,  20.],
        [ 21.,  22.,  23.],
        [ 24.,  25.,  26.],
        [ 27.,  28.,  29.]],

       [[ 30.,  31.,  32.],
        [ 33.,  34.,  35.],
        [ 36.,  37.,  38.],
        [ 39.,  40.,  41.],
        [ 42.,  43.,  44.]]], dtype=float32)
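
Values as large as 0..44 push an LSTM's tanh/sigmoid nonlinearities well into saturation, which is one reason the activations in the next cells look so degenerate; real inputs are normally rescaled first. An illustrative (optional) rescale:

x_data_scaled = x_data / x_data.max()   # roughly [0, 1]; not used below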

In [9]:
with tf.variable_scope('generated_data') as scope:
    # One cell RNN input_dim (3) -> output_dim (5). sequence: 5, batch: 3
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


array([[[ -1.23668008e-01,  -5.07251509e-02,   1.13128014e-01,
          -1.75255552e-01,   9.91979316e-02],
        [ -1.03775285e-01,  -5.57221798e-03,   5.22784777e-02,
          -2.56952912e-01,   1.65204480e-02],
        [ -4.39195000e-02,  -4.11785266e-04,   1.44369062e-02,
          -2.29260936e-01,   1.11237261e-03],
        [ -1.54171968e-02,  -2.81557877e-05,   3.56699061e-03,
          -1.40950114e-01,   4.93779953e-05],
        [ -5.05342009e-03,  -1.84964392e-06,   8.43062007e-04,
          -6.71216846e-02,   1.86955151e-06]],

       [[ -1.22282468e-03,   2.22401460e-14,   8.62367742e-05,
          -1.92919523e-02,   6.63199273e-08],
        [ -5.12603205e-04,   1.53546522e-15,   3.19461578e-05,
          -1.19721089e-02,   3.25560467e-09],
        [ -1.73634820e-04,   9.58751101e-17,   8.65337825e-06,
          -6.00134116e-03,   1.47692761e-10],
        [ -5.72142344e-05,   5.80502781e-18,   2.09686141e-06,
          -2.95647699e-03,   6.73065770e-12],
        [ -1.87951573e-05,   3.41926015e-19,   4.85925625e-07,
          -1.47120981e-03,   3.07895121e-13]],

       [[ -4.69985116e-06,   2.59926926e-26,   3.95634601e-08,
          -7.18495809e-04,   1.40691613e-14],
        [ -1.95512894e-06,   1.53442122e-27,   1.54792055e-08,
          -3.70928698e-04,   6.46595782e-16],
        [ -6.63015499e-07,   8.28377858e-29,   4.40251213e-09,
          -1.87046811e-04,   2.96441348e-17],
        [ -2.18773309e-07,   4.31129650e-30,   1.10677900e-09,
          -9.43549603e-05,   1.35910055e-18],
        [ -7.19227486e-08,   2.16742962e-31,   2.62928596e-10,
          -4.76072491e-05,   6.23100809e-20]]], dtype=float32)

In [10]:
with tf.variable_scope('MultiRNNCell') as scope:
    # Make a 3-layer stacked RNN; build a fresh LSTM cell per layer
    # (reusing one cell object for every layer would share its variables)
    cells = [rnn.BasicLSTMCell(num_units=5, state_is_tuple=True) for _ in range(3)]
    cell = rnn.MultiRNNCell(cells, state_is_tuple=True)  # 3 layers

    # rnn in/out
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    print("dynamic rnn: ", outputs)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())  # batch size, unrolling (time), hidden_size


dynamic rnn:  Tensor("MultiRNNCell/rnn/transpose:0", shape=(3, 5, 5), dtype=float32)
array([[[ -3.18181328e-03,   6.82551297e-04,  -6.86313864e-03,
          -6.52104442e-04,  -3.74536775e-03],
        [ -1.26600666e-02,   3.06148897e-03,  -2.03890335e-02,
          -1.05455623e-03,  -1.09536052e-02],
        [ -2.46284939e-02,   5.03476290e-03,  -3.49918716e-02,
           1.33014296e-03,  -1.72535740e-02],
        [ -3.26516628e-02,   4.39822255e-03,  -4.67237085e-02,
           9.41789895e-03,  -1.96171720e-02],
        [ -3.30378041e-02,   1.04174472e-03,  -5.48766889e-02,
           2.48874109e-02,  -1.77136399e-02]],

       [[ -2.76247528e-03,   9.84093407e-04,  -2.32222048e-03,
           1.30811462e-03,   7.29710038e-04],
        [ -5.43045439e-03,   1.31156389e-03,  -4.67744144e-03,
           5.95421065e-03,   5.68564516e-03],
        [ -3.27468570e-03,  -4.46750841e-04,  -5.98882977e-03,
           1.64761990e-02,   1.37086725e-02],
        [  5.19860210e-03,  -3.70193040e-03,  -7.13404501e-03,
           3.35971899e-02,   2.25476827e-02],
        [  1.89618766e-02,  -6.97603356e-03,  -9.12398752e-03,
           5.67113608e-02,   3.07749026e-02]],

       [[ -2.98052910e-03,   6.98839314e-04,  -5.79679036e-04,
           2.88053358e-04,   5.27186133e-03],
        [ -3.86652839e-03,  -6.25130706e-05,   3.28230351e-04,
           4.51471703e-03,   1.57567821e-02],
        [  1.12394930e-03,  -2.94125779e-03,   2.14405963e-03,
           1.48640843e-02,   2.78518554e-02],
        [  1.21760676e-02,  -6.77783974e-03,   3.21676442e-03,
           3.14395986e-02,   3.90961394e-02],
        [  2.76563261e-02,  -1.00875879e-02,   2.58208276e-03,
           5.33442199e-02,   4.85301539e-02]]], dtype=float32)
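
Stacked layers do not have to share a width; a small illustrative variant (the layer sizes are arbitrary) showing that the last layer's num_units determines the output size:

with tf.variable_scope('MultiRNNCell_mixed') as scope:
    cells = [rnn.BasicLSTMCell(num_units=n, state_is_tuple=True) for n in [8, 5]]
    stacked = rnn.MultiRNNCell(cells, state_is_tuple=True)
    out, _ = tf.nn.dynamic_rnn(stacked, x_data, dtype=tf.float32)
    print(out)   # shape (3, 5, 5): the last layer has 5 units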

In [11]:
with tf.variable_scope('dynamic_rnn') as scope:
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32,
                                         sequence_length=[1, 3, 2])
    # length 1 for batch entry 0, 3 for entry 1, 2 for entry 2
    
    print("dynamic rnn: ", outputs)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())  # batch size, unrolling (time), hidden_size


dynamic rnn:  Tensor("dynamic_rnn/rnn/transpose:0", shape=(3, 5, 5), dtype=float32)
array([[[ -1.24277316e-01,   6.71604648e-02,  -3.18024978e-02,
          -6.61257878e-02,  -1.73859328e-01],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]],

       [[ -4.20496523e-01,  -2.00571522e-01,  -7.10825205e-01,
          -2.93397534e-05,  -9.52338451e-05],
        [ -3.96949798e-01,  -3.09383363e-01,  -9.41727877e-01,
          -6.07349330e-06,  -2.30159003e-05],
        [ -3.78725439e-01,  -3.83842200e-01,  -9.89462614e-01,
          -9.44136730e-07,  -4.46963031e-06],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]],

       [[ -3.56718779e-01,  -5.30802384e-02,  -7.57560551e-01,
          -2.62110889e-09,  -1.68145462e-08],
        [ -3.17460954e-01,  -8.03319365e-02,  -9.61753905e-01,
          -4.95694152e-10,  -3.86364229e-09],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]]], dtype=float32)
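
Only the outputs are zero-padded; the returned state for each batch entry is the state at its last valid step (steps 1, 3 and 2 here), which is what you would carry over when feeding long sequences in chunks. A quick check:

pp.pprint(sess.run(_states))   # LSTMStateTuple taken at each entry's last valid step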

In [12]:
with tf.variable_scope('bi-directional') as scope:
    # bi-directional rnn
    cell_fw = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    cell_bw = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)

    outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, x_data,
                                                      sequence_length=[2, 3, 1],
                                                      dtype=tf.float32)

    sess.run(tf.global_variables_initializer())
    pp.pprint(sess.run(outputs))
    pp.pprint(sess.run(states))


(   array([[[  1.91283643e-01,   1.66566283e-01,  -1.43510297e-01,
          -3.91087681e-02,   8.19874927e-02],
        [  4.39284950e-01,   3.03380191e-01,  -4.37777787e-02,
          -4.80161339e-01,   5.20635508e-02],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]],

       [[  2.75898695e-01,   2.19099417e-01,  -1.06631105e-05,
          -7.59029806e-01,   1.35574592e-05],
        [  5.32995164e-01,   2.23740548e-01,  -6.73943816e-07,
          -7.65739441e-01,   2.46314448e-06],
        [  7.06968784e-01,   1.93451777e-01,  -6.96958935e-08,
          -7.63786614e-01,   3.57649327e-07],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]],

       [[  2.44515434e-01,   1.00493275e-01,  -1.56144528e-10,
          -7.61585832e-01,   4.06837619e-10],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]]], dtype=float32),
    array([[[  5.86579978e-01,   1.11791678e-01,  -3.71089369e-01,
          -1.55904694e-02,   1.02585182e-01],
        [  5.53058267e-01,   1.88321248e-02,  -2.26903543e-01,
           4.15821373e-03,   6.19057864e-02],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]],

       [[  7.55207121e-01,   1.90972378e-06,  -9.32306275e-02,
           6.59374280e-07,   3.55114113e-03],
        [  7.55450308e-01,   1.57105418e-07,  -5.95693775e-02,
           3.67493485e-08,   1.27227418e-03],
        [  7.53851235e-01,   9.17093068e-09,  -2.28918456e-02,
           1.86351001e-09,   5.74842677e-04],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]],

       [[  7.60004759e-01,   5.34228217e-12,  -5.78520307e-03,
           6.05749066e-13,   3.36771554e-05],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]]], dtype=float32))
(   LSTMStateTuple(c=array([[  7.39887953e-01,   3.58146578e-01,  -1.93617597e-01,
         -5.97393930e-01,   1.83204621e-01],
       [  1.08004236e+00,   1.95921108e-01,  -1.28685043e-03,
         -1.00526524e+00,   4.19715128e-04],
       [  2.72140056e-01,   1.00833625e-01,  -1.77492344e-04,
         -9.99980450e-01,   1.64960911e-05]], dtype=float32), h=array([[  4.39284950e-01,   3.03380191e-01,  -4.37777787e-02,
         -4.80161339e-01,   5.20635508e-02],
       [  7.06968784e-01,   1.93451777e-01,  -6.96958935e-08,
         -7.63786614e-01,   3.57649327e-07],
       [  2.44515434e-01,   1.00493275e-01,  -1.56144528e-10,
         -7.61585832e-01,   4.06837619e-10]], dtype=float32)),
    LSTMStateTuple(c=array([[  1.09122550e+00,   2.14314222e-01,  -1.01869190e+00,
         -3.93253230e-02,   2.36113101e-01],
       [  1.03115773e+00,   1.92802031e-06,  -2.99388552e+00,
          2.55972713e-01,   8.80647302e-01],
       [  9.99994695e-01,   5.34284336e-12,  -9.99999166e-01,
          4.95702997e-02,   8.23795438e-01]], dtype=float32), h=array([[  5.86579978e-01,   1.11791678e-01,  -3.71089369e-01,
         -1.55904694e-02,   1.02585182e-01],
       [  7.55207121e-01,   1.90972378e-06,  -9.32306275e-02,
          6.59374280e-07,   3.55114113e-03],
       [  7.60004759e-01,   5.34228217e-12,  -5.78520307e-03,
          6.05749066e-13,   3.36771554e-05]], dtype=float32)))
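
outputs here is a (forward, backward) pair, each shaped [batch, time, hidden]; for a downstream layer the two directions are usually concatenated along the feature axis. A minimal sketch:

outputs_fw, outputs_bw = outputs
outputs_concat = tf.concat([outputs_fw, outputs_bw], axis=2)
print(outputs_concat)   # shape (3, 5, 10)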

In [13]:
# flatten-based softmax
hidden_size=3
sequence_length=5
batch_size=3
num_classes=5

pp.pprint(x_data)  # (batch_size=3, sequence_length=5, hidden_size=3)
x_data = x_data.reshape(-1, hidden_size)
pp.pprint(x_data)

softmax_w = np.arange(15, dtype=np.float32).reshape(hidden_size, num_classes)
outputs = np.matmul(x_data, softmax_w)
outputs = outputs.reshape(-1, sequence_length, num_classes) # batch, seq, class
pp.pprint(outputs)


array([[[  0.,   1.,   2.],
        [  3.,   4.,   5.],
        [  6.,   7.,   8.],
        [  9.,  10.,  11.],
        [ 12.,  13.,  14.]],

       [[ 15.,  16.,  17.],
        [ 18.,  19.,  20.],
        [ 21.,  22.,  23.],
        [ 24.,  25.,  26.],
        [ 27.,  28.,  29.]],

       [[ 30.,  31.,  32.],
        [ 33.,  34.,  35.],
        [ 36.,  37.,  38.],
        [ 39.,  40.,  41.],
        [ 42.,  43.,  44.]]], dtype=float32)
array([[  0.,   1.,   2.],
       [  3.,   4.,   5.],
       [  6.,   7.,   8.],
       [  9.,  10.,  11.],
       [ 12.,  13.,  14.],
       [ 15.,  16.,  17.],
       [ 18.,  19.,  20.],
       [ 21.,  22.,  23.],
       [ 24.,  25.,  26.],
       [ 27.,  28.,  29.],
       [ 30.,  31.,  32.],
       [ 33.,  34.,  35.],
       [ 36.,  37.,  38.],
       [ 39.,  40.,  41.],
       [ 42.,  43.,  44.]], dtype=float32)
array([[[   25.,    28.,    31.,    34.,    37.],
        [   70.,    82.,    94.,   106.,   118.],
        [  115.,   136.,   157.,   178.,   199.],
        [  160.,   190.,   220.,   250.,   280.],
        [  205.,   244.,   283.,   322.,   361.]],

       [[  250.,   298.,   346.,   394.,   442.],
        [  295.,   352.,   409.,   466.,   523.],
        [  340.,   406.,   472.,   538.,   604.],
        [  385.,   460.,   535.,   610.,   685.],
        [  430.,   514.,   598.,   682.,   766.]],

       [[  475.,   568.,   661.,   754.,   847.],
        [  520.,   622.,   724.,   826.,   928.],
        [  565.,   676.,   787.,   898.,  1009.],
        [  610.,   730.,   850.,   970.,  1090.],
        [  655.,   784.,   913.,  1042.,  1171.]]], dtype=float32)
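
The same flatten / project / restore pattern written with TensorFlow ops, which is the usual way to put a softmax projection on top of dynamic_rnn outputs (the names below are illustrative):

x_flat = tf.constant(x_data)                       # already flattened to (15, 3)
w = tf.constant(softmax_w)                         # (3, 5)
logits = tf.reshape(tf.matmul(x_flat, w),
                    [batch_size, sequence_length, num_classes])
print(logits)                                      # (3, 5, 5), same computation as above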

In [14]:
# [batch_size, sequence_length]
y_data = tf.constant([[1, 1, 1]])

# [batch_size, sequence_length, num_classes]
prediction = tf.constant([[[0.2, 0.7], [0.6, 0.2], [0.2, 0.9]]], dtype=tf.float32)

# [batch_size, sequence_length]
weights = tf.constant([[1, 1, 1]], dtype=tf.float32)

sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=prediction, targets=y_data, weights=weights)
sess.run(tf.global_variables_initializer())
print("Loss: ", sequence_loss.eval())


Loss:  0.596759
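
With uniform weights, sequence_loss is just the mean softmax cross-entropy over the time steps; a small numpy check that reproduces the value above:

logits_np = np.array([[0.2, 0.7], [0.6, 0.2], [0.2, 0.9]], dtype=np.float32)
probs = np.exp(logits_np) / np.exp(logits_np).sum(axis=1, keepdims=True)
print(np.mean(-np.log(probs[:, 1])))   # ~0.596759 (all targets are class 1)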

In [15]:
# [batch_size, sequence_length]
y_data = tf.constant([[1, 1, 1]])

# [batch_size, sequence_length, num_classes]
prediction1 = tf.constant([[[0.3, 0.7], [0.3, 0.7], [0.3, 0.7]]], dtype=tf.float32)
prediction2 = tf.constant([[[0.1, 0.9], [0.1, 0.9], [0.1, 0.9]]], dtype=tf.float32)

prediction3 = tf.constant([[[1, 0], [1, 0], [1, 0]]], dtype=tf.float32)
prediction4 = tf.constant([[[0, 1], [1, 0], [0, 1]]], dtype=tf.float32)

# [batch_size, sequence_length]
weights = tf.constant([[1, 1, 1]], dtype=tf.float32)

sequence_loss1 = tf.contrib.seq2seq.sequence_loss(prediction1, y_data, weights)
sequence_loss2 = tf.contrib.seq2seq.sequence_loss(prediction2, y_data, weights)
sequence_loss3 = tf.contrib.seq2seq.sequence_loss(prediction3, y_data, weights)
sequence_loss4 = tf.contrib.seq2seq.sequence_loss(prediction4, y_data, weights)

sess.run(tf.global_variables_initializer())
print("Loss1: ", sequence_loss1.eval(),
      "Loss2: ", sequence_loss2.eval(),
      "Loss3: ", sequence_loss3.eval(),
      "Loss4: ", sequence_loss4.eval())


Loss1:  0.513015 Loss2:  0.371101 Loss3:  1.31326 Loss4:  0.646595
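
As expected, the loss shrinks as the logits move toward the correct class (Loss2 < Loss1), is largest when every step favors the wrong class (Loss3), and prediction4, which is right at two of the three steps, lands in between (Loss4).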