In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
import pprint
pp = pprint.PrettyPrinter(indent=4)  # pretty-printer for nicer output
sess = tf.InteractiveSession()

In [3]:
# One hot encoding for each char in 'hello'
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]
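
In [ ]:
# Illustrative aside (not from the original notebook): the same one-hot rows
# can be generated with np.eye instead of typed by hand. The index map below
# is an assumption for this sketch; the resulting arrays are equivalent to
# the lists above.
idx = {'h': 0, 'e': 1, 'l': 2, 'o': 3}
eye = np.eye(len(idx), dtype=np.float32)  # rows of the identity are one-hot vectors
h, e, l, o = eye[idx['h']], eye[idx['e']], eye[idx['l']], eye[idx['o']]
print(h)  # [ 1.  0.  0.  0.]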

In [3]:
with tf.variable_scope('one_cell') as scope:
    # One cell RNN input_dim (4) -> output_dim (2)
    hidden_size = 2
    # create the cell
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    #print(cell)#<tensorflow.contrib.rnn.python.ops.core_rnn_cell_impl.BasicRNNCell object at 0x7fdc881bda20>

    print(cell.output_size, cell.state_size)  # 2 2
    # build x_data
    x_data = np.array([[h]], dtype=np.float32) # x_data = [[[1,0,0,0]]]
    pp.pprint(x_data.shape)
    # pass the cell and x_data to dynamic_rnn
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)

    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


2 2
(1, 1, 4)
array([[[-0.4944337 , -0.40208289]]], dtype=float32)
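
In [ ]:
# Reference sketch (an illustration with made-up weights, not the original
# code): BasicRNNCell computes h_t = tanh([x_t, h_{t-1}] W + b). Replaying
# one step in NumPy shows where the (1, 1, 2) output above comes from.
W = np.random.randn(4 + 2, 2).astype(np.float32)  # (input_dim + hidden) x hidden
b = np.zeros(2, dtype=np.float32)
x_t = np.array([1, 0, 0, 0], dtype=np.float32)    # 'h'
h_prev = np.zeros(2, dtype=np.float32)            # the initial state is all zeros
h_t = np.tanh(np.dot(np.concatenate([x_t, h_prev]), W) + b)
print(h_t.shape)  # (2,): one hidden vector, matching hidden_size above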


In [4]:
with tf.variable_scope('two_sequences') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5
    hidden_size = 2
    
    # create the cell
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    print(cell.output_size, cell.state_size)
    
    # this time feed the whole word as a sequence
    x_data = np.array([[h, e, l, l, o]], dtype=np.float32)
    print(x_data.shape)
    pp.pprint(x_data)
    
    # run the cell over x_data
    outputs, states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    
    pp.pprint(outputs.eval())


2 2
(1, 5, 4)
array([[[ 1.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  1.]]], dtype=float32)
array([[[-0.42083621, -0.59943414],
        [-0.09289618,  0.54007256],
        [-0.54900974, -0.79854947],
        [-0.4984383 , -0.45725042],
        [-0.10496803, -0.13553779]]], dtype=float32)
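
In [ ]:
# Quick shape check (added for illustration): dynamic_rnn returns one hidden
# vector per time step, shaped (batch, sequence_length, hidden_size).
print(outputs.shape)  # (1, 5, 2)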


In [ ]:
# Using an LSTM cell
# 3 batches
with tf.variable_scope('3_batches') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3
    # 3 batches 'hello', 'eolll', 'lleel'
    
    # build x_data as a batch of three sequences
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l ],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    
    hidden_size = 2
    
    # create the LSTM cell
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    
    # run the cell over x_data
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    
    sess.run(tf.global_variables_initializer())
    
    pp.pprint(outputs.eval())
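
In [ ]:
# Illustration (not from the original run): unlike BasicRNNCell, BasicLSTMCell
# returns its state as an LSTMStateTuple, where c is the cell state and h is
# the hidden output of the last step.
print(_states.c.shape)  # (3, 2): batch_size x hidden_size
print(_states.h.shape)  # (3, 2)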

In [14]:
# passing sequence_length to dynamic_rnn
# problem: in practice, most sequences do not have the same length.
with tf.variable_scope('3_batches_dynamic_length') as scope:
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3
    # 3 batches 'hello', 'eolll', 'lleel'
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    
    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    # with sequence_length 3, the first 3 steps produce outputs and the remaining 2 are zeroed out.
    outputs, _states = tf.nn.dynamic_rnn(
        cell, x_data, sequence_length=[5,3,4], dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
    #pp.pprint(_states.eval())
    # padded steps are forced to exactly 0, so they cannot corrupt the loss; this is an advantage of dynamic_rnn.


array([[[ 1.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  1.]],

       [[ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  1.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.]],

       [[ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.]]], dtype=float32)
array([[[ 0.01200307, -0.06400099],
        [-0.06923316, -0.08992109],
        [-0.19088814, -0.16441   ],
        [-0.26740667, -0.21937366],
        [-0.13658734, -0.16970572]],

       [[-0.08115824, -0.05406357],
        [-0.06181009, -0.06055027],
        [-0.19931245, -0.15297818],
        [ 0.        ,  0.        ],
        [ 0.        ,  0.        ]],

       [[-0.15925387, -0.11616778],
        [-0.2504341 , -0.19081444],
        [-0.20630276, -0.17603026],
        [-0.15604143, -0.14868453],
        [ 0.        ,  0.        ]]], dtype=float32)
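
In [ ]:
# Illustrative sketch (variable names are assumptions): with zero-padded
# one-hot data, the true sequence lengths can be derived from the data rather
# than hard-coded. A time step is "real" if any of its features is non-zero.
padded = np.array([[h, e, l, [0, 0, 0, 0], [0, 0, 0, 0]]], dtype=np.float32)
used = np.sign(np.max(np.abs(padded), axis=2))   # 1 for real steps, 0 for padding
lengths = np.sum(used, axis=1).astype(np.int32)
print(lengths)  # [3]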

In [15]:
# solution: pad the shorter sequences (and their labels) with zero vectors so every example has the same length.
with tf.variable_scope('initial_state') as scope:
    batch_size = 3
    x_data = np.array([[h, e, l, l, o],
                      [e, o, l, l, l],
                      [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    
    # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch: 3
    hidden_size=2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    
    
    initial_state = cell.zero_state(batch_size, tf.float32)
    pp.pprint(initial_state)
    
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    
    pp.pprint(outputs.eval())
    # pp.pprint(_states.eval())


array([[[ 1.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  1.]],

       [[ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  1.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.]],

       [[ 0.,  0.,  1.,  0.],
        [ 0.,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  1.,  0.,  0.],
        [ 0.,  0.,  1.,  0.]]], dtype=float32)
LSTMStateTuple(c=<tf.Tensor 'initial_state/BasicLSTMCellZeroState/zeros:0' shape=(3, 2) dtype=float32>, h=<tf.Tensor 'initial_state/BasicLSTMCellZeroState/zeros_1:0' shape=(3, 2) dtype=float32>)
array([[[ 0.08405151, -0.09496456],
        [ 0.11951527, -0.1138202 ],
        [ 0.03925625,  0.01903542],
        [-0.02667238,  0.10234888],
        [ 0.09924657,  0.01830677]],

       [[ 0.06987614, -0.06336639],
        [ 0.18585655, -0.09835634],
        [ 0.0540128 ,  0.00064523],
        [-0.0169289 ,  0.09034473],
        [-0.06446094,  0.14728373]],

       [[-0.05255075,  0.0962592 ],
        [-0.08721706,  0.15399344],
        [ 0.02119706,  0.04275266],
        [ 0.08355803, -0.04225467],
        [ 0.00772699,  0.06397828]]], dtype=float32)

In [9]:
# Create input data
batch_size=3
sequence_length=5
input_dim=3

x_data = np.arange(45, dtype=np.float32).reshape(batch_size, sequence_length, input_dim)
pp.pprint(x_data)  # batch, sequence_length, input_dim
print(x_data.shape)


array([[[  0.,   1.,   2.],
        [  3.,   4.,   5.],
        [  6.,   7.,   8.],
        [  9.,  10.,  11.],
        [ 12.,  13.,  14.]],

       [[ 15.,  16.,  17.],
        [ 18.,  19.,  20.],
        [ 21.,  22.,  23.],
        [ 24.,  25.,  26.],
        [ 27.,  28.,  29.]],

       [[ 30.,  31.,  32.],
        [ 33.,  34.,  35.],
        [ 36.,  37.,  38.],
        [ 39.,  40.,  41.],
        [ 42.,  43.,  44.]]], dtype=float32)
(3, 5, 3)

In [5]:
# LSTM over the generated data, starting from an explicit zero initial state
with tf.variable_scope('generated_data') as scope:
    # One cell RNN input_dim (3) -> output_dim (5). sequence: 5, batch: 3
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    # feed the x_data created above
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())


array([[[ -1.03524268e-01,   1.21041581e-01,  -1.39336213e-01,
          -4.63617267e-03,  -9.04870406e-02],
        [ -1.68390125e-02,   3.28191146e-02,  -3.06317329e-01,
           2.22605184e-01,  -4.43154089e-02],
        [ -1.01194310e-03,  -1.46114687e-03,  -2.14735955e-01,
           2.13646322e-01,   3.45933475e-02],
        [ -5.29306672e-05,  -1.98277924e-03,  -1.48191825e-01,
           1.41403764e-01,   5.74170761e-02],
        [ -2.65945573e-06,  -4.34540038e-04,  -1.05737559e-01,
           8.60147625e-02,   3.68497968e-02]],

       [[ -1.32890207e-07,  -5.56626801e-05,  -7.62307644e-02,
           4.46360111e-02,   1.31913135e-02],
        [ -6.73131062e-09,  -1.20311179e-05,  -5.10988422e-02,
           2.87124440e-02,   8.10873508e-03],
        [ -3.38649636e-10,  -2.06746358e-06,  -3.46572474e-02,
           1.66509487e-02,   3.88925127e-03],
        [ -1.70435246e-11,  -3.45664233e-07,  -2.33379323e-02,
           9.59014986e-03,   1.78979163e-03],
        [ -8.58027787e-13,  -5.76358268e-08,  -1.56257320e-02,
           5.52985072e-03,   8.18239350e-04]],

       [[ -4.31698033e-14,  -7.30456451e-09,  -1.04437834e-02,
           3.13307694e-03,   2.84224981e-04],
        [ -2.17601472e-15,  -1.54610091e-09,  -6.93584280e-03,
           1.84797787e-03,   1.64279423e-04],
        [ -1.09602649e-16,  -2.66344780e-10,  -4.60804999e-03,
           1.07094098e-03,   7.74105501e-05],
        [ -5.52095951e-18,  -4.46513000e-11,  -3.05849290e-03,
           6.21051586e-04,   3.54729782e-05],
        [ -2.78117793e-19,  -7.45875410e-12,  -2.02870066e-03,
           3.60402366e-04,   1.61927019e-05]]], dtype=float32)

In [9]:
with tf.variable_scope('dynamic_rnn') as scope:
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32,
                                         sequence_length=[1, 3, 2])
    # length 1 for batch 1, length 3 for batch 2, length 2 for batch 3
    
    print("dynamic rnn: ", outputs)
    
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())  # batch size, unrolling (time), hidden_size


dynamic rnn:  Tensor("dynamic_rnn/rnn/transpose:0", shape=(3, 5, 5), dtype=float32)
array([[[  3.39594483e-02,  -6.85234666e-02,  -1.50369048e-01,
          -6.50415868e-02,  -2.26832852e-02],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]],

       [[ -9.11737516e-05,  -5.82499354e-07,  -3.06825910e-04,
          -8.45805289e-07,  -4.11379278e-06],
        [ -8.48582204e-05,  -4.45100419e-08,  -1.17199779e-04,
          -8.83092412e-07,  -4.25946496e-07],
        [ -6.54496034e-05,  -3.39848327e-09,  -3.19798564e-05,
          -8.55753683e-07,  -4.39908554e-08],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]],

       [[ -2.66648250e-08,  -1.51166839e-12,  -2.32038488e-07,
          -2.82572728e-12,  -4.83561143e-11],
        [ -2.43239722e-08,  -1.15386948e-13,  -8.31450961e-08,
          -2.93821304e-12,  -4.98992297e-12],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00],
        [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           0.00000000e+00,   0.00000000e+00]]], dtype=float32)

In [35]:
# flatten-based softmax: fold batch and time into one axis, apply a single weight matrix, then unfold
hidden_size=3
sequence_length=5
batch_size=3
num_classes=5

x_data = x_data.reshape(-1, hidden_size)  # flatten to (batch_size * sequence_length, hidden_size)
pp.pprint(x_data)
pp.pprint(x_data.shape)  # (15, 3)

softmax_w = np.arange(15, dtype=np.float32).reshape(hidden_size, num_classes)
pp.pprint(softmax_w.shape)  # (3, 5)

outputs = np.matmul(x_data, softmax_w)
pp.pprint(outputs.shape)  # (15, 5)
outputs = outputs.reshape(-1, sequence_length, num_classes)  # batch, seq, class
pp.pprint(outputs.shape)  # (3, 5, 5)
pp.pprint(outputs)


array([[  0.,   1.,   2.],
       [  3.,   4.,   5.],
       [  6.,   7.,   8.],
       [  9.,  10.,  11.],
       [ 12.,  13.,  14.],
       [ 15.,  16.,  17.],
       [ 18.,  19.,  20.],
       [ 21.,  22.,  23.],
       [ 24.,  25.,  26.],
       [ 27.,  28.,  29.],
       [ 30.,  31.,  32.],
       [ 33.,  34.,  35.],
       [ 36.,  37.,  38.],
       [ 39.,  40.,  41.],
       [ 42.,  43.,  44.]], dtype=float32)
(15, 3)
(3, 5)
(15, 5)
(3, 5, 5)
array([[[   25.,    28.,    31.,    34.,    37.],
        [   70.,    82.,    94.,   106.,   118.],
        [  115.,   136.,   157.,   178.,   199.],
        [  160.,   190.,   220.,   250.,   280.],
        [  205.,   244.,   283.,   322.,   361.]],

       [[  250.,   298.,   346.,   394.,   442.],
        [  295.,   352.,   409.,   466.,   523.],
        [  340.,   406.,   472.,   538.,   604.],
        [  385.,   460.,   535.,   610.,   685.],
        [  430.,   514.,   598.,   682.,   766.]],

       [[  475.,   568.,   661.,   754.,   847.],
        [  520.,   622.,   724.,   826.,   928.],
        [  565.,   676.,   787.,   898.,  1009.],
        [  610.,   730.,   850.,   970.,  1090.],
        [  655.,   784.,   913.,  1042.,  1171.]]], dtype=float32)
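
In [ ]:
# Illustrative TF version of the same flatten -> matmul -> un-flatten pattern
# (variable names and the default initializer are assumptions, not the
# original notebook's code).
with tf.variable_scope('flatten_softmax') as scope:
    rnn_out = tf.constant(np.arange(45, dtype=np.float32).reshape(3, 5, 3))
    flat = tf.reshape(rnn_out, [-1, hidden_size])            # (15, 3)
    softmax_w = tf.get_variable('softmax_w', [hidden_size, num_classes])
    softmax_b = tf.get_variable('softmax_b', [num_classes])
    logits = tf.reshape(tf.matmul(flat, softmax_w) + softmax_b,
                        [batch_size, sequence_length, num_classes])
    sess.run(tf.global_variables_initializer())
    print(logits.eval().shape)  # (3, 5, 5)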

In [53]:
# [batch_size, sequence_length]
y_data = tf.constant([[1, 1, 1]])
print(y_data)
# [batch_size, sequence_length, num_classes]
prediction = tf.constant([[[0.2, 0.7], [0.6, 0.2], [0.2, 0.9]]], dtype=tf.float32)
print(prediction)
# [batch_size, sequence_length]
weights = tf.constant([[1, 1, 1]], dtype=tf.float32)
print(weights)
sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=prediction, targets=y_data, weights=weights)
print(sequence_loss)
sess.run(tf.global_variables_initializer())
print("Loss: ", sequence_loss.eval())


Tensor("Const_51:0", shape=(1, 3), dtype=int32)
Tensor("Const_52:0", shape=(1, 3, 2), dtype=float32)
Tensor("Const_53:0", shape=(1, 3), dtype=float32)
Tensor("sequence_loss_17/truediv:0", shape=(), dtype=float32)
Loss:  0.596759
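
In [ ]:
# Verification sketch (added; not from the original run): sequence_loss is the
# weighted average of the per-step softmax cross-entropy, so plain NumPy
# reproduces the 0.596759 above.
logits = np.array([[0.2, 0.7], [0.6, 0.2], [0.2, 0.9]], dtype=np.float32)
targets = np.array([1, 1, 1])
probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
xent = -np.log(probs[np.arange(3), targets])
print(xent.mean())  # ~0.596759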

In [54]:
# [batch_size, sequence_length]
y_data = tf.constant([[1, 1, 1]])

# [batch_size, sequence_length, num_classes]
prediction1 = tf.constant([[[0.3, 0.7], [0.3, 0.7], [0.3, 0.7]]], dtype=tf.float32)
prediction2 = tf.constant([[[0.1, 0.9], [0.1, 0.9], [0.1, 0.9]]], dtype=tf.float32)

prediction3 = tf.constant([[[1, 0], [1, 0], [1, 0]]], dtype=tf.float32)
prediction4 = tf.constant([[[0, 1], [1, 0], [0, 1]]], dtype=tf.float32)

# [batch_size, sequence_length]
weights = tf.constant([[1, 1, 1]], dtype=tf.float32)

sequence_loss1 = tf.contrib.seq2seq.sequence_loss(prediction1, y_data, weights)
sequence_loss2 = tf.contrib.seq2seq.sequence_loss(prediction2, y_data, weights)
sequence_loss3 = tf.contrib.seq2seq.sequence_loss(prediction3, y_data, weights)
sequence_loss4 = tf.contrib.seq2seq.sequence_loss(prediction4, y_data, weights)

sess.run(tf.global_variables_initializer())
print("Loss1: ", sequence_loss1.eval(),
      "Loss2: ", sequence_loss2.eval(),
      "Loss3: ", sequence_loss3.eval(),
      "Loss4: ", sequence_loss4.eval())


Loss1:  0.513015 Loss2:  0.371101 Loss3:  1.31326 Loss4:  0.646595
