In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
import pprint
pp = pprint.PrettyPrinter(indent=4)  # prints data in a readable form
sess = tf.InteractiveSession()
In [3]:
# One hot encoding for each char in 'hello'
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]
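As a side note, here is a minimal sketch of building the same one-hot rows from an index mapping instead of typing them by hand (idx2char / char2idx are illustrative names, not used elsewhere in this notebook):
In [ ]:
idx2char = ['h', 'e', 'l', 'o']
char2idx = {c: i for i, c in enumerate(idx2char)}
one_hot = np.eye(len(idx2char), dtype=np.float32)  # identity matrix: one row per character
assert (one_hot[char2idx['h']] == np.array(h, dtype=np.float32)).all()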
In [3]:
with tf.variable_scope('one_cell') as scope:
    # One cell RNN: input_dim (4) -> output_dim (2)
    hidden_size = 2
    # create the cell
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    # print(cell)  # <tensorflow.contrib.rnn.python.ops.core_rnn_cell_impl.BasicRNNCell object at 0x7fdc881bda20>
    print(cell.output_size, cell.state_size)  # 2 2
    # build x_data
    x_data = np.array([[h]], dtype=np.float32)  # x_data = [[[1,0,0,0]]]
    pp.pprint(x_data.shape)
    # pass the cell and x_data to dynamic_rnn
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
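To see why the output carries hidden_size (2) values per step, here is a rough numpy sketch of what a single BasicRNNCell step computes; W and b are toy stand-ins for the cell's learned kernel and bias, not the actual TF variables:
In [ ]:
x = np.array(h, dtype=np.float32)                      # one input vector, shape (4,)
h_prev = np.zeros(2, dtype=np.float32)                 # previous hidden state, shape (hidden_size,)
W = np.full((4 + 2, 2), 0.1, dtype=np.float32)         # toy kernel mapping [x, h_prev] -> new hidden state
b = np.zeros(2, dtype=np.float32)                      # toy bias
new_h = np.tanh(np.concatenate([x, h_prev]) @ W + b)   # new_h is also the cell's output
print(new_h.shape)                                     # (2,) -> hence outputs shape (1, 1, 2) above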
In [4]:
with tf.variable_scope('two_sequances') as scope:
    # One cell RNN: input_dim (4) -> output_dim (2), sequence length: 5
    hidden_size = 2
    # create the cell
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    print(cell.output_size, cell.state_size)
    # this time, feed a whole word
    x_data = np.array([[h, e, l, l, o]], dtype=np.float32)
    print(x_data.shape)
    pp.pprint(x_data)
    # pass the cell and x_data to dynamic_rnn
    outputs, states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
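A quick shape check (this relies on the outputs tensor from the cell above): dynamic_rnn keeps the batch and time dimensions and only maps the last one from input_dim (4) to hidden_size (2).
In [ ]:
print(outputs.shape)  # expected (1, 5, 2): batch, sequence_length, hidden_size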
In [ ]:
# using an LSTM cell
# 3 batches
with tf.variable_scope('3_batches') as scope:
    # One cell RNN: input_dim (4) -> output_dim (2), sequence length: 5, batch: 3
    # 3 batches: 'hello', 'eolll', 'lleel'
    # build x_data as a batch
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    hidden_size = 2
    # create the cell
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    # pass x_data and the cell to dynamic_rnn
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
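With BasicLSTMCell and state_is_tuple=True, the final state is an LSTMStateTuple (c, h) rather than a single tensor. A small check, assuming the cell above has been run:
In [ ]:
c_state, h_state = _states                # cell state and hidden state
print(c_state.shape, h_state.shape)       # expected (3, 2) each: batch_size x hidden_size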
In [14]:
# passing sequence_length to dynamic_rnn
# problem: in practice, most sequences do not have the same length
with tf.variable_scope('3_batches_dynamic_length') as scope:
    # One cell RNN: input_dim (4) -> output_dim (2), sequence length: 5, batch: 3
    # 3 batches: 'hello', 'eolll', 'lleel'
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    # e.g. a sequence length of 3 means the first 3 steps get real outputs
    # and the remaining 2 steps are filled with zeros
    outputs, _states = tf.nn.dynamic_rnn(
        cell, x_data, sequence_length=[5, 3, 4], dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
    # pp.pprint(_states.eval())
    # explicitly zeroing out the padded positions keeps the loss well-behaved;
    # this is an advantage of dynamic_rnn
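A quick way to confirm the zero padding (a sketch, relying on the outputs from the cell above): batch entry 1 was given length 3, so its last two steps should be all zeros.
In [ ]:
out = outputs.eval()
print(out[1, 3:])  # expected: two rows of zeros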
In [15]:
# solution: pad every sequence (and its labels) with zero vectors up to a common length
with tf.variable_scope('initial_state') as scope:
    batch_size = 3
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    # One cell RNN: input_dim (4) -> output_dim (2), sequence length: 5, batch: 3
    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    pp.pprint(initial_state)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
    # pp.pprint(_states.eval())
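For an LSTM, zero_state is simply a pair of zero tensors (c, h), each of shape (batch_size, hidden_size). A quick look (sketch):
In [ ]:
pp.pprint(sess.run(initial_state))  # expected: two (3, 2) arrays of zeros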
In [9]:
# Create input data
batch_size=3
sequence_length=5
input_dim=3
x_data = np.arange(45, dtype=np.float32).reshape(batch_size, sequence_length, input_dim)
pp.pprint(x_data) # batch, sequence_length, input_dim
print(x_data.shape)
In [5]:
#output data with zero_pad
with tf.variable_scope('generated_data') as scope:
    # One cell RNN: input_dim (3) -> output_dim (5), sequence length: 5, batch: 3
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    # feed the x_data created above
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
In [9]:
with tf.variable_scope('dynamic_rnn') as scope:
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32,
                                         sequence_length=[1, 3, 2])
    # length 1 for the first batch entry, 3 for the second, 2 for the third
    print("dynamic rnn: ", outputs)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())  # batch_size, unrolling (time), hidden_size
In [35]:
# flatten-based softmax
hidden_size = 3
sequence_length = 5
batch_size = 3
num_classes = 5
pp.pprint(x_data)  # (batch_size=3, sequence_length=5, hidden_size=3)
x_data = x_data.reshape(-1, hidden_size)
# pp.pprint(x_data.shape)  # (15, 3)
softmax_w = np.arange(15, dtype=np.float32).reshape(hidden_size, num_classes)
# print(softmax_w.shape)  # (3, 5)
outputs = np.matmul(x_data, softmax_w)
# print(outputs.shape)  # (15, 5)
outputs = outputs.reshape(-1, sequence_length, num_classes)  # batch, seq, class
# print(outputs.shape)  # (3, 5, 5)
pp.pprint(outputs)
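The flatten -> matmul -> reshape trick is equivalent to applying one shared softmax layer at every (batch, time) position. A small equivalence check (a sketch using the variables from the cell above):
In [ ]:
x3d = x_data.reshape(batch_size, sequence_length, hidden_size)   # undo the flattening
per_step = np.stack([x3d[:, t, :] @ softmax_w for t in range(sequence_length)], axis=1)
print(np.allclose(per_step, outputs))  # expected True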
In [53]:
# [batch_size, sequence_length]
y_data = tf.constant([[1, 1, 1]])
print(y_data)
# [batch_size, sequence_length, num_classes]
prediction = tf.constant([[[0.2, 0.7], [0.6, 0.2], [0.2, 0.9]]], dtype=tf.float32)
print(prediction)
# [batch_size, sequence_length]
weights = tf.constant([[1, 1, 1]], dtype=tf.float32)
print(weights)
sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=prediction, targets=y_data, weights=weights)
print(sequence_loss)
sess.run(tf.global_variables_initializer())
print("Loss: ", sequence_loss.eval())
In [54]:
# [batch_size, sequence_length]
y_data = tf.constant([[1, 1, 1]])
# [batch_size, sequence_length, num_classes]
prediction1 = tf.constant([[[0.3, 0.7], [0.3, 0.7], [0.3, 0.7]]], dtype=tf.float32)
prediction2 = tf.constant([[[0.1, 0.9], [0.1, 0.9], [0.1, 0.9]]], dtype=tf.float32)
prediction3 = tf.constant([[[1, 0], [1, 0], [1, 0]]], dtype=tf.float32)
prediction4 = tf.constant([[[0, 1], [1, 0], [0, 1]]], dtype=tf.float32)
# [batch_size, sequence_length]
weights = tf.constant([[1, 1, 1]], dtype=tf.float32)
sequence_loss1 = tf.contrib.seq2seq.sequence_loss(prediction1, y_data, weights)
sequence_loss2 = tf.contrib.seq2seq.sequence_loss(prediction2, y_data, weights)
sequence_loss3 = tf.contrib.seq2seq.sequence_loss(prediction3, y_data, weights)
sequence_loss4 = tf.contrib.seq2seq.sequence_loss(prediction4, y_data, weights)
sess.run(tf.global_variables_initializer())
print("Loss1: ", sequence_loss1.eval(),
"Loss2: ", sequence_loss2.eval(),
"Loss3: ", sequence_loss3.eval(),
"Loss4: ", sequence_loss4.eval())