In [1]:
# http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/
# http://learningtensorflow.com/index.html
# http://suriyadeepan.github.io/2016-12-31-practical-seq2seq/
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
import pprint
pp = pprint.PrettyPrinter(indent=4)
sess = tf.InteractiveSession()
In [2]:
# One hot encoding for each char in 'hello'
h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]
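Writing one-hot rows out by hand does not scale beyond a toy vocabulary; a common shortcut (a sketch, not part of the original lab) is to slice an identity matrix:

# Same one-hot rows, taken from an identity matrix
chars = ['h', 'e', 'l', 'o']
one_hot = np.eye(len(chars), dtype=np.float32)  # row i encodes chars[i]
print(one_hot[0])  # [1. 0. 0. 0.] == h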
In [3]:
with tf.variable_scope('one_cell') as scope:
    # One cell RNN: input_dim (4) -> output_dim (2)
    hidden_size = 2
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    print(cell.output_size, cell.state_size)
    x_data = np.array([[h]], dtype=np.float32)  # x_data = [[[1, 0, 0, 0]]]
    pp.pprint(x_data)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
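A BasicRNNCell step is just h_t = tanh(x_t . W_xh + h_prev . W_hh + b). A minimal numpy sketch of that recurrence (the weights below are random stand-ins, not the ones TensorFlow initialized, so the numbers will differ from the output above):

# One step of a vanilla RNN cell with illustrative random weights
W_xh = np.random.randn(4, 2).astype(np.float32)  # input_dim -> hidden_size
W_hh = np.random.randn(2, 2).astype(np.float32)  # hidden -> hidden
b = np.zeros(2, dtype=np.float32)
h_prev = np.zeros(2, dtype=np.float32)           # initial state is all zeros
h_t = np.tanh(np.dot(h, W_xh) + np.dot(h_prev, W_hh) + b)
print(h_t)  # shape (2,): one hidden vector, like the cell output above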
In [4]:
with tf.variable_scope('two_sequences') as scope:
    # One cell RNN: input_dim (4) -> output_dim (2), sequence_length: 5
    hidden_size = 2
    cell = tf.contrib.rnn.BasicRNNCell(num_units=hidden_size)
    x_data = np.array([[h, e, l, l, o]], dtype=np.float32)
    print(x_data.shape)
    pp.pprint(x_data)
    outputs, states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
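A quick shape check (one batch, five time steps, two hidden units):

print(outputs.shape)  # (1, 5, 2): batch_size, sequence_length, hidden_size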
In [5]:
with tf.variable_scope('3_batches') as scope:
    # One cell RNN: input_dim (4) -> output_dim (2), sequence_length: 5, batch: 3
    # A batch of 3 sequences: 'hello', 'eolll', 'lleel'
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
In [6]:
with tf.variable_scope('3_batches_dynamic_length') as scope:
    # One cell RNN: input_dim (4) -> output_dim (2), sequence_length: 5, batch: 3
    # A batch of 3 sequences: 'hello', 'eolll', 'lleel'
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(
        cell, x_data, sequence_length=[5, 3, 4], dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
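With sequence_length=[5, 3, 4], dynamic_rnn stops updating each sequence at its given length and emits zero vectors for the remaining steps; a quick check (a sketch, not part of the original lab):

out = outputs.eval()
print(out[1, 3:])  # 2nd sequence ran for 3 steps; steps 3-4 are zeros
print(out[2, 4:])  # 3rd sequence ran for 4 steps; step 4 is zeros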
In [7]:
with tf.variable_scope('initial_state') as scope:
    batch_size = 3
    x_data = np.array([[h, e, l, l, o],
                       [e, o, l, l, l],
                       [l, l, e, e, l]], dtype=np.float32)
    pp.pprint(x_data)
    # One cell RNN: input_dim (4) -> output_dim (2), sequence_length: 5, batch: 3
    hidden_size = 2
    cell = rnn.BasicLSTMCell(num_units=hidden_size, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state,
                                         dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
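With state_is_tuple=True, zero_state returns an LSTMStateTuple of (c, h), each of shape [batch_size, hidden_size]. Any pair of tensors with those shapes could be fed instead to warm-start the RNN; a sketch (not used further here):

# Hand-built initial state; zeros here, but any values of the right shape work
c0 = tf.zeros([batch_size, hidden_size])
h0 = tf.zeros([batch_size, hidden_size])
custom_state = rnn.LSTMStateTuple(c0, h0)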
In [8]:
# Create input data
batch_size = 3
sequence_length = 5
input_dim = 3
x_data = np.arange(45, dtype=np.float32).reshape(batch_size, sequence_length, input_dim)
pp.pprint(x_data) # batch, sequence_length, input_dim
In [9]:
with tf.variable_scope('generated_data') as scope:
    # One cell RNN: input_dim (3) -> output_dim (5), sequence_length: 5, batch: 3
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data,
                                         initial_state=initial_state,
                                         dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())
In [10]:
with tf.variable_scope('MultiRNNCell') as scope:
    # Make rnn: stack 3 LSTM layers; each layer needs its own cell instance
    cells = [rnn.BasicLSTMCell(num_units=5, state_is_tuple=True) for _ in range(3)]
    cell = rnn.MultiRNNCell(cells, state_is_tuple=True)  # 3 layers
    # rnn in/out
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32)
    print("dynamic rnn: ", outputs)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())  # batch_size, unrolling (time), hidden_size
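The list comprehension above matters: the old [cell] * 3 idiom reuses one cell object, which asks all layers to share weights even though layer 1 maps input_dim 3 -> 5 while the upper layers map 5 -> 5, and newer TF versions reject it. With separate instances the layer sizes do not even have to match; a sketch with illustrative sizes:

sizes = [5, 4, 3]  # hidden size per layer, purely illustrative
cells = [rnn.BasicLSTMCell(num_units=s, state_is_tuple=True) for s in sizes]
pyramid = rnn.MultiRNNCell(cells, state_is_tuple=True)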
In [11]:
with tf.variable_scope('dynamic_rnn') as scope:
    cell = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    outputs, _states = tf.nn.dynamic_rnn(cell, x_data, dtype=tf.float32,
                                         sequence_length=[1, 3, 2])
    # length 1 for the 1st sequence, 3 for the 2nd, 2 for the 3rd
    print("dynamic rnn: ", outputs)
    sess.run(tf.global_variables_initializer())
    pp.pprint(outputs.eval())  # batch_size, unrolling (time), hidden_size
In [12]:
with tf.variable_scope('bi-directional') as scope:
    # bi-directional rnn: one forward cell, one backward cell
    cell_fw = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    cell_bw = rnn.BasicLSTMCell(num_units=5, state_is_tuple=True)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, x_data,
                                                      sequence_length=[2, 3, 1],
                                                      dtype=tf.float32)
    sess.run(tf.global_variables_initializer())
    pp.pprint(sess.run(outputs))
    pp.pprint(sess.run(states))
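bidirectional_dynamic_rnn returns the forward and backward outputs as a 2-tuple; a common next step (not shown in the original lab) is to concatenate them along the feature axis so each time step carries both directions:

outputs_concat = tf.concat(outputs, axis=2)  # forward + backward features
print(outputs_concat.shape)  # (3, 5, 10): batch, time, 2 * hidden_size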
In [13]:
# Flatten-based softmax: merge batch and time so one softmax weight
# matrix is shared across every time step
hidden_size = 3
sequence_length = 5
batch_size = 3
num_classes = 5
pp.pprint(x_data)  # (batch_size=3, sequence_length=5, hidden_size=3)
x_data = x_data.reshape(-1, hidden_size)
pp.pprint(x_data)
softmax_w = np.arange(15, dtype=np.float32).reshape(hidden_size, num_classes)
outputs = np.matmul(x_data, softmax_w)
outputs = outputs.reshape(-1, sequence_length, num_classes) # batch, seq, class
pp.pprint(outputs)
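The numpy arithmetic above mirrors what the corresponding TF graph usually looks like: flatten [batch, time, hidden] to [batch * time, hidden], apply one shared projection, and reshape back. A sketch, reusing softmax_w as a stand-in for a trained weight variable:

X_flat = tf.constant(x_data)  # already flattened to (15, 3) above
logits = tf.matmul(X_flat, tf.constant(softmax_w))  # (15, 5)
logits = tf.reshape(logits, [-1, sequence_length, num_classes])  # (3, 5, 5)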
In [14]:
# [batch_size, sequence_length]
y_data = tf.constant([[1, 1, 1]])
# [batch_size, sequence_length, num_classes]
prediction = tf.constant([[[0.2, 0.7], [0.6, 0.2], [0.2, 0.9]]], dtype=tf.float32)
# [batch_size, sequence_length]
weights = tf.constant([[1, 1, 1]], dtype=tf.float32)
sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=prediction, targets=y_data, weights=weights)
sess.run(tf.global_variables_initializer())
print("Loss: ", sequence_loss.eval())
In [15]:
# [batch_size, sequence_length]
y_data = tf.constant([[1, 1, 1]])
# [batch_size, sequence_length, num_classes]
prediction1 = tf.constant([[[0.3, 0.7], [0.3, 0.7], [0.3, 0.7]]], dtype=tf.float32)
prediction2 = tf.constant([[[0.1, 0.9], [0.1, 0.9], [0.1, 0.9]]], dtype=tf.float32)
prediction3 = tf.constant([[[1, 0], [1, 0], [1, 0]]], dtype=tf.float32)
prediction4 = tf.constant([[[0, 1], [1, 0], [0, 1]]], dtype=tf.float32)
# [batch_size, sequence_length]
weights = tf.constant([[1, 1, 1]], dtype=tf.float32)
sequence_loss1 = tf.contrib.seq2seq.sequence_loss(prediction1, y_data, weights)
sequence_loss2 = tf.contrib.seq2seq.sequence_loss(prediction2, y_data, weights)
sequence_loss3 = tf.contrib.seq2seq.sequence_loss(prediction3, y_data, weights)
sequence_loss4 = tf.contrib.seq2seq.sequence_loss(prediction4, y_data, weights)
sess.run(tf.global_variables_initializer())
print("Loss1: ", sequence_loss1.eval(),
"Loss2: ", sequence_loss2.eval(),
"Loss3: ", sequence_loss3.eval(),
"Loss4: ", sequence_loss4.eval())