In [1]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
tf.__version__
Out[2]:
In [3]:
np.__version__
Out[3]:
In [4]:
author = "kyubyong. https://github.com/Kyubyong/tensorflow-exercises"
In [5]:
np.random.seed(0)
Q1. Let's practice the seq2seq framework with a simple example, in which the last state of the encoder is used as the initial state of the decoder. Complete the code.
In [6]:
# Inputs and outputs: ten digits
x = tf.placeholder(tf.int32, shape=(32, 10))
y = tf.placeholder(tf.int32, shape=(32, 10))
# One-hot encoding
enc_inputs = tf.one_hot(x, 10)
# Decoder inputs: the target shifted right by one step (teacher forcing),
# with a zero prepended as the start token, then one-hot encoded.
dec_inputs = tf.concat((tf.zeros_like(y[:, :1]), y[:, :-1]), -1)
dec_inputs = tf.one_hot(dec_inputs, 10)
# encoder
encoder_cell = tf.contrib.rnn.GRUCell(128)
memory, last_state = tf.nn.dynamic_rnn(encoder_cell, enc_inputs, dtype=tf.float32, scope="encoder")
# decoder
decoder_cell = ...
outputs, _ = ...
# Readout
logits = tf.layers.dense(outputs, 10)
preds = tf.argmax(logits, -1, output_type=tf.int32)
# Evaluation
hits = tf.reduce_sum(tf.to_float(tf.equal(preds, y)))
acc = hits / tf.to_float(tf.size(x))
# Loss and train
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
mean_loss = tf.reduce_mean(loss)
opt = tf.train.AdamOptimizer(0.001)
train_op = opt.minimize(mean_loss)
# Session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    losses, accs = [], []
    for step in range(2000):
        # Data design
        # We feed sequences of random digits into `x`
        # and take their reverse as the target.
        _x = np.random.randint(0, 10, size=(32, 10), dtype=np.int32)
        _y = _x[:, ::-1]  # Reverse
        _, _loss, _acc = sess.run([train_op, mean_loss, acc], {x: _x, y: _y})
        losses.append(_loss)
        accs.append(_acc)

# Plot
plt.plot(losses, label="loss")
plt.plot(accs, label="accuracy")
plt.legend()
plt.grid()
plt.show()
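One possible completion of the decoder blanks above, as a minimal sketch: it assumes the same TF 1.x contrib API used in the rest of the cell, reuses a 128-unit GRUCell for the decoder, and passes the encoder's `last_state` as `initial_state` so the decoder starts where the encoder left off.

decoder_cell = tf.contrib.rnn.GRUCell(128)
outputs, _ = tf.nn.dynamic_rnn(decoder_cell, dec_inputs,
                               initial_state=last_state,
                               dtype=tf.float32, scope="decoder")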
Q2. This time, we will use the Bahdanau attention mechanism. Complete the code.
In [7]:
tf.reset_default_graph()
# Inputs and outputs: ten digits
x = tf.placeholder(tf.int32, shape=(32, 10))
y = tf.placeholder(tf.int32, shape=(32, 10))
# One-hot encoding
enc_inputs = tf.one_hot(x, 10)
dec_inputs = tf.concat((tf.zeros_like(y[:, :1]), y[:, :-1]), -1)
dec_inputs = tf.one_hot(dec_inputs, 10)
# encoder
encoder_cell = tf.contrib.rnn.GRUCell(128)
memory, last_state = tf.nn.dynamic_rnn(encoder_cell, enc_inputs, dtype=tf.float32, scope="encoder")
# decoder
attention_mechanism = ...
decoder_cell = tf.contrib.rnn.GRUCell(128)
cell_with_attention = tf.contrib.seq2seq.AttentionWrapper(...)
outputs, state = tf.nn.dynamic_rnn(cell_with_attention, dec_inputs, dtype=tf.float32)
alignments = ...
# Readout
logits = tf.layers.dense(outputs, 10)
preds = tf.argmax(logits, -1, output_type=tf.int32)
# Evaluation
hits = tf.reduce_sum(tf.to_float(tf.equal(preds, y)))
acc = hits / tf.to_float(tf.size(x))
# Loss and train
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
mean_loss = tf.reduce_mean(loss)
opt = tf.train.AdamOptimizer(0.001)
train_op = opt.minimize(mean_loss)
# Plotting helper for attention alignments
def plot_alignment(alignment):
    fig, ax = plt.subplots()
    im = ax.imshow(alignment, cmap='Greys', interpolation='none')
    fig.colorbar(im, ax=ax)
    plt.xlabel('Decoder timestep')
    plt.ylabel('Encoder timestep')
    plt.show()

# Session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    losses, accs = [], []
    for step in range(2000):
        # Data design
        # We feed sequences of random digits into `x`
        # and take their reverse as the target.
        _x = np.random.randint(0, 10, size=(32, 10), dtype=np.int32)
        _y = _x[:, ::-1]  # Reverse
        _, _loss, _acc = sess.run([train_op, mean_loss, acc], {x: _x, y: _y})
        losses.append(_loss)
        accs.append(_acc)
        if step % 100 == 0:
            print("step=", step)
            _alignments = sess.run(alignments, {x: _x, y: _y})
            plot_alignment(_alignments[0])

# Plot
plt.plot(losses, label="loss")
plt.plot(accs, label="accuracy")
plt.legend()
plt.grid()
plt.show()
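One possible completion of the attention blanks above, as a minimal sketch: it assumes `tf.contrib.seq2seq.BahdanauAttention` over the encoder `memory`, an `AttentionWrapper` with `alignment_history=True`, and a transpose of the stacked alignment history to (batch, encoder_time, decoder_time) so that `plot_alignment` shows encoder timesteps on the y-axis and decoder timesteps on the x-axis.

attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=128, memory=memory)
cell_with_attention = tf.contrib.seq2seq.AttentionWrapper(
    decoder_cell, attention_mechanism,
    attention_layer_size=128, alignment_history=True)
# state.alignment_history is a TensorArray; stack() gives (dec_time, batch, enc_time)
alignments = tf.transpose(state.alignment_history.stack(), [1, 2, 0])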
In [ ]: