In [1]:
import time
import numpy as np
import tensorflow as tf
from tensorflow.python.ops.rnn_cell import BasicLSTMCell, MultiRNNCell, DropoutWrapper
from tensorflow.models.rnn.ptb import reader
We use the Small Config settings.
In [2]:
class SmallConfig(object):
    """Small config."""
    init_scale = 0.1
    learning_rate = 1.0
    max_grad_norm = 5
    num_layers = 2
    num_steps = 20
    hidden_size = 200
    max_epoch = 4
    max_max_epoch = 13
    keep_prob = 1.0
    lr_decay = 0.5
    batch_size = 20
    vocab_size = 10000
We create two config objects, one for training and one for evaluation.
In [3]:
config = SmallConfig()
eval_config = SmallConfig()
eval_config.batch_size = 1
eval_config.num_steps = 1
We write a class that builds the PTB model.
In [4]:
class PTBModel(object):
    """The PTB model."""

    def __init__(self, config, is_training=False):
        self.batch_size = config.batch_size
        self.num_steps = config.num_steps

        # Placeholders for the input word IDs and the target word IDs
        # (the input sequence shifted by one position).
        input_size = [config.batch_size, config.num_steps]
        self.input_data = tf.placeholder(tf.int32, input_size)
        self.targets = tf.placeholder(tf.int32, input_size)

        # Stack num_layers LSTM cells, optionally wrapped with dropout.
        lstm_cell = BasicLSTMCell(config.hidden_size, forget_bias=0.0, state_is_tuple=True)
        if is_training and config.keep_prob < 1:
            lstm_cell = DropoutWrapper(lstm_cell, config.keep_prob)
        cell = MultiRNNCell([lstm_cell] * config.num_layers, state_is_tuple=True)

        self.initial_state = cell.zero_state(config.batch_size, tf.float32)

        # The word embedding lookup is kept on the CPU.
        with tf.device("/cpu:0"):
            embedding_size = [config.vocab_size, config.hidden_size]
            embedding = tf.get_variable("embedding", embedding_size)
            inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Unroll the LSTM for num_steps time steps, reusing the variables
        # after the first step.
        outputs = []
        state = self.initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(config.num_steps):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)

        # Flatten the outputs to [batch_size * num_steps, hidden_size] and
        # project them onto the vocabulary.
        output = tf.reshape(tf.concat(1, outputs), [-1, config.hidden_size])
        softmax_w_size = [config.hidden_size, config.vocab_size]
        softmax_w = tf.get_variable("softmax_w", softmax_w_size)
        softmax_b = tf.get_variable("softmax_b", [config.vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b

        # Cross-entropy loss, summed over all words and divided by the batch size.
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([config.batch_size * config.num_steps])])
        self.cost = tf.reduce_sum(loss) / config.batch_size
        self.final_state = state

        if not is_training:
            return

        # Gradient-clipped SGD with an externally assigned learning rate.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    def assign_lr(self, session, lr_value):
        session.run(tf.assign(self.lr, lr_value))
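To make the tensor bookkeeping easier to follow, here is a NumPy-only sketch (Small Config sizes, dummy zero tensors, not part of the model itself) of how the per-step LSTM outputs are concatenated, reshaped, and projected onto the vocabulary.
In [ ]:
import numpy as np

batch_size, num_steps, hidden_size, vocab_size = 20, 20, 200, 10000

# One dummy LSTM output per time step, each of shape [batch_size, hidden_size].
outputs = [np.zeros((batch_size, hidden_size)) for _ in range(num_steps)]

# tf.concat(1, outputs) joins along axis 1 -> [batch_size, num_steps * hidden_size];
# the reshape then folds it into one row per (sequence, time step) pair.
output = np.concatenate(outputs, axis=1).reshape(-1, hidden_size)
print(output.shape)    # (400, 200)

# softmax_w / softmax_b map every row to a vector of vocabulary logits.
softmax_w = np.zeros((hidden_size, vocab_size))
softmax_b = np.zeros(vocab_size)
logits = output.dot(softmax_w) + softmax_b
print(logits.shape)    # (400, 10000)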
We write a function that runs a single epoch.
In [5]:
def run_epoch(session, m, data, is_training=False):
    """Runs the model on the given data."""
    epoch_size = ((len(data) // m.batch_size) - 1) // m.num_steps
    start_time = time.time()
    costs = 0.0
    iters = 0

    eval_op = m.train_op if is_training else tf.no_op()

    # Flatten the initial LSTM state into [c0, h0, c1, h1, ...] so it can be
    # fed back in at every step.
    state_list = []
    for c, h in m.initial_state:
        state_list.extend([c.eval(), h.eval()])

    ptb_iter = reader.ptb_iterator(data, m.batch_size, m.num_steps)
    for step, (x, y) in enumerate(ptb_iter):
        # Fetch the cost and the final state of every layer.
        fetch_list = [m.cost]
        for c, h in m.final_state:
            fetch_list.extend([c, h])
        fetch_list.append(eval_op)

        # Feed the current batch together with the state carried over from
        # the previous batch.
        feed_dict = {m.input_data: x, m.targets: y}
        for i in range(len(m.initial_state)):
            c, h = m.initial_state[i]
            feed_dict[c], feed_dict[h] = state_list[i*2:(i+1)*2]

        cost, *state_list, _ = session.run(fetch_list, feed_dict)

        costs += cost
        iters += m.num_steps

        if is_training and step % (epoch_size // 10) == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * m.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)
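Since m.cost is the loss summed over num_steps words and divided by the batch size, costs / iters is the average cross-entropy per word, and run_epoch returns its exponential, i.e. the perplexity. A small self-contained sketch with an assumed, made-up loss value:
In [ ]:
import numpy as np

num_steps = 20
per_word_loss = 4.6                      # assumed average cross-entropy (nats per word)
batch_cost = per_word_loss * num_steps   # what m.cost would return for one batch

costs, iters = 0.0, 0
for _ in range(100):                     # pretend run_epoch processed 100 batches
    costs += batch_cost
    iters += num_steps

print(np.exp(costs / iters))             # ~99.48, i.e. perplexity = exp(4.6)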
In [6]:
raw_data = reader.ptb_raw_data('simple-examples/data')
train_data, valid_data, test_data, _ = raw_data
train_data, valid_data, and test_data are lists in which each word has been replaced by an integer ID.
The IDs are assigned by word frequency, starting at 0 for the most frequent word, over a vocabulary of 10,000 words.
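The reader module builds this vocabulary internally; purely as an illustration (hypothetical sample text, not the reader's actual code), the sketch below shows how such a frequency-ordered word-to-ID mapping can be constructed.
In [ ]:
import collections

sample = "the cat sat on the mat the end".split()
counter = collections.Counter(sample)

# Most frequent word first; ties broken alphabetically.
words = sorted(counter, key=lambda w: (-counter[w], w))
word_to_id = {w: i for i, w in enumerate(words)}

print(sorted(word_to_id.items(), key=lambda kv: kv[1]))
# [('the', 0), ('cat', 1), ('end', 2), ('mat', 3), ('on', 4), ('sat', 5)]
print([word_to_id[w] for w in sample])  # [0, 1, 5, 4, 0, 3, 0, 2]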
In [7]:
with tf.Graph().as_default(), tf.Session() as session:
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)

    # Build three models that share the same variables: one for training,
    # one for validation, and one for testing.
    with tf.variable_scope("model", reuse=None, initializer=initializer):
        m = PTBModel(config, is_training=True)
    with tf.variable_scope("model", reuse=True, initializer=initializer):
        mvalid = PTBModel(config)
        mtest = PTBModel(eval_config)

    tf.initialize_all_variables().run()

    for i in range(config.max_max_epoch):
        # Decay the learning rate once i exceeds max_epoch.
        lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
        m.assign_lr(session, config.learning_rate * lr_decay)

        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))

        perplexity = run_epoch(session, m, train_data, is_training=True)
        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, perplexity))

        perplexity = run_epoch(session, mvalid, valid_data)
        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, perplexity))

    perplexity = run_epoch(session, mtest, test_data)
    print("Test Perplexity: %.3f" % perplexity)
In [ ]: