In [ ]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
import helper
import problem_unittests as tests
source_path = 'data/small_vocab_en'
target_path = 'data/small_vocab_fr'
source_text = helper.load_data(source_path)
target_text = helper.load_data(target_path)
In [ ]:
view_sentence_range = (0, 10)
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
import numpy as np
print('Dataset Stats')
print('Roughly the number of unique words: {}'.format(len({word: None for word in source_text.split()})))
sentences = source_text.split('\n')
word_counts = [len(sentence.split()) for sentence in sentences]
print('Number of sentences: {}'.format(len(sentences)))
print('Average number of words in a sentence: {}'.format(np.average(word_counts)))
print()
print('English sentences {} to {}:'.format(*view_sentence_range))
print('\n'.join(source_text.split('\n')[view_sentence_range[0]:view_sentence_range[1]]))
print()
print('French sentences {} to {}:'.format(*view_sentence_range))
print('\n'.join(target_text.split('\n')[view_sentence_range[0]:view_sentence_range[1]]))
In [ ]:
def text_to_ids(source_text, target_text, source_vocab_to_int, target_vocab_to_int):
"""
Convert source and target text to proper word ids
:param source_text: String that contains all the source text.
:param target_text: String that contains all the target text.
:param source_vocab_to_int: Dictionary to go from the source words to an id
:param target_vocab_to_int: Dictionary to go from the target words to an id
:return: A tuple of lists (source_id_text, target_id_text)
"""
# TODO: Implement Function
return None, None
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_text_to_ids(text_to_ids)
In [ ]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
helper.preprocess_and_save_data(source_path, target_path, text_to_ids)
In [ ]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
import numpy as np
import helper
(source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), _ = helper.load_preprocess()
In [ ]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
from distutils.version import LooseVersion
import warnings
import tensorflow as tf
# Check TensorFlow Version
assert LooseVersion(tf.__version__) in [LooseVersion('1.0.0'), LooseVersion('1.0.1')], 'This project requires TensorFlow version 1.0 You are using {}'.format(tf.__version__)
print('TensorFlow Version: {}'.format(tf.__version__))
# Check for a GPU
if not tf.test.gpu_device_name():
warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
你将通过实现以下函数,构建出要构建一个序列到序列模型所需的组件:
model_inputsprocess_decoding_inputencoding_layerdecoding_layer_traindecoding_layer_inferdecoding_layerseq2seq_model实现 model_inputs() 函数,为神经网络创建 TF 占位符。该函数应该创建以下占位符:
在以下元祖(tuple)中返回占位符:(输入、目标、学习速率、保留率)
In [ ]:
def model_inputs():
"""
Create TF Placeholders for input, targets, and learning rate.
:return: Tuple (input, targets, learning rate, keep probability)
"""
# TODO: Implement Function
return None, None, None, None
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_model_inputs(model_inputs)
In [ ]:
def process_decoding_input(target_data, target_vocab_to_int, batch_size):
"""
Preprocess target data for dencoding
:param target_data: Target Placehoder
:param target_vocab_to_int: Dictionary to go from the target words to an id
:param batch_size: Batch Size
:return: Preprocessed target data
"""
# TODO: Implement Function
return None
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_process_decoding_input(process_decoding_input)
实现 encoding_layer(),以使用 tf.nn.dynamic_rnn() 创建编码器 RNN 层级。
In [ ]:
def encoding_layer(rnn_inputs, rnn_size, num_layers, keep_prob):
"""
Create encoding layer
:param rnn_inputs: Inputs for the RNN
:param rnn_size: RNN Size
:param num_layers: Number of layers
:param keep_prob: Dropout keep probability
:return: RNN state
"""
# TODO: Implement Function
return None
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_encoding_layer(encoding_layer)
使用 tf.contrib.seq2seq.simple_decoder_fn_train() 和 tf.contrib.seq2seq.dynamic_rnn_decoder() 创建训练分对数(training logits)。将 output_fn 应用到 tf.contrib.seq2seq.dynamic_rnn_decoder() 输出上。
In [ ]:
def decoding_layer_train(encoder_state, dec_cell, dec_embed_input, sequence_length, decoding_scope,
output_fn, keep_prob):
"""
Create a decoding layer for training
:param encoder_state: Encoder State
:param dec_cell: Decoder RNN Cell
:param dec_embed_input: Decoder embedded input
:param sequence_length: Sequence Length
:param decoding_scope: TenorFlow Variable Scope for decoding
:param output_fn: Function to apply the output layer
:param keep_prob: Dropout keep probability
:return: Train Logits
"""
# TODO: Implement Function
return None
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_decoding_layer_train(decoding_layer_train)
使用 tf.contrib.seq2seq.simple_decoder_fn_inference() 和 tf.contrib.seq2seq.dynamic_rnn_decoder() 创建推论分对数(inference logits)。
In [ ]:
def decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id, end_of_sequence_id,
maximum_length, vocab_size, decoding_scope, output_fn, keep_prob):
"""
Create a decoding layer for inference
:param encoder_state: Encoder state
:param dec_cell: Decoder RNN Cell
:param dec_embeddings: Decoder embeddings
:param start_of_sequence_id: GO ID
:param end_of_sequence_id: EOS Id
:param maximum_length: The maximum allowed time steps to decode
:param vocab_size: Size of vocabulary
:param decoding_scope: TensorFlow Variable Scope for decoding
:param output_fn: Function to apply the output layer
:param keep_prob: Dropout keep probability
:return: Inference Logits
"""
# TODO: Implement Function
return None
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_decoding_layer_infer(decoding_layer_infer)
实现 decoding_layer() 以创建解码器 RNN 层级。
rnn_size 和 num_layers 创建解码 RNN 单元。lambda 创建输出函数,将输入,也就是分对数转换为类分对数(class logits)。decoding_layer_train(encoder_state, dec_cell, dec_embed_input, sequence_length, decoding_scope, output_fn, keep_prob) 函数获取训练分对数。decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id, end_of_sequence_id, maximum_length, vocab_size, decoding_scope, output_fn, keep_prob) 函数获取推论分对数。注意:你将需要使用 tf.variable_scope 在训练和推论分对数间分享变量。
In [ ]:
def decoding_layer(dec_embed_input, dec_embeddings, encoder_state, vocab_size, sequence_length, rnn_size,
num_layers, target_vocab_to_int, keep_prob):
"""
Create decoding layer
:param dec_embed_input: Decoder embedded input
:param dec_embeddings: Decoder embeddings
:param encoder_state: The encoded state
:param vocab_size: Size of vocabulary
:param sequence_length: Sequence Length
:param rnn_size: RNN Size
:param num_layers: Number of layers
:param target_vocab_to_int: Dictionary to go from the target words to an id
:param keep_prob: Dropout keep probability
:return: Tuple of (Training Logits, Inference Logits)
"""
# TODO: Implement Function
return None, None
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_decoding_layer(decoding_layer)
应用你在上方实现的函数,以:
encoding_layer(rnn_inputs, rnn_size, num_layers, keep_prob) 编码输入。process_decoding_input(target_data, target_vocab_to_int, batch_size) 函数处理目标数据。decoding_layer(dec_embed_input, dec_embeddings, encoder_state, vocab_size, sequence_length, rnn_size, num_layers, target_vocab_to_int, keep_prob) 解码编码的输入数据。
In [ ]:
def seq2seq_model(input_data, target_data, keep_prob, batch_size, sequence_length, source_vocab_size, target_vocab_size,
enc_embedding_size, dec_embedding_size, rnn_size, num_layers, target_vocab_to_int):
"""
Build the Sequence-to-Sequence part of the neural network
:param input_data: Input placeholder
:param target_data: Target placeholder
:param keep_prob: Dropout keep probability placeholder
:param batch_size: Batch Size
:param sequence_length: Sequence Length
:param source_vocab_size: Source vocabulary size
:param target_vocab_size: Target vocabulary size
:param enc_embedding_size: Decoder embedding size
:param dec_embedding_size: Encoder embedding size
:param rnn_size: RNN Size
:param num_layers: Number of layers
:param target_vocab_to_int: Dictionary to go from the target words to an id
:return: Tuple of (Training Logits, Inference Logits)
"""
# TODO: Implement Function
return None
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_seq2seq_model(seq2seq_model)
In [ ]:
# Number of Epochs
epochs = None
# Batch Size
batch_size = None
# RNN Size
rnn_size = None
# Number of Layers
num_layers = None
# Embedding Size
encoding_embedding_size = None
decoding_embedding_size = None
# Learning Rate
learning_rate = None
# Dropout Keep Probability
keep_probability = None
In [ ]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
save_path = 'checkpoints/dev'
(source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), _ = helper.load_preprocess()
max_source_sentence_length = max([len(sentence) for sentence in source_int_text])
train_graph = tf.Graph()
with train_graph.as_default():
input_data, targets, lr, keep_prob = model_inputs()
sequence_length = tf.placeholder_with_default(max_source_sentence_length, None, name='sequence_length')
input_shape = tf.shape(input_data)
train_logits, inference_logits = seq2seq_model(
tf.reverse(input_data, [-1]), targets, keep_prob, batch_size, sequence_length, len(source_vocab_to_int), len(target_vocab_to_int),
encoding_embedding_size, decoding_embedding_size, rnn_size, num_layers, target_vocab_to_int)
tf.identity(inference_logits, 'logits')
with tf.name_scope("optimization"):
# Loss function
cost = tf.contrib.seq2seq.sequence_loss(
train_logits,
targets,
tf.ones([input_shape[0], sequence_length]))
# Optimizer
optimizer = tf.train.AdamOptimizer(lr)
# Gradient Clipping
gradients = optimizer.compute_gradients(cost)
capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
train_op = optimizer.apply_gradients(capped_gradients)
In [ ]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
import time
def get_accuracy(target, logits):
"""
Calculate accuracy
"""
max_seq = max(target.shape[1], logits.shape[1])
if max_seq - target.shape[1]:
target = np.pad(
target,
[(0,0),(0,max_seq - target.shape[1])],
'constant')
if max_seq - logits.shape[1]:
logits = np.pad(
logits,
[(0,0),(0,max_seq - logits.shape[1]), (0,0)],
'constant')
return np.mean(np.equal(target, np.argmax(logits, 2)))
train_source = source_int_text[batch_size:]
train_target = target_int_text[batch_size:]
valid_source = helper.pad_sentence_batch(source_int_text[:batch_size])
valid_target = helper.pad_sentence_batch(target_int_text[:batch_size])
with tf.Session(graph=train_graph) as sess:
sess.run(tf.global_variables_initializer())
for epoch_i in range(epochs):
for batch_i, (source_batch, target_batch) in enumerate(
helper.batch_data(train_source, train_target, batch_size)):
start_time = time.time()
_, loss = sess.run(
[train_op, cost],
{input_data: source_batch,
targets: target_batch,
lr: learning_rate,
sequence_length: target_batch.shape[1],
keep_prob: keep_probability})
batch_train_logits = sess.run(
inference_logits,
{input_data: source_batch, keep_prob: 1.0})
batch_valid_logits = sess.run(
inference_logits,
{input_data: valid_source, keep_prob: 1.0})
train_acc = get_accuracy(target_batch, batch_train_logits)
valid_acc = get_accuracy(np.array(valid_target), batch_valid_logits)
end_time = time.time()
print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.3f}, Validation Accuracy: {:>6.3f}, Loss: {:>6.3f}'
.format(epoch_i, batch_i, len(source_int_text) // batch_size, train_acc, valid_acc, loss))
# Save Model
saver = tf.train.Saver()
saver.save(sess, save_path)
print('Model Trained and Saved')
In [ ]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
# Save parameters for checkpoint
helper.save_params(save_path)
In [ ]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
import tensorflow as tf
import numpy as np
import helper
import problem_unittests as tests
_, (source_vocab_to_int, target_vocab_to_int), (source_int_to_vocab, target_int_to_vocab) = helper.load_preprocess()
load_path = helper.load_params()
In [ ]:
def sentence_to_seq(sentence, vocab_to_int):
"""
Convert a sentence to a sequence of ids
:param sentence: String
:param vocab_to_int: Dictionary to go from the words to an id
:return: List of word ids
"""
# TODO: Implement Function
return None
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_sentence_to_seq(sentence_to_seq)
In [ ]:
translate_sentence = 'he saw a old yellow truck .'
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
translate_sentence = sentence_to_seq(translate_sentence, source_vocab_to_int)
loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
# Load saved model
loader = tf.train.import_meta_graph(load_path + '.meta')
loader.restore(sess, load_path)
input_data = loaded_graph.get_tensor_by_name('input:0')
logits = loaded_graph.get_tensor_by_name('logits:0')
keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')
translate_logits = sess.run(logits, {input_data: [translate_sentence], keep_prob: 1.0})[0]
print('Input')
print(' Word Ids: {}'.format([i for i in translate_sentence]))
print(' English Words: {}'.format([source_int_to_vocab[i] for i in translate_sentence]))
print('\nPrediction')
print(' Word Ids: {}'.format([i for i in np.argmax(translate_logits, 1)]))
print(' French Words: {}'.format([target_int_to_vocab[i] for i in np.argmax(translate_logits, 1)]))
你可能注意到了,某些句子的翻译质量比其他的要好。因为你使用的数据集只有 227 个英语单词,但实际生活中有数千个单词,只有使用这些单词的句子结果才会比较理想。对于此项目,不需要达到完美的翻译。但是,如果你想创建更好的翻译模型,则需要更好的数据。
你可以使用 WMT10 French-English corpus 语料库训练模型。该数据集拥有更多的词汇,讨论的话题也更丰富。但是,训练时间要好多天的时间,所以确保你有 GPU 并且对于我们提供的数据集,你的神经网络性能很棒。提交此项目后,别忘了研究下 WMT10 语料库。
提交项目时,确保先运行所有单元,然后再保存记事本。保存记事本文件为 “dlnd_language_translation.ipynb”,再通过菜单中的“文件” ->“下载为”将其另存为 HTML 格式。提交的项目文档中需包含“helper.py”和“problem_unittests.py”文件。