机器翻译的神经网络实现

本节课我们讲述了利用编码器-解码器架构实现汉-英机器翻译。

整个代码包括了数据预处理、编码器+简单解码器以及编码器+带有注意力机制的解码器三个部分组成。

本文件是集智AI学园http://campus.swarma.org 出品的“火炬上的深度学习”第VIII课的配套源代码


In [1]:
# 用到的包
#from __future__ import unicode_literals, print_function, division
# 进行系统操作,如io、正则表达式的包
from io import open
import unicodedata
import string
import re
import random
#import time
#import math

#Pytorch必备的包
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
import torch.utils.data as DataSet


# 绘图所用的包
import matplotlib.pyplot as plt
import numpy as np


# 判断本机是否有支持的GPU
use_cuda = torch.cuda.is_available()
# 即时绘图
%matplotlib inline

一、数据准备

从硬盘读取语料文件,进行基本的预处理


In [2]:
# 读取平行语料库
# 这是人民日报语料库
lines = open('data/chinese.txt', encoding = 'utf-8')
chinese = lines.read().strip().split('\n')
lines = open('data/english.txt', encoding = 'utf-8')
english = lines.read().strip().split('\n')
print(len(chinese))
print(len(english))


100000
100000

In [31]:
# 定义两个特殊符号,分别对应句子头和句子尾
SOS_token = 0
EOS_token = 1


# 定义一个语言类,方便进行自动的建立、词频的统计等
# 在这个对象中,最重要的是两个字典:word2index,index2word
# 故名思议,第一个字典是将word映射到索引,第二个是将索引映射到word
class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        # 在语言中添加一个新句子,句子是用空格隔开的一组单词
        # 将单词切分出来,并分别进行处理
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        # 插入一个单词,如果单词已经在字典中,则更新字典中对应单词的频率
        # 同时建立反向索引,可以从单词编号找到单词
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
# 将unicode编码转变为ascii编码
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# 把输入的英文字符串转成小写
def normalizeEngString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s

# 对输入的单词对做过滤,保证每句话的单词数不能超过MAX_LENGTH
def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH

# 输入一个句子,输出一个单词对应的编码序列
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


# 和上面的函数功能类似,不同在于输出的序列等长=MAX_LENGTH
def indexFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    for i in range(MAX_LENGTH - len(indexes)):
        indexes.append(EOS_token)
    return(indexes)

# 从一个词对到下标
def indexFromPair(pair):
    input_variable = indexFromSentence(input_lang, pair[0])
    target_variable = indexFromSentence(output_lang, pair[1])
    return (input_variable, target_variable)

# 从一个列表到句子
def SentenceFromList(lang, lst):
    result = [lang.index2word[i] for i in lst if i != EOS_token]
    if lang.name == 'Chinese':
        result = ' '.join(result)
    else:
        result = ' '.join(result)
    return(result)

# 计算准确度的函数
def rightness(predictions, labels):
    """计算预测错误率的函数,其中predictions是模型给出的一组预测结果,batch_size行num_classes列的矩阵,labels是数据之中的正确答案"""
    pred = torch.max(predictions.data, 1)[1] # 对于任意一行(一个样本)的输出值的第1个维度,求最大,得到每一行的最大元素的下标
    rights = pred.eq(labels.data).sum() #将下标与labels中包含的类别进行比较,并累计得到比较正确的数量
    return rights, len(labels) #返回正确的数量和这一次一共比较了多少元素

In [19]:
# 处理数据形成训练数据
# 设置句子的最大长度
MAX_LENGTH = 20

#对英文做标准化处理
pairs = [[chi, normalizeEngString(eng)] for chi, eng in zip(chinese, english)]

# 对句子对做过滤,处理掉那些超过MAX_LENGTH长度的句子
input_lang = Lang('Chinese')
output_lang = Lang('English')
pairs = [pair for pair in pairs if filterPair(pair)]
print('有效句子对:', len(pairs))

# 建立两个字典(中文的和英文的)
for pair in pairs:
    input_lang.addSentence(pair[0])
    output_lang.addSentence(pair[1])
print("总单词数:")
print(input_lang.name, input_lang.n_words)
print(output_lang.name, output_lang.n_words)


# 形成训练集,首先,打乱所有句子的顺序
random_idx = np.random.permutation(range(len(pairs)))
pairs = [pairs[i] for i in random_idx]

# 将语言转变为单词的编码构成的序列
pairs = [indexFromPair(pair) for pair in pairs]
    
# 形成训练集、校验集和测试集
valid_size = len(pairs) // 10
if valid_size > 10000:
    valid_size = 10000
pairs = pairs[ : - valid_size]
valid_pairs = pairs[-valid_size : -valid_size // 2]
test_pairs = pairs[- valid_size // 2 :]

# 利用PyTorch的dataset和dataloader对象,将数据加载到加载器里面,并且自动分批

batch_size = 30 #一撮包含30个数据记录,这个数字越大,系统在训练的时候,每一个周期处理的数据就越多,这样处理越快,但总的数据量会减少

print('训练记录:', len(pairs))
print('校验记录:', len(valid_pairs))
print('测试记录:', len(test_pairs))

# 形成训练对列表,用于喂给train_dataset
pairs_X = [pair[0] for pair in pairs]
pairs_Y = [pair[1] for pair in pairs]
valid_X = [pair[0] for pair in valid_pairs]
valid_Y = [pair[1] for pair in valid_pairs]
test_X = [pair[0] for pair in test_pairs]
test_Y = [pair[1] for pair in test_pairs]


# 形成训练集
train_dataset = DataSet.TensorDataset(torch.LongTensor(pairs_X), torch.LongTensor(pairs_Y))
# 形成数据加载器
train_loader = DataSet.DataLoader(train_dataset, batch_size = batch_size, shuffle = True, num_workers=8)


# 校验数据
valid_dataset = DataSet.TensorDataset(torch.LongTensor(valid_X), torch.LongTensor(valid_Y))
valid_loader = DataSet.DataLoader(valid_dataset, batch_size = batch_size, shuffle = True, num_workers=8)

# 测试数据
test_dataset = DataSet.TensorDataset(torch.LongTensor(test_X), torch.LongTensor(test_Y))
test_loader = DataSet.DataLoader(test_dataset, batch_size = batch_size, shuffle = True, num_workers = 8)


有效句子对: 19919
总单词数:
Chinese 18671
English 13493
训练记录: 17928
校验记录: 995
测试记录: 996

二、构建编码器及简单的解码器RNN


In [22]:
# 构建编码器RNN
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        # 第一层Embeddeing
        self.embedding = nn.Embedding(input_size, hidden_size)
        # 第二层GRU,注意GRU中可以定义很多层,主要靠num_layers控制
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first = True, 
                          num_layers = self.n_layers, bidirectional = True)

    def forward(self, input, hidden):
        #前馈过程
        #input尺寸: batch_size, length_seq
        embedded = self.embedding(input)
        #embedded尺寸:batch_size, length_seq, hidden_size
        output = embedded
        output, hidden = self.gru(output, hidden)
        # output尺寸:batch_size, length_seq, hidden_size
        # hidden尺寸:num_layers * directions, batch_size, hidden_size
        return output, hidden

    def initHidden(self, batch_size):
        # 对隐含单元变量全部进行初始化
        #num_layers * num_directions, batch, hidden_size
        result = Variable(torch.zeros(self.n_layers * 2, batch_size, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result

# 解码器网络
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers=1):
        super(DecoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        # 嵌入层
        self.embedding = nn.Embedding(output_size, hidden_size)
        # GRU单元
        # 设置batch_first为True的作用就是为了让GRU接受的张量可以和其它单元类似,第一个维度为batch_size
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first = True,
                        num_layers = self.n_layers, bidirectional = True)
        # 最后的全链接层
        self.out = nn.Linear(hidden_size * 2, output_size)
        self.softmax = nn.LogSoftmax()

    def forward(self, input, hidden):
        # input大小:batch_size, length_seq
        output = self.embedding(input)
        # embedded大小:batch_size, length_seq, hidden_size
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        # output大小:batch_size, length_seq, hidden_size * directions
        # hidden大小:n_layers * directions, batch_size, hidden_size
        output = self.softmax(self.out(output[:, -1, :]))
        # output大小:batch_size * output_size
        # 从output中取时间步重新开始
        
        return output, hidden

    def initHidden(self):
        # 初始化隐含单元的状态,输入变量的尺寸:num_layers * directions, batch_size, hidden_size
        result = Variable(torch.zeros(self.n_layers * 2, batch_size, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result

In [23]:
# 开始训练过程
# 定义网络结构
hidden_size = 512
max_length = MAX_LENGTH
n_layers = 1

encoder = EncoderRNN(input_lang.n_words, hidden_size, n_layers = n_layers)
decoder = DecoderRNN(hidden_size, output_lang.n_words, n_layers = n_layers)

if use_cuda:
    # 如果本机有GPU可用,则将模型加载到GPU上
    encoder = encoder.cuda()
    decoder = decoder.cuda()

learning_rate = 0.001
# 为两个网络分别定义优化器
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

# 定义损失函数
criterion = nn.NLLLoss()
teacher_forcing_ratio = 0.5

plot_losses = []

# 开始200轮的循环
num_epoch = 100
for epoch in range(num_epoch):
    print_loss_total = 0
    # 对训练数据循环
    for data in train_loader:
        input_variable = Variable(data[0]).cuda() if use_cuda else Variable(data[0])
        # input_variable的大小:batch_size, length_seq
        target_variable = Variable(data[1]).cuda() if use_cuda else Variable(data[1])
        # target_variable的大小:batch_size, length_seq
        
        # 初始化编码器状态
        encoder_hidden = encoder.initHidden(data[0].size()[0])
        # 清空梯度
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        loss = 0

        # 开始编码器的计算,对时间步的循环由系统自动完成
        encoder_outputs, encoder_hidden = encoder(input_variable, encoder_hidden)
        # encoder_outputs的大小:batch_size, length_seq, hidden_size*direction
        # encoder_hidden的大小:direction*n_layer, batch_size, hidden_size
        
        # 开始解码器的工作
        # 输入给解码器的第一个字符
        decoder_input = Variable(torch.LongTensor([[SOS_token]] * target_variable.size()[0]))
        # decoder_input大小:batch_size, length_seq
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input

        # 让解码器的隐藏层状态等于编码器的隐藏层状态
        decoder_hidden = encoder_hidden
        # decoder_hidden大小:direction*n_layer, batch_size, hidden_size

        # 以teacher_forcing_ratio的比例用target中的翻译结果作为监督信息
        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
        base = torch.zeros(target_variable.size()[0])
        if use_teacher_forcing:
            # 教师监督: 将下一个时间步的监督信息输入给解码器
            # 对时间步循环
            for di in range(MAX_LENGTH):
                # 开始一步解码
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                # decoder_ouput大小:batch_size, output_size
                # 计算损失函数
                loss += criterion(decoder_output, target_variable[:, di])
                # 将训练数据当做下一时间步的输入
                decoder_input = target_variable[:, di].unsqueeze(1)  # Teacher forcing
                # decoder_input大小:batch_size, length_seq
                
        else:
            # 没有教师训练: 使用解码器自己的预测作为下一时间步的输入
            # 开始对时间步进行循环
            for di in range(MAX_LENGTH):
                # 进行一步解码
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                #decoder_ouput大小:batch_size, output_size(vocab_size)
                
                #从输出结果(概率的对数值)中选择出一个数值最大的单词作为输出放到了topi中
                topv, topi = decoder_output.data.topk(1, dim = 1)
                #topi 尺寸:batch_size, k
                ni = topi[:, 0]

                # 将输出结果ni包裹成Variable作为解码器的输入
                decoder_input = Variable(ni.unsqueeze(1))
                # decoder_input大小:batch_size, length_seq
                decoder_input = decoder_input.cuda() if use_cuda else decoder_input

                #计算损失函数
                loss += criterion(decoder_output, target_variable[:, di])
        
            
        
        # 开始反向传播
        loss.backward()
        loss = loss.cpu() if use_cuda else loss
        # 开始梯度下降
        encoder_optimizer.step()
        decoder_optimizer.step()
        # 累加总误差
        print_loss_total += loss.data.numpy()[0]

    # 计算训练时候的平均误差
    print_loss_avg = print_loss_total / len(train_loader)
        
    # 开始跑校验数据集
    valid_loss = 0
    rights = []
    # 对校验数据集循环
    for data in valid_loader:
        input_variable = Variable(data[0]).cuda() if use_cuda else Variable(data[0])
        # input_variable的大小:batch_size, length_seq
        target_variable = Variable(data[1]).cuda() if use_cuda else Variable(data[1])
        # target_variable的大小:batch_size, length_seq

        encoder_hidden = encoder.initHidden(data[0].size()[0])

        loss = 0
        encoder_outputs, encoder_hidden = encoder(input_variable, encoder_hidden)
        # encoder_outputs的大小:batch_size, length_seq, hidden_size*direction
        # encoder_hidden的大小:direction*n_layer, batch_size, hidden_size

        decoder_input = Variable(torch.LongTensor([[SOS_token]] * target_variable.size()[0]))
        # decoder_input大小:batch_size, length_seq
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input

        decoder_hidden = encoder_hidden
        # decoder_hidden大小:direction*n_layer, batch_size, hidden_size

        # 没有教师监督: 使用解码器自己的预测作为下一时间步解码器的输入
        for di in range(MAX_LENGTH):
            # 一步解码器运算
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            #decoder_ouput大小:batch_size, output_size(vocab_size)
            
            # 选择输出最大的项作为解码器的预测答案
            topv, topi = decoder_output.data.topk(1, dim = 1)
            #topi 尺寸:batch_size, k
            ni = topi[:, 0]
            decoder_input = Variable(ni.unsqueeze(1))
            # decoder_input大小:batch_size, length_seq
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
            
            # 计算预测的准确率,记录在right中,right为一个二元组,分别存储猜对的个数和总数
            right = rightness(decoder_output, target_variable[:, di].unsqueeze(1))
            rights.append(right)
            
            # 计算损失函数
            loss += criterion(decoder_output, target_variable[:, di])
        loss = loss.cpu() if use_cuda else loss
        # 累加校验时期的损失函数
        valid_loss += loss.data.numpy()[0]
    # 打印每一个Epoch的输出结果
    right_ratio = 1.0 * np.sum([i[0] for i in rights]) / np.sum([i[1] for i in rights])
    print('进程:%d%% 训练损失:%.4f,校验损失:%.4f,词正确率:%.2f%%' % (epoch * 1.0 / num_epoch * 100, 
                                                    print_loss_avg,
                                                    valid_loss / len(valid_loader),
                                                    100.0 * right_ratio))
    # 记录基本统计指标
    plot_losses.append([print_loss_avg, valid_loss / len(valid_loader), right_ratio])


进程:0% 训练损失:89.9247,校验损失:81.1882,词正确率:34.67%
进程:1% 训练损失:72.5339,校验损失:68.1044,词正确率:37.18%
进程:2% 训练损失:57.4789,校验损失:58.9406,词正确率:37.79%
进程:3% 训练损失:46.5803,校验损失:52.1379,词正确率:41.60%
进程:4% 训练损失:41.5157,校验损失:45.9495,词正确率:45.08%
进程:5% 训练损失:37.0000,校验损失:43.3055,词正确率:48.73%
进程:6% 训练损失:33.7056,校验损失:41.3580,词正确率:50.71%
进程:7% 训练损失:30.1503,校验损失:36.6977,词正确率:54.99%
进程:8% 训练损失:28.2595,校验损失:34.6861,词正确率:57.15%
进程:9% 训练损失:25.8340,校验损失:32.2752,词正确率:59.29%
进程:10% 训练损失:23.2198,校验损失:31.9422,词正确率:62.15%
进程:11% 训练损失:22.6783,校验损失:28.8273,词正确率:64.33%
进程:12% 训练损失:21.4818,校验损失:28.7464,词正确率:66.00%
进程:13% 训练损失:20.0393,校验损失:27.3473,词正确率:67.62%
进程:14% 训练损失:19.3792,校验损失:25.5881,词正确率:68.65%
进程:15% 训练损失:17.6966,校验损失:24.7376,词正确率:69.95%
进程:16% 训练损失:17.8276,校验损失:24.4304,词正确率:71.33%
进程:17% 训练损失:16.8906,校验损失:23.0830,词正确率:73.22%
进程:18% 训练损失:16.2558,校验损失:22.9110,词正确率:73.43%
进程:19% 训练损失:16.0171,校验损失:22.4034,词正确率:73.54%
进程:20% 训练损失:15.6500,校验损失:21.8747,词正确率:73.88%
进程:21% 训练损失:14.9279,校验损失:20.8785,词正确率:75.64%
进程:22% 训练损失:15.4368,校验损失:21.2922,词正确率:76.22%
进程:23% 训练损失:14.0406,校验损失:19.9862,词正确率:76.75%
进程:24% 训练损失:14.1648,校验损失:20.2397,词正确率:77.32%
进程:25% 训练损失:14.9957,校验损失:20.7080,词正确率:76.36%
进程:26% 训练损失:15.3461,校验损失:20.9806,词正确率:76.48%
进程:27% 训练损失:14.4112,校验损失:19.7684,词正确率:77.16%
进程:28% 训练损失:13.6654,校验损失:18.9664,词正确率:79.52%
进程:28% 训练损失:13.5648,校验损失:19.5339,词正确率:77.26%
进程:30% 训练损失:13.6815,校验损失:18.9681,词正确率:78.93%
进程:31% 训练损失:13.4977,校验损失:19.3318,词正确率:78.46%
进程:32% 训练损失:13.8443,校验损失:18.3663,词正确率:79.91%
进程:33% 训练损失:13.5776,校验损失:18.7688,词正确率:79.38%
进程:34% 训练损失:12.5456,校验损失:17.6769,词正确率:80.60%
进程:35% 训练损失:12.0912,校验损失:17.5641,词正确率:80.92%
进程:36% 训练损失:12.3518,校验损失:17.4344,词正确率:81.29%
进程:37% 训练损失:12.2112,校验损失:17.7829,词正确率:80.81%
进程:38% 训练损失:12.4860,校验损失:19.8045,词正确率:78.58%
进程:39% 训练损失:13.2213,校验损失:19.3801,词正确率:79.44%
进程:40% 训练损失:12.8615,校验损失:18.3904,词正确率:80.15%
进程:41% 训练损失:13.6158,校验损失:19.2615,词正确率:79.17%
进程:42% 训练损失:13.2477,校验损失:19.5315,词正确率:78.52%
进程:43% 训练损失:13.3466,校验损失:18.7686,词正确率:79.69%
进程:44% 训练损失:13.2575,校验损失:18.1053,词正确率:80.85%
进程:45% 训练损失:12.3427,校验损失:17.5427,词正确率:82.04%
进程:46% 训练损失:11.8990,校验损失:17.0585,词正确率:82.15%
进程:47% 训练损失:12.2597,校验损失:18.2527,词正确率:80.70%
进程:48% 训练损失:12.3291,校验损失:17.9417,词正确率:80.88%
进程:49% 训练损失:12.7277,校验损失:19.0262,词正确率:80.08%
进程:50% 训练损失:13.9948,校验损失:19.9097,词正确率:78.92%
进程:51% 训练损失:13.8022,校验损失:18.9199,词正确率:80.29%
进程:52% 训练损失:12.6219,校验损失:17.6764,词正确率:81.15%
进程:53% 训练损失:12.9052,校验损失:18.4251,词正确率:80.59%
进程:54% 训练损失:12.4066,校验损失:17.1330,词正确率:82.49%
进程:55% 训练损失:11.9578,校验损失:17.3121,词正确率:81.62%
进程:56% 训练损失:13.1919,校验损失:20.1295,词正确率:78.51%
进程:56% 训练损失:14.7060,校验损失:19.7654,词正确率:79.13%
进程:57% 训练损失:14.3964,校验损失:20.2252,词正确率:78.01%
进程:59% 训练损失:13.6469,校验损失:18.1988,词正确率:81.23%
进程:60% 训练损失:13.1992,校验损失:19.2709,词正确率:78.90%
进程:61% 训练损失:13.1777,校验损失:19.3619,词正确率:78.97%
进程:62% 训练损失:13.4007,校验损失:19.3861,词正确率:79.86%
进程:63% 训练损失:12.4746,校验损失:18.3404,词正确率:80.72%
进程:64% 训练损失:13.6272,校验损失:19.2301,词正确率:79.89%
进程:65% 训练损失:13.8078,校验损失:19.8143,词正确率:79.28%
进程:66% 训练损失:14.0686,校验损失:19.3719,词正确率:79.62%
进程:67% 训练损失:13.8010,校验损失:19.3596,词正确率:79.54%
进程:68% 训练损失:12.8349,校验损失:18.2700,词正确率:80.86%
进程:69% 训练损失:12.9624,校验损失:18.8305,词正确率:80.23%
进程:70% 训练损失:13.1850,校验损失:18.3850,词正确率:81.01%
进程:71% 训练损失:13.8452,校验损失:19.9344,词正确率:78.73%
进程:72% 训练损失:13.8753,校验损失:19.3479,词正确率:80.01%
进程:73% 训练损失:14.6577,校验损失:21.0019,词正确率:77.71%
进程:74% 训练损失:15.1176,校验损失:20.9338,词正确率:78.03%
进程:75% 训练损失:14.9124,校验损失:20.0336,词正确率:78.68%
进程:76% 训练损失:13.8223,校验损失:19.0024,词正确率:80.21%
进程:77% 训练损失:13.5152,校验损失:19.2895,词正确率:79.90%
进程:78% 训练损失:14.6623,校验损失:19.9568,词正确率:78.64%
进程:79% 训练损失:13.6616,校验损失:19.5123,词正确率:79.32%
进程:80% 训练损失:14.0290,校验损失:19.7559,词正确率:78.77%
进程:81% 训练损失:13.8777,校验损失:19.2427,词正确率:79.70%
进程:82% 训练损失:13.9563,校验损失:19.9950,词正确率:78.82%
进程:83% 训练损失:14.5353,校验损失:20.8423,词正确率:77.88%
进程:84% 训练损失:14.7102,校验损失:19.5419,词正确率:79.47%
进程:85% 训练损失:14.0179,校验损失:19.5941,词正确率:79.55%
进程:86% 训练损失:14.1646,校验损失:19.4973,词正确率:79.51%
进程:87% 训练损失:12.9281,校验损失:18.6479,词正确率:80.69%
进程:88% 训练损失:13.7345,校验损失:19.5333,词正确率:79.72%
进程:89% 训练损失:14.8408,校验损失:20.1166,词正确率:78.62%
进程:90% 训练损失:14.0102,校验损失:20.0659,词正确率:79.14%
进程:91% 训练损失:13.4085,校验损失:19.1350,词正确率:79.98%
进程:92% 训练损失:13.9362,校验损失:20.2341,词正确率:78.85%
进程:93% 训练损失:14.2627,校验损失:19.9931,词正确率:78.81%
进程:94% 训练损失:14.9471,校验损失:20.1555,词正确率:78.88%
进程:95% 训练损失:14.4255,校验损失:19.5162,词正确率:80.07%
进程:96% 训练损失:13.7533,校验损失:19.2370,词正确率:79.93%
进程:97% 训练损失:13.6021,校验损失:19.2544,词正确率:79.82%
进程:98% 训练损失:13.1951,校验损失:19.3172,词正确率:79.87%
进程:99% 训练损失:14.8555,校验损失:20.0122,词正确率:79.17%

In [24]:
# 将统计指标绘图
a = [i[0] for i in plot_losses]
b = [i[1] for i in plot_losses]
c = [i[2] * 100 for i in plot_losses]
plt.plot(a, label = 'Training Loss')
plt.plot(b, label = 'Validation Loss')
plt.plot(c, label = 'Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Loss & Accuracy')
plt.legend()


Out[24]:
<matplotlib.legend.Legend at 0x10e9d5e48>

In [26]:
# 在测试集上测试模型运行的效果

# 首先,在测试集中随机选择20个句子作为测试
indices = np.random.choice(range(len(test_X)), 20)

# 对每个句子进行循环
for ind in indices:
    data = [test_X[ind]]
    target = [test_Y[ind]]
    # 把源语言的句子打印出来
    print(SentenceFromList(input_lang, data[0]))
    input_variable = Variable(torch.LongTensor(data)).cuda() if use_cuda else Variable(torch.LongTensor(data))
    # input_variable的大小:batch_size, length_seq
    target_variable = Variable(torch.LongTensor(target)).cuda() if use_cuda else Variable(torch.LongTensor(target))
    # target_variable的大小:batch_size, length_seq

    # 初始化编码器
    encoder_hidden = encoder.initHidden(input_variable.size()[0])

    loss = 0
    
    # 编码器开始编码,结果存储到了encoder_hidden中
    encoder_outputs, encoder_hidden = encoder(input_variable, encoder_hidden)
    # encoder_outputs的大小:batch_size, length_seq, hidden_size*direction
    # encoder_hidden的大小:direction*n_layer, batch_size, hidden_size

    # 将SOS作为解码器的第一个输入
    decoder_input = Variable(torch.LongTensor([[SOS_token]] * target_variable.size()[0]))
    # decoder_input大小:batch_size, length_seq
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input

    # 将编码器的隐含层单元数值拷贝给解码器的隐含层单元
    decoder_hidden = encoder_hidden
    # decoder_hidden大小:direction*n_layer, batch_size, hidden_size

    # 没有教师指导下的预测: 使用解码器自己的预测作为解码器下一时刻的输入
    output_sentence = []
    decoder_attentions = torch.zeros(max_length, max_length)
    rights = []
    # 按照输出字符进行时间步循环
    for di in range(MAX_LENGTH):
        # 解码器一个时间步的计算
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        #decoder_ouput大小:batch_size, output_size(vocab_size)
        
        # 解码器的输出
        topv, topi = decoder_output.data.topk(1, dim = 1)
        #topi 尺寸:batch_size, k
        ni = topi[:, 0]
        decoder_input = Variable(ni.unsqueeze(1))
        ni = ni.numpy()[0]
        
        # 将本时间步输出的单词编码加到output_sentence里面
        output_sentence.append(ni)
        # decoder_input大小:batch_size, length_seq
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input
        
        # 计算输出字符的准确度
        right = rightness(decoder_output, target_variable[:, di].unsqueeze(1))
        rights.append(right)
    # 解析出编码器给出的翻译结果
    sentence = SentenceFromList(output_lang, output_sentence)
    # 解析出标准答案
    standard = SentenceFromList(output_lang, target[0])
    
    # 将句子打印出来
    print('机器翻译:', sentence)
    print('标准翻译:', standard)
    # 输出本句话的准确率
    right_ratio = 1.0 * np.sum([i[0] for i in rights]) / np.sum([i[1] for i in rights])
    print('词准确率:', 100.0 * right_ratio)
    print('\n')


第一牺牲品将是总统候选人在竞选期间做出的承诺.
机器翻译: the first sacrifice will the promises promises by the presidential the presidential presidential candidates the of .
标准翻译: the first sacrifice will be the campaign promises made by the two presidential candidates during the campaign .
词准确率: 45.0


不懈奋斗的精神,就是我们推进社会主义现代化建设伟大事业的精神支柱.
机器翻译: the spirit of struggling persistently is the spiritual backbone in advancing the great cause of socialist modernization construction .
标准翻译: the spirit of struggling persistently is the spiritual backbone in advancing the great cause of socialist modernization construction .
词准确率: 100.0


吴邦国,黄菊,吴阶平,韩杼滨等也参加了上海代表团的会议.
机器翻译: wu bangguo huang ju wu jieping han zhubin also took part in in shanghai delegation delegation in shanghai .
标准翻译: wu bangguo huang ju wu jieping and han zhubin also took part in the shanghai delegation s meeting .
词准确率: 50.0


孙炎彪夫妇受贿案是由浙江省检察院1999年侦查终结后移送浙江省嘉兴市检察院起诉的.
机器翻译: the bribery case of the sun yanbiao couple was investigated by the zhejiang provincial procuratorate in .
标准翻译: the bribery case of the sun yanbiao couple was investigated by the zhejiang provincial procuratorate in .
词准确率: 100.0


中国驻美大使李肇星参加了上述会见.
机器翻译: also attending the meetings was li zhaoxing ambassador to the united states was chinese .
标准翻译: also attending the meetings was li zhaoxing chinese ambassador to the united states .
词准确率: 60.0


表面气氛友好,但主要目的没有达到.
机器翻译: the atmosphere was good superficially but bush bush to achieve his main purposes .
标准翻译: the atmosphere was good superficially but bush failed to achieve his main purposes .
词准确率: 95.0


我国的"863"计划,是顺应世界高技术发展潮流而提出的.
机器翻译: china s project was proposed in response to the development trend of the world s high technology .
标准翻译: china s project was proposed in response to the development trend of the world s high technology .
词准确率: 100.0


美国还加大了干预俄罗斯内部事务的力度.
机器翻译: the united states has intensified its inference in internal affairs .
标准翻译: the united states has intensified its inference in russian internal affairs .
词准确率: 80.0


加强聘后管理.
机器翻译: enhance post hiring management .
标准翻译: enhance post hiring management .
词准确率: 100.0


这是出席今天会议的全体代表的共识.
机器翻译: this is the consensus of all deputies attending today today .
标准翻译: this is the consensus of all deputies attending today s session .
词准确率: 85.0


这充分体现了党和政府对人民军队的关爱.
机器翻译: this fully reflects the concern of the party and government for the people s army .
标准翻译: this fully reflects the concern of the party and government for the people s army .
词准确率: 100.0


近几年持续干旱,使水力发电"开工不足".
机器翻译: continuous drought in recent years has resulted in the continuous hydroelectric power stations to work under capacity .
标准翻译: the continuous drought in recent years has resulted in the hydroelectric power stations to work under capacity .
词准确率: 50.0


这是特区通过向全国辐射,为中国作出的重大的贡献.
机器翻译: this is the momentous contribution of the sezs which has radiated the entire nation .
标准翻译: this is the momentous contribution of the sezs which has radiated the entire nation .
词准确率: 100.0


7日上午的讨论会由波兰总统克瓦希涅夫斯基主持.
机器翻译: the september group discussion was discussion by chaired polish kwasniewski kwasniewski .
标准翻译: the september group discussion was chaired by polish president kwasniewski .
词准确率: 75.0


李岚清热情洋溢的演说受到了与会者的热烈欢迎.
机器翻译: li lanqing s enthusiastic was was was was was was was was meeting in .
标准翻译: li lanqing s enthusiastic speech was warmly acclaimed by the participants of the meeting .
词准确率: 55.0


如果他们的所作所为对两岸关系造成严重后果,责任要由他们来承担.
机器翻译: if what they serious to to relations strait they do they to to relations all responsibilities responsibilities .
标准翻译: if what they do cause serious consequences to cross strait relations they will have to bear all responsibilities .
词准确率: 20.0


次日,共和党采取人海战术,对杰福兹展开了轮番劝说.
机器翻译: the following day the republicans adopted human sea tactics by taking it in turns to advise him .
标准翻译: the following day the republicans adopted human sea tactics by taking it in turns to advise him .
词准确率: 100.0


范匡夫在这方面向党和人民交了一份合格的答卷.
机器翻译: fan kuangfu has submitted a relevant qualified answer in this field to the party and people .
标准翻译: fan kuangfu has submitted a relevant qualified answer in this field to the party and people .
词准确率: 100.0


第一,要继续坚持通过改革推进发展的路子.
机器翻译: first we should continue to adhere to the road of promoting development through .
标准翻译: first we should continue to adhere to the road of promoting development through reform .
词准确率: 90.0


立法法草案此前已经全国人大常委会两审议.
机器翻译: the draft legislative had been deliberated twice deliberated earlier by the delegation .
标准翻译: the draft legislative law had been deliberated twice earlier by the npcsc .
词准确率: 70.0


Attention Model


In [12]:
# 重新处理数据形成训练数据、校验数据与测试数据,主要是MAX_Length更大了
# 设置句子的最大长度
MAX_LENGTH = 20

#对英文做标准化处理
pairs = [[chi, normalizeEngString(eng)] for chi, eng in zip(chinese, english)]

# 对句子对做过滤,处理掉那些超过MAX_LENGTH长度的句子
input_lang = Lang('Chinese')
output_lang = Lang('English')
pairs = [pair for pair in pairs if filterPair(pair)]
print('有效句子对:', len(pairs))

# 建立两个字典(中文的和英文的)
for pair in pairs:
    input_lang.addSentence(pair[0])
    output_lang.addSentence(pair[1])
print("总单词数:")
print(input_lang.name, input_lang.n_words)
print(output_lang.name, output_lang.n_words)


# 形成训练集,首先,打乱所有句子的顺序
random_idx = np.random.permutation(range(len(pairs)))
pairs = [pairs[i] for i in random_idx]

# 将语言转变为单词的编码构成的序列
pairs = [indexFromPair(pair) for pair in pairs]
    
# 形成训练集、校验集和测试集
valid_size = len(pairs) // 10
if valid_size > 10000:
    valid_size = 10000
pairs = pairs[ : - valid_size]
valid_pairs = pairs[-valid_size : -valid_size // 2]
test_pairs = pairs[- valid_size // 2 :]

# 利用PyTorch的dataset和dataloader对象,将数据加载到加载器里面,并且自动分批

batch_size = 30 #一撮包含30个数据记录,这个数字越大,系统在训练的时候,每一个周期处理的数据就越多,这样处理越快,但总的数据量会减少

print('训练记录:', len(pairs))
print('校验记录:', len(valid_pairs))
print('测试记录:', len(test_pairs))

# 形成训练对列表,用于喂给train_dataset
pairs_X = [pair[0] for pair in pairs]
pairs_Y = [pair[1] for pair in pairs]
valid_X = [pair[0] for pair in pairs]
valid_Y = [pair[1] for pair in pairs]
test_X = [pair[0] for pair in pairs]
test_Y = [pair[1] for pair in pairs]


# 形成训练集
train_dataset = DataSet.TensorDataset(torch.LongTensor(pairs_X), torch.LongTensor(pairs_Y))
# 形成数据加载器
train_loader = DataSet.DataLoader(train_dataset, batch_size = batch_size, shuffle = True, num_workers=8)


# 校验数据
valid_dataset = DataSet.TensorDataset(torch.LongTensor(valid_X), torch.LongTensor(valid_Y))
valid_loader = DataSet.DataLoader(valid_dataset, batch_size = batch_size, shuffle = True, num_workers=8)

# 测试数据
test_dataset = DataSet.TensorDataset(torch.LongTensor(test_X), torch.LongTensor(test_Y))
test_loader = DataSet.DataLoader(test_dataset, batch_size = batch_size, shuffle = True, num_workers = 8)


有效句子对: 19919
总单词数:
Chinese 18671
English 13493
训练记录: 17928
校验记录: 995
测试记录: 996

In [27]:
# 定义基于注意力的解码器RNN
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        self.max_length = max_length

        # 词嵌入层
        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        
        # 注意力网络(一个前馈神经网络)
        self.attn = nn.Linear(self.hidden_size * (2 * n_layers + 1), self.max_length)
    
        # 注意力机制作用完后的结果映射到后面的层
        self.attn_combine = nn.Linear(self.hidden_size * 3, self.hidden_size)
        
        # dropout操作层
        self.dropout = nn.Dropout(self.dropout_p)
        
        # 定义一个双向GRU,并设置batch_first为True以方便操作
        self.gru = nn.GRU(self.hidden_size, self.hidden_size, bidirectional = True,
                         num_layers = self.n_layers, batch_first = True)
        self.out = nn.Linear(self.hidden_size * 2, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        # 解码器的一步操作
        # input大小:batch_size, length_seq
        embedded = self.embedding(input)
        # embedded大小:batch_size, length_seq, hidden_size
        embedded = embedded[:, 0, :]
        # embedded大小:batch_size, hidden_size
        embedded = self.dropout(embedded)
        
        # 将hidden张量数据转化成batch_size排在第0维的形状
        # hidden大小:direction*n_layer, batch_size, hidden_size
        temp_for_transpose = torch.transpose(hidden, 0, 1).contiguous()
        temp_for_transpose = temp_for_transpose.view(temp_for_transpose.size()[0], -1)
        hidden_attn = temp_for_transpose
        
        # 注意力层的输入
        # hidden_attn大小:batch_size, direction*n_layers*hidden_size
        input_to_attention = torch.cat((embedded, hidden_attn), 1)
        # input_to_attention大小:batch_size, hidden_size * (1 + direction * n_layers)
        
        # 注意力层输出的权重
        attn_weights = F.softmax(self.attn(input_to_attention))
        # attn_weights大小:batch_size, max_length
        
        # 当输入数据不标准的时候,对weights截取必要的一段
        attn_weights = attn_weights[:, : encoder_outputs.size()[1]]
        # attn_weights大小:batch_size, length_seq_of_encoder
        attn_weights = attn_weights.unsqueeze(1)
        # attn_weights大小:batch_size, 1, length_seq 中间的1是为了bmm乘法用的
        
        # 将attention的weights矩阵乘encoder_outputs以计算注意力完的结果
        # encoder_outputs大小:batch_size, seq_length, hidden_size*direction
        attn_applied = torch.bmm(attn_weights, encoder_outputs) 
        # attn_applied大小:batch_size, 1, hidden_size*direction
        # bmm: 两个矩阵相乘。忽略第一个batch纬度,缩并时间维度
        
        # 将输入的词向量与注意力机制作用后的结果拼接成一个大的输入向量
        output = torch.cat((embedded, attn_applied[:,0,:]), 1)
        # output大小:batch_size, hidden_size * (direction + 1)
        
        # 将大输入向量映射为GRU的隐含层
        output = self.attn_combine(output).unsqueeze(1)
        # output大小:batch_size, length_seq, hidden_size
        output = F.relu(output)
        
        # output的结果再dropout
        output = self.dropout(output)

        # 开始解码器GRU的运算
        output, hidden = self.gru(output, hidden)
        
        
        # output大小:batch_size, length_seq, hidden_size * directions
        # hidden大小:n_layers * directions, batch_size, hidden_size
        
        #取出GRU运算最后一步的结果喂给最后一层全链接层
        output = self.out(output[:, -1, :])
        # output大小:batch_size * output_size
        
        # 取logsoftmax,计算输出结果
        output = F.log_softmax(output)
        # output大小:batch_size * output_size
        return output, hidden, attn_weights

    def initHidden(self, batch_size):
        # 初始化解码器隐单元,尺寸为n_layers * directions, batch_size, hidden_size
        result = Variable(torch.zeros(self.n_layers * 2, batch_size, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result

In [28]:
# 开始带有注意力机制的RNN训练

#定义网络架构
hidden_size = 512
max_length = MAX_LENGTH
n_layers = 1
encoder = EncoderRNN(input_lang.n_words, hidden_size, n_layers = n_layers)
decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.5,
                         max_length = max_length, n_layers = n_layers)

if use_cuda:
    encoder = encoder.cuda()
    decoder = decoder.cuda()

print_loss_total = 0  # Reset every print_every
learning_rate = 0.0001
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

criterion = nn.NLLLoss()
#criterion = Batch_NLLLoss
teacher_forcing_ratio = 0.5

num_epoch = 100

# 开始训练周期循环
plot_losses = []
for epoch in range(num_epoch):
    # 将解码器置于训练状态,让dropout工作
    decoder.train()
    print_loss_total = 0
    # 对训练数据进行循环
    for data in train_loader:
        input_variable = Variable(data[0]).cuda() if use_cuda else Variable(data[0])
        # input_variable的大小:batch_size, length_seq
        target_variable = Variable(data[1]).cuda() if use_cuda else Variable(data[1])
        # target_variable的大小:batch_size, length_seq
        
        #清空梯度
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        
        encoder_hidden = encoder.initHidden(data[0].size()[0])

        loss = 0

        #编码器开始工作
        encoder_outputs, encoder_hidden = encoder(input_variable, encoder_hidden)
        # encoder_outputs的大小:batch_size, length_seq, hidden_size*direction
        # encoder_hidden的大小:direction*n_layer, batch_size, hidden_size

        # 解码器开始工作
        decoder_input = Variable(torch.LongTensor([[SOS_token]] * target_variable.size()[0]))
        # decoder_input大小:batch_size, length_seq
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input

        # 将编码器的隐含层单元取值作为编码的结果传递给解码器
        decoder_hidden = encoder_hidden
        # decoder_hidden大小:direction*n_layer, batch_size, hidden_size

        # 同时按照两种方式训练解码器:用教师监督的信息作为下一时刻的输入和不用监督的信息,用自己预测结果作为下一时刻的输入
        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
        if use_teacher_forcing:
            # 用监督信息作为下一时刻解码器的输入
            # 开始时间不得循环
            for di in range(MAX_LENGTH):
                # 输入给解码器的信息包括输入的单词decoder_input, 解码器上一时刻的因曾单元状态,
                # 编码器各个时间步的输出结果
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                #decoder_ouput大小:batch_size, output_size
                #计算损失函数,得到下一时刻的解码器的输入
                loss += criterion(decoder_output, target_variable[:, di])
                decoder_input = target_variable[:, di].unsqueeze(1)  # Teacher forcing
                # decoder_input大小:batch_size, length_seq
        else:
            # 没有教师监督,用解码器自己的预测作为下一时刻的输入

            # 对时间步进行循环
            for di in range(MAX_LENGTH):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                #decoder_ouput大小:batch_size, output_size(vocab_size)
                # 获取解码器的预测结果,并用它来作为下一时刻的输入
                topv, topi = decoder_output.data.topk(1, dim = 1)
                #topi 尺寸:batch_size, k
                ni = topi[:, 0]

                decoder_input = Variable(ni.unsqueeze(1))
                # decoder_input大小:batch_size, length_seq
                decoder_input = decoder_input.cuda() if use_cuda else decoder_input

                # 计算损失函数
                loss += criterion(decoder_output, target_variable[:, di])
        
        
        
        # 反向传播开始
        loss.backward()
        loss = loss.cpu() if use_cuda else loss
        # 开始梯度下降
        encoder_optimizer.step()
        decoder_optimizer.step()
        print_loss_total += loss.data.numpy()[0]

    print_loss_avg = print_loss_total / len(train_loader)
        
    valid_loss = 0
    rights = []
    # 将解码器的training设置为False,以便关闭dropout
    decoder.eval()
    
    #对所有的校验数据做循环
    for data in valid_loader:
        input_variable = Variable(data[0]).cuda() if use_cuda else Variable(data[0])
        # input_variable的大小:batch_size, length_seq
        target_variable = Variable(data[1]).cuda() if use_cuda else Variable(data[1])
        # target_variable的大小:batch_size, length_seq

        encoder_hidden = encoder.initHidden(data[0].size()[0])

        loss = 0
        encoder_outputs, encoder_hidden = encoder(input_variable, encoder_hidden)
        # encoder_outputs的大小:batch_size, length_seq, hidden_size*direction
        # encoder_hidden的大小:direction*n_layer, batch_size, hidden_size

        decoder_input = Variable(torch.LongTensor([[SOS_token]] * target_variable.size()[0]))
        # decoder_input大小:batch_size, length_seq
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input

        decoder_hidden = encoder_hidden
        # decoder_hidden大小:direction*n_layer, batch_size, hidden_size

        # 开始每一步的预测
        for di in range(MAX_LENGTH):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            #decoder_ouput大小:batch_size, output_size(vocab_size)
            topv, topi = decoder_output.data.topk(1, dim = 1)
            #topi 尺寸:batch_size, k
            ni = topi[:, 0]

            decoder_input = Variable(ni.unsqueeze(1))
            # decoder_input大小:batch_size, length_seq
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
            right = rightness(decoder_output, target_variable[:, di].unsqueeze(1))
            rights.append(right)
            loss += criterion(decoder_output, target_variable[:, di])
        loss = loss.cpu() if use_cuda else loss
        valid_loss += loss.data.numpy()[0]
    # 计算平均损失、准确率等指标并打印输出
    right_ratio = 1.0 * np.sum([i[0] for i in rights]) / np.sum([i[1] for i in rights])
    print('进程:%d%% 训练损失:%.4f,校验损失:%.4f,词正确率:%.2f%%' % (epoch * 1.0 / num_epoch * 100, 
                                                    print_loss_avg,
                                                    valid_loss / len(valid_loader),
                                                    100.0 * right_ratio))
    plot_losses.append([print_loss_avg, valid_loss / len(valid_loader), right_ratio])


进程:0% 训练损失:100.2571,校验损失:95.5506,词正确率:32.41%
进程:1% 训练损失:92.0658,校验损失:92.3374,词正确率:33.37%
进程:2% 训练损失:87.7127,校验损失:88.8387,词正确率:33.96%
进程:3% 训练损失:84.0813,校验损失:86.7723,词正确率:34.09%
进程:4% 训练损失:80.9802,校验损失:83.0259,词正确率:34.97%
进程:5% 训练损失:78.2679,校验损失:81.1532,词正确率:35.31%
进程:6% 训练损失:75.9566,校验损失:78.5189,词正确率:35.86%
进程:7% 训练损失:73.4438,校验损失:76.5526,词正确率:36.33%
进程:8% 训练损失:71.1783,校验损失:74.0206,词正确率:36.82%
进程:9% 训练损失:68.5947,校验损失:71.3383,词正确率:37.69%
进程:10% 训练损失:66.5749,校验损失:70.6288,词正确率:37.51%
进程:11% 训练损失:64.9859,校验损失:67.2967,词正确率:38.94%
进程:12% 训练损失:62.4203,校验损失:66.7298,词正确率:38.81%
进程:13% 训练损失:60.6931,校验损失:64.6684,词正确率:39.48%
进程:14% 训练损失:58.8749,校验损失:61.8657,词正确率:40.87%
进程:15% 训练损失:57.3162,校验损失:60.4037,词正确率:41.61%
进程:16% 训练损失:55.3753,校验损失:59.0980,词正确率:42.20%
进程:17% 训练损失:54.1357,校验损失:58.0374,词正确率:42.25%
进程:18% 训练损失:52.5234,校验损失:54.9538,词正确率:44.06%
进程:19% 训练损失:50.7388,校验损失:55.1816,词正确率:43.67%
进程:20% 训练损失:49.8442,校验损失:53.3292,词正确率:44.48%
进程:21% 训练损失:47.9882,校验损失:50.8352,词正确率:46.16%
进程:22% 训练损失:47.1954,校验损失:49.8542,词正确率:46.47%
进程:23% 训练损失:45.2600,校验损失:48.6119,词正确率:47.17%
进程:24% 训练损失:43.9106,校验损失:47.5854,词正确率:47.58%
进程:25% 训练损失:42.6943,校验损失:46.0853,词正确率:48.86%
进程:26% 训练损失:41.8625,校验损失:44.4935,词正确率:49.60%
进程:27% 训练损失:40.7282,校验损失:44.1559,词正确率:50.12%
进程:28% 训练损失:39.4585,校验损失:41.7528,词正确率:51.87%
进程:28% 训练损失:38.4489,校验损失:41.3920,词正确率:51.87%
进程:30% 训练损失:37.6140,校验损失:40.0568,词正确率:53.13%
进程:31% 训练损失:36.4164,校验损失:38.6405,词正确率:54.07%
进程:32% 训练损失:35.0357,校验损失:37.7257,词正确率:54.47%
进程:33% 训练损失:34.0059,校验损失:36.5531,词正确率:55.58%
进程:34% 训练损失:33.0244,校验损失:35.5307,词正确率:56.53%
进程:35% 训练损失:32.3108,校验损失:35.6306,词正确率:56.56%
进程:36% 训练损失:31.4878,校验损失:33.2003,词正确率:58.41%
进程:37% 训练损失:30.6260,校验损失:32.3703,词正确率:59.13%
进程:38% 训练损失:29.7303,校验损失:31.3013,词正确率:60.22%
进程:39% 训练损失:28.7125,校验损失:30.0268,词正确率:61.59%
进程:40% 训练损失:28.1812,校验损失:29.2789,词正确率:62.17%
进程:41% 训练损失:27.0775,校验损失:28.6632,词正确率:62.90%
进程:42% 训练损失:26.5126,校验损失:27.4051,词正确率:64.28%
进程:43% 训练损失:25.7604,校验损失:26.4756,词正确率:65.04%
进程:44% 训练损失:24.7720,校验损失:25.6129,词正确率:66.27%
进程:45% 训练损失:24.2149,校验损失:24.8751,词正确率:66.97%
进程:46% 训练损失:23.5065,校验损失:23.7855,词正确率:68.01%
进程:47% 训练损失:22.4146,校验损失:23.1251,词正确率:69.33%
进程:48% 训练损失:21.6815,校验损失:22.1897,词正确率:70.24%
进程:49% 训练损失:21.0661,校验损失:21.7550,词正确率:70.93%
进程:50% 训练损失:20.6787,校验损失:20.7181,词正确率:72.26%
进程:51% 训练损失:19.7385,校验损失:19.6238,词正确率:73.63%
进程:52% 训练损失:19.0847,校验损失:18.8480,词正确率:74.36%
进程:53% 训练损失:18.5307,校验损失:18.1465,词正确率:75.49%
进程:54% 训练损失:17.7857,校验损失:18.2258,词正确率:75.77%
进程:55% 训练损失:17.3451,校验损失:17.2500,词正确率:76.92%
进程:56% 训练损失:16.8286,校验损失:16.6295,词正确率:78.01%
进程:56% 训练损失:16.4130,校验损失:15.5581,词正确率:79.03%
进程:57% 训练损失:15.8551,校验损失:14.9797,词正确率:79.97%
进程:59% 训练损失:15.3657,校验损失:14.1971,词正确率:81.08%
进程:60% 训练损失:14.5660,校验损失:14.1195,词正确率:81.40%
进程:61% 训练损失:14.4498,校验损失:12.9542,词正确率:82.78%
进程:62% 训练损失:13.6513,校验损失:13.4632,词正确率:82.48%
进程:63% 训练损失:13.4196,校验损失:11.8805,词正确率:84.43%
进程:64% 训练损失:12.8901,校验损失:11.7526,词正确率:84.80%
进程:65% 训练损失:12.5301,校验损失:11.4973,词正确率:85.33%
进程:66% 训练损失:12.2318,校验损失:10.4698,词正确率:86.75%
进程:67% 训练损失:11.6670,校验损失:9.8156,词正确率:87.62%
进程:68% 训练损失:11.5099,校验损失:9.4508,词正确率:88.17%
进程:69% 训练损失:10.9179,校验损失:9.0752,词正确率:88.81%
进程:70% 训练损失:10.9672,校验损失:8.7181,词正确率:89.34%
进程:71% 训练损失:10.3690,校验损失:8.4533,词正确率:89.63%
进程:72% 训练损失:10.3004,校验损失:8.1135,词正确率:90.06%
进程:73% 训练损失:9.6846,校验损失:7.4688,词正确率:91.16%
进程:74% 训练损失:9.3092,校验损失:7.5659,词正确率:90.98%
进程:75% 训练损失:9.1515,校验损失:7.4759,词正确率:91.17%
进程:76% 训练损失:8.9072,校验损失:6.7179,词正确率:92.25%
进程:77% 训练损失:8.6592,校验损失:6.2797,词正确率:92.90%
进程:78% 训练损失:8.2607,校验损失:6.2038,词正确率:92.99%
进程:79% 训练损失:8.1787,校验损失:5.8680,词正确率:93.44%
进程:80% 训练损失:7.8784,校验损失:5.7043,词正确率:93.68%
进程:81% 训练损失:7.6592,校验损失:5.3326,词正确率:94.13%
进程:82% 训练损失:7.3623,校验损失:5.1635,词正确率:94.42%
进程:83% 训练损失:7.2983,校验损失:5.0563,词正确率:94.46%
进程:84% 训练损失:7.0255,校验损失:4.7861,词正确率:94.93%
进程:85% 训练损失:6.7585,校验损失:4.8582,词正确率:94.83%
进程:86% 训练损失:6.5923,校验损失:4.4159,词正确率:95.37%
进程:87% 训练损失:6.2903,校验损失:4.5948,词正确率:95.17%
进程:88% 训练损失:6.3686,校验损失:4.1388,词正确率:95.77%
进程:89% 训练损失:6.0268,校验损失:3.8566,词正确率:96.20%
进程:90% 训练损失:5.9952,校验损失:4.2036,词正确率:95.59%
进程:91% 训练损失:5.7167,校验损失:3.7018,词正确率:96.36%
进程:92% 训练损失:5.8013,校验损失:3.4930,词正确率:96.52%
进程:93% 训练损失:5.4786,校验损失:3.5613,词正确率:96.43%
进程:94% 训练损失:5.2035,校验损失:3.2193,词正确率:96.92%
进程:95% 训练损失:5.0905,校验损失:3.2163,词正确率:96.83%
进程:96% 训练损失:5.1690,校验损失:3.1840,词正确率:96.89%
进程:97% 训练损失:4.8063,校验损失:3.6016,词正确率:96.33%
进程:98% 训练损失:4.7914,校验损失:3.0366,词正确率:97.08%
Process Process-6915:
Process Process-6919:
Process Process-6920:
Process Process-6913:
Process Process-6918:
Process Process-6917:
Process Process-6916:
Process Process-6914:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 35, in _worker_loop
    r = index_queue.get()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 35, in _worker_loop
    r = index_queue.get()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 35, in _worker_loop
    r = index_queue.get()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 35, in _worker_loop
    r = index_queue.get()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 35, in _worker_loop
    r = index_queue.get()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 35, in _worker_loop
    r = index_queue.get()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 35, in _worker_loop
    r = index_queue.get()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/site-packages/torch/utils/data/dataloader.py", line 35, in _worker_loop
    r = index_queue.get()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/queues.py", line 343, in get
    res = self._reader.recv_bytes()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
KeyboardInterrupt
KeyboardInterrupt
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
KeyboardInterrupt
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
  File "/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-28-53ac80ed9660> in <module>()
    101         # 开始梯度下降
    102         encoder_optimizer.step()
--> 103         decoder_optimizer.step()
    104         print_loss_total += loss.data.numpy()[0]
    105 

~/anaconda/envs/learning_pytorch/lib/python3.5/site-packages/torch/optim/adam.py in step(self, closure)
     72                 step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
     73 
---> 74                 p.data.addcdiv_(-step_size, exp_avg, denom)
     75 
     76         return loss

KeyboardInterrupt: 

In [29]:
# 绘制统计指标曲线图
torch.save(encoder, 'encoder-final.mdl')
torch.save(decoder, 'decoder-final.mdl')
a = [i[0] for i in plot_losses]
b = [i[1] for i in plot_losses]
c = [i[2] * 100 for i in plot_losses]
plt.plot(a, label = 'Training Loss')
plt.plot(b, label = 'Validation Loss')
plt.plot(c, label = 'Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Loss & Accuracy')
plt.legend()


/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type EncoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
/Users/jake/anaconda/envs/learning_pytorch/lib/python3.5/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type AttnDecoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
Out[29]:
<matplotlib.legend.Legend at 0x11645c780>

In [32]:
# 从测试集中随机挑选20个句子来测试翻译的结果
indices = np.random.choice(range(len(test_X)), 20)
for ind in indices:
    data = [test_X[ind]]
    target = [test_Y[ind]]
    print(SentenceFromList(input_lang, data[0]))
    input_variable = Variable(torch.LongTensor(data)).cuda() if use_cuda else Variable(torch.LongTensor(data))
    # input_variable的大小:batch_size, length_seq
    target_variable = Variable(torch.LongTensor(target)).cuda() if use_cuda else Variable(torch.LongTensor(target))
    # target_variable的大小:batch_size, length_seq

    encoder_hidden = encoder.initHidden(input_variable.size()[0])

    loss = 0
    encoder_outputs, encoder_hidden = encoder(input_variable, encoder_hidden)
    # encoder_outputs的大小:batch_size, length_seq, hidden_size*direction
    # encoder_hidden的大小:direction*n_layer, batch_size, hidden_size

    decoder_input = Variable(torch.LongTensor([[SOS_token]] * target_variable.size()[0]))
    # decoder_input大小:batch_size, length_seq
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input

    decoder_hidden = encoder_hidden
    # decoder_hidden大小:direction*n_layer, batch_size, hidden_size

    # Without teacher forcing: use its own predictions as the next input
    output_sentence = []
    decoder_attentions = torch.zeros(max_length, max_length)
    rights = []
    for di in range(MAX_LENGTH):
        decoder_output, decoder_hidden, decoder_attention = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        #decoder_ouput大小:batch_size, output_size(vocab_size)
        topv, topi = decoder_output.data.topk(1, dim = 1)
        decoder_attentions[di] = decoder_attention.data
        #topi 尺寸:batch_size, k
        ni = topi[:, 0]
        decoder_input = Variable(ni.unsqueeze(1))
        ni = ni.numpy()[0]
        output_sentence.append(ni)
        # decoder_input大小:batch_size, length_seq
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input
        right = rightness(decoder_output, target_variable[:, di].unsqueeze(1))
        rights.append(right)
    sentence = SentenceFromList(output_lang, output_sentence)
    standard = SentenceFromList(output_lang, target[0])
    print('机器翻译:', sentence)
    print('标准翻译:', standard)
    # 输出本句话的准确率
    right_ratio = 1.0 * np.sum([i[0] for i in rights]) / np.sum([i[1] for i in rights])
    print('词准确率:', 100.0 * right_ratio)
    print('\n')


不过 , 克林顿 对 於 这样的 结果 并不 感到意外 .
机器翻译: however clinton is not surprised about this result .
标准翻译: however clinton is not surprised about this result .
词准确率: 100.0


在 这种 情况 下 , 陈 总统 其实 站在 最佳 的 历史 时机 .
机器翻译: under such circumstances president chen actually is standing the best historical historical opportunity .
标准翻译: under such circumstances president chen actually is standing before the best historical opportunity .
词准确率: 85.0


一 只 只 手 高高 举起 .
机器翻译: a hand was raised high .
标准翻译: a hand was raised high .
词准确率: 100.0


该 会 至今 成立 七 年 .
机器翻译: seven years have passed since the society was set up .
标准翻译: seven years have passed since the society was set up .
词准确率: 100.0


谈到 经济 合作 问题 , 两 人 找到 了 一些 共同 语言 .
机器翻译: on the issue of economic cooperation bush and putin shared a lot of common values .
标准翻译: on the issue of economic cooperation bush and putin shared a lot of common values .
词准确率: 100.0


这 一 方针 是 指导 我国 军事 斗争 全局 的 总 纲领 , 总 政策 .
机器翻译: this principle serves as a general program and policy general the party china s overall military struggle struggle .
标准翻译: this principle serves as a general program and a general policy which guides china s overall military struggle .
词准确率: 60.0


张宏喜 总领事 是 在 华尔道夫 饭店 举行 国庆 招待会 上 说 这番话 的 .
机器翻译: they are welcome to visit china early to see for themselves .
标准翻译: they are welcome to visit china early to see for themselves .
词准确率: 100.0


他 表示 , 在 推动 这项 计划 方面 不会 作出 妥协 .
机器翻译: he pledged that he would make no compromise when pushing ahead with this scheme .
标准翻译: he pledged that he would make no compromise when pushing ahead with this scheme .
词准确率: 100.0


首先 是 技术 支持 .
机器翻译: first of all we will give technological support .
标准翻译: first of all we will give technological support .
词准确率: 100.0


国际标准 , 还是 美国 标准 ?
机器翻译: international standards or american standards ?
标准翻译: international standards or american standards ?
词准确率: 100.0


今后 两国 经贸 合作 可望 进一步 增长 .
机器翻译: it is hoped that bilateral economic and trade cooperation will increase further in the future .
标准翻译: it is hoped that bilateral economic and trade cooperation will increase further in the future .
词准确率: 100.0


今后 应 进一步 注意 发展 生态 旅游 , 努力 开发 旅游 精品 , 形成 自己的 特色 .
机器翻译: all kinds of markets have developed relatively fast . they .
标准翻译: all kinds of markets have developed relatively fast .
词准确率: 90.0


小 王 赶忙 上前 抓住 此人 胳膊 , 夺下 油管 .
机器翻译: wang immediately seized him by the arm and snatched the oil pipe .
标准翻译: wang immediately seized him by the arm and snatched the oil pipe .
词准确率: 100.0


至 於 第二 , 第三点 , 目前 均 无法 证实 .
机器翻译: as for the second and third theories theories was to have no confirmed this this .
标准翻译: as for the second and third theories there was no way to have confirmed this .
词准确率: 70.0


在 一个 大型 机库 内 , 他 最后 一次 检阅 了 空军 仪仗队 .
机器翻译: inside a huge hangar he reviewed the guard of honor of the air force for the last time .
标准翻译: inside a huge hangar he reviewed the guard of honor of the air force for the last time .
词准确率: 100.0


实际 失业 情况 比 官方 公布 的 数字 更 严重 .
机器翻译: the number of people who once lost jobs or may lose jobs reached .
标准翻译: the number of people who once lost jobs or may lose jobs reached .
词准确率: 100.0


这是 毛泽东 , 邓小平 同志 的 一个 重要 思想 .
机器翻译: this is an important thinking of comrades mao zedong and deng xiaoping .
标准翻译: this is an important thinking of comrades mao zedong and deng xiaoping .
词准确率: 100.0


种种 迹像 表明 , 我国 钢铁 工业 出现 了 近年 少见 的 好 形势 .
机器翻译: there are indications that a rare favorable favorable favorable has emerged recently in china s iron and industry .
标准翻译: there are indications that a rare favorable situation has emerged recently in china s iron and steel industry .
词准确率: 50.0


这 对 促进 亚太地区 人力资源 能力 建设 很 有意义 .
机器翻译: this is of great significance for human human building in the asia pacific pacific region .
标准翻译: this is of great significance for promoting human capacity building in the asia pacific region .
词准确率: 70.0


第二 军事 禁区 , 军事 管理 区 的 保护
机器翻译: chapter ii .
标准翻译: chapter ii .
词准确率: 100.0



In [33]:
# 通过几个特殊的句子翻译,考察注意力机制关注的情况
data = '人民币 汇率 继续 保持 稳定 .'
#data = '五 是 干部 交流 工作 迈出 较大 步伐 .'
data = '谈到 经济 合作 问题 , 两 人 找到 了 一些 共同 语言 .'
data = np.array([indexFromSentence(input_lang, data)])

input_variable = Variable(torch.LongTensor(data)).cuda() if use_cuda else Variable(torch.LongTensor(data))
# input_variable的大小:batch_size, length_seq
target_variable = Variable(torch.LongTensor(target)).cuda() if use_cuda else Variable(torch.LongTensor(target))
# target_variable的大小:batch_size, length_seq

encoder_hidden = encoder.initHidden(input_variable.size()[0])

loss = 0
encoder_outputs, encoder_hidden = encoder(input_variable, encoder_hidden)
# encoder_outputs的大小:batch_size, length_seq, hidden_size*direction
# encoder_hidden的大小:direction*n_layer, batch_size, hidden_size

decoder_input = Variable(torch.LongTensor([[SOS_token]] * target_variable.size()[0]))
# decoder_input大小:batch_size, length_seq
decoder_input = decoder_input.cuda() if use_cuda else decoder_input

decoder_hidden = encoder_hidden
# decoder_hidden大小:direction*n_layer, batch_size, hidden_size

output_sentence = []
decoder_attentions = torch.zeros(max_length, max_length)
for di in range(MAX_LENGTH):
    decoder_output, decoder_hidden, decoder_attention = decoder(
        decoder_input, decoder_hidden, encoder_outputs)
    #decoder_ouput大小:batch_size, output_size(vocab_size)
    topv, topi = decoder_output.data.topk(1, dim = 1)
    
    # 在每一步,获取了注意力的权重向量,并将其存储到了decoder_attentions之中
    decoder_attentions[di] = decoder_attention.data
    #topi 尺寸:batch_size, k
    ni = topi[:, 0]
    decoder_input = Variable(ni.unsqueeze(1))
    ni = ni.numpy()[0]
    output_sentence.append(ni)
    # decoder_input大小:batch_size, length_seq
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input
    right = rightness(decoder_output, target_variable[:, di].unsqueeze(1))
    rights.append(right)
sentence = SentenceFromList(output_lang, output_sentence)
print('机器翻译:', sentence)
print('\n')


机器翻译: on the issue of economic bush bush and putin shared a lot of common values .



In [34]:
# 将每一步存储的注意力权重组合到一起就形成了注意力矩阵,绘制为图
plt.matshow(decoder_attentions.numpy())


Out[34]:
<matplotlib.image.AxesImage at 0x116bddda0>

In [ ]: