In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)


Out[1]:
<torch._C.Generator at 0x107279c18>

In [20]:
# input_size=3, hidden_size=3
lstm = nn.LSTM(3, 3)

# a sequence of length 5 (seq_len); each element has shape (1, 3) = (batch, input_size)
inputs = [autograd.Variable(torch.randn((1, 3))) for _ in range(5)]

# initialize the hidden state (h_0, c_0)
# h_0 (num_layers * num_directions, batch, hidden_size)
# c_0 (num_layers * num_directions, batch, hidden_size)
hidden = (autograd.Variable(torch.randn(1, 1, 3)),
          autograd.Variable(torch.randn((1, 1, 3))))

In [21]:
hidden[0].size(), hidden[1].size()


Out[21]:
(torch.Size([1, 1, 3]), torch.Size([1, 1, 3]))

In [22]:
# feed the sequence one timestep at a time and get the output and hidden state
for i in inputs:
    # input (seq_len, batch, input_size)
    out, hidden = lstm(i.view(1, 1, -1), hidden)
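
To make the per-timestep call concrete, here is a small added shape check (not part of the original run): each step consumes an input of shape (seq_len=1, batch=1, input_size=3) and returns an output of the same shape plus the updated (h, c) pair.

In [ ]:
# illustrative shape check for a single LSTM step
step_out, step_hidden = lstm(inputs[0].view(1, 1, -1), hidden)
print(step_out.size())                               # torch.Size([1, 1, 3])
print(step_hidden[0].size(), step_hidden[1].size())  # both torch.Size([1, 1, 3])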

In [29]:
# alternatively, feed the whole sequence at once to get the output at every timestep (out)
# and the final hidden state (which matches out[-1]);
# just pass inputs as a 3D tensor of shape (seq_len, batch, input_size)
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (autograd.Variable(torch.randn(1, 1, 3)),
          autograd.Variable(torch.randn(1, 1, 3)))
out, hidden = lstm(inputs, hidden)
print(out)     # output of the hidden layer h at every timestep
print(hidden)  # final hidden state (h and c) after the last timestep


Variable containing:
(0 ,.,.) = 
  0.6501  0.2764 -0.5308

(1 ,.,.) = 
  0.3998  0.1103 -0.1581

(2 ,.,.) = 
  0.2620  0.3529 -0.2749

(3 ,.,.) = 
  0.2645  0.1610 -0.1415

(4 ,.,.) = 
  0.2563  0.2421 -0.3418
[torch.FloatTensor of size 5x1x3]

(Variable containing:
(0 ,.,.) = 
  0.2563  0.2421 -0.3418
[torch.FloatTensor of size 1x1x3]
, Variable containing:
(0 ,.,.) = 
  0.4816  0.3756 -0.7887
[torch.FloatTensor of size 1x1x3]
)
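
As the comment above notes, the last row of out matches the final hidden state h_n returned in hidden[0] (both are 0.2563  0.2421 -0.3418 here). A quick check, added for illustration:

In [ ]:
# illustrative check: the last timestep's output equals the final hidden state h_n
print(out[-1])       # shape (1, 3)
print(hidden[0][0])  # shape (1, 3), same values as out[-1]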

In [30]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    tensor = torch.LongTensor(idxs)
    return autograd.Variable(tensor)

In [31]:
training_data = [
    ('The dog ate the apple'.split(), ['DET', 'NN', 'V', 'DET', 'NN']),
    ('Everybody read that book'.split(), ['NN', 'V', 'DET', 'NN'])
]

In [32]:
training_data


Out[32]:
[(['The', 'dog', 'ate', 'the', 'apple'], ['DET', 'NN', 'V', 'DET', 'NN']),
 (['Everybody', 'read', 'that', 'book'], ['NN', 'V', 'DET', 'NN'])]

In [35]:
word_to_idx = {}
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_idx:
            word_to_idx[word] = len(word_to_idx)
print(word_to_idx)


{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}

In [36]:
tag_to_idx = {'DET': 0, 'NN': 1, 'V': 2}

In [37]:
tag_to_idx


Out[37]:
{'DET': 0, 'NN': 1, 'V': 2}

In [38]:
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

In [86]:
class LSTMTagger(nn.Module):
    
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        return (autograd.Variable(torch.zeros(1, 1, self.hidden_dim)),
                autograd.Variable(torch.zeros(1, 1, self.hidden_dim)))
    
    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        # the whole input sequence is fed at once, so lstm_out is the output at every
        # timestep and self.hidden is the final hidden state (= the last output in the sequence)
        # batch size is fixed at 1 (only one sentence is fed in)
        # (a shape trace is sketched in a separate cell after the model is built)
        lstm_out, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)
#         print('lstm_out:', lstm_out.size())
#         print('self.hidden:', self.hidden[0].size(), self.hidden[1].size())
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
#        print('tag_space:', tag_space.size())
        tag_scores = F.log_softmax(tag_space)
#        print('tag_scores:', tag_scores.size())
        return tag_scores

In [87]:
print(len(word_to_idx), len(tag_to_idx))
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_idx), len(tag_to_idx))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)


9 3
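
To make the shape comments inside forward() concrete, the following added cell traces one forward pass by hand (the same information as the commented-out prints in the class; it does not modify the model's state):

In [ ]:
# illustrative shape trace of the tagger's forward pass
sent = prepare_sequence(training_data[0][0], word_to_idx)      # 5 word indices
emb = model.word_embeddings(sent)
print(emb.size())                                               # (5, 6): one embedding per word
lstm_out, _ = model.lstm(emb.view(len(sent), 1, -1), model.init_hidden())
print(lstm_out.size())                                          # (5, 1, 6): output at every timestep
print(model.hidden2tag(lstm_out.view(len(sent), -1)).size())    # (5, 3): tag scores per word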

In [88]:
training_data


Out[88]:
[(['The', 'dog', 'ate', 'the', 'apple'], ['DET', 'NN', 'V', 'DET', 'NN']),
 (['Everybody', 'read', 'that', 'book'], ['NN', 'V', 'DET', 'NN'])]

In [89]:
inputs = prepare_sequence(training_data[0][0], word_to_idx)

In [90]:
inputs


Out[90]:
Variable containing:
 0
 1
 2
 3
 4
[torch.LongTensor of size 5]

In [91]:
tag_scores = model(inputs)

In [92]:
tag_scores


Out[92]:
Variable containing:
-0.9989 -1.3721 -0.9724
-0.9560 -1.3876 -1.0055
-0.9805 -1.3638 -0.9964
-0.9823 -1.4125 -0.9623
-0.9295 -1.5394 -0.9397
[torch.FloatTensor of size 5x3]

In [104]:
for epoch in range(300):
    for sentence, tags in training_data:
        model.zero_grad()

        # a new sequence starts here, so the hidden state accumulated over the
        # previous sequence must be cleared
        model.hidden = model.init_hidden()
        
        # convert the sequence of word strings into a sequence of indices
        sentence_in = prepare_sequence(sentence, word_to_idx)
        targets = prepare_sequence(tags, tag_to_idx)
        
        tag_scores = model(sentence_in)
        
        # feed one sequence (one sentence), compute the loss, and backpropagate
        # how to extend this to mini-batches? (see the padding sketch after this cell)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
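
On the mini-batch question raised in the comment above: one possible direction (a sketch only, not part of the original notebook; it assumes a PyTorch version that provides torch.nn.utils.rnn.pad_sequence) is to pad the index sequences to a common length so the LSTM sees a batch dimension larger than 1. The model would then need a batch-sized hidden state and the loss an ignore_index so padded positions do not contribute.

In [ ]:
# illustrative sketch: padding the two training sentences into one mini-batch
from torch.nn.utils.rnn import pad_sequence  # assumes PyTorch >= 0.4

seqs = [prepare_sequence(sent, word_to_idx) for sent, _ in training_data]
batch = pad_sequence(seqs, padding_value=0)  # shape (max_seq_len, batch=2)
print(batch.size())
# caveat: in practice a dedicated <pad> index should be reserved so that the padding
# value does not collide with a real word index (here 0 is the word 'The')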

In [105]:
inputs = prepare_sequence(training_data[0][0], word_to_idx)
tag_scores = model(inputs)

In [106]:
tag_scores


Out[106]:
Variable containing:
-0.1164 -2.2766 -4.9224
-5.3021 -0.0055 -7.6052
-4.2333 -5.0019 -0.0215
-0.0134 -4.7851 -5.3040
-4.7427 -0.0091 -7.9159
[torch.FloatTensor of size 5x3]

In [116]:
import numpy as np
result = np.argmax(tag_scores.data.numpy(), axis=1)
print(result)


[0 1 2 0 1]
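
The predicted indices can be mapped back to tag names by inverting tag_to_idx; [0 1 2 0 1] corresponds to DET NN V DET NN, which matches the training tags for 'The dog ate the apple'. A small added helper:

In [ ]:
# map predicted tag indices back to tag strings (illustrative)
idx_to_tag = {i: t for t, i in tag_to_idx.items()}
print([idx_to_tag[i] for i in result])  # ['DET', 'NN', 'V', 'DET', 'NN']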
