In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
torch.manual_seed(1)
Out[1]:
In [20]:
# input_size=3, hidden_size=3
lstm = nn.LSTM(3, 3)
# a sequence of length 5; each element has shape (batch=1, input_size=3)
inputs = [autograd.Variable(torch.randn((1, 3))) for _ in range(5)]
# initialize the hidden state (h_0, c_0)
# h_0: (num_layers * num_directions, batch, hidden_size)
# c_0: (num_layers * num_directions, batch, hidden_size)
hidden = (autograd.Variable(torch.randn(1, 1, 3)),
          autograd.Variable(torch.randn(1, 1, 3)))
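As a quick check (a sketch, not in the original notebook), nn.LSTM exposes the configuration set above as attributes:
In [ ]:
# sketch: confirm the layer configuration defined above
print(lstm.input_size, lstm.hidden_size, lstm.num_layers)   # expected: 3 3 1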
In [21]:
hidden[0].size(), hidden[1].size()
Out[21]:
In [22]:
# feed the sequence one time step at a time, keeping the output and hidden state
for i in inputs:
    # each input must have shape (seq_len=1, batch=1, input_size)
    out, hidden = lstm(i.view(1, 1, -1), hidden)
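For a single-layer, unidirectional LSTM the per-step output coincides with the hidden state h; a minimal sketch to verify this, reusing the lstm and inputs defined above:
In [ ]:
# sketch: at each step, out has shape (1, 1, 3) and equals hidden[0]
hidden = (autograd.Variable(torch.randn(1, 1, 3)),
          autograd.Variable(torch.randn(1, 1, 3)))
for i in inputs:
    out, hidden = lstm(i.view(1, 1, -1), hidden)
    print(out.size(), torch.equal(out.data, hidden[0].data))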
In [29]:
# Alternatively, feed the whole sequence at once to get the output at every time
# step (out) together with the final hidden state; the last row out[-1] matches it.
# inputs just has to be passed as a 3D tensor of shape (seq_len, batch, input_size).
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (autograd.Variable(torch.randn(1, 1, 3)),
          autograd.Variable(torch.randn(1, 1, 3)))
out, hidden = lstm(inputs, hidden)
print(out)     # output of the hidden layer h at every time step
print(hidden)  # final hidden state (h_n, c_n)
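The same relationship holds for the batched call: the last row of out should match the final hidden state h_n (a small sketch, assuming the cell above has just run):
In [ ]:
# sketch: out[-1] (last time step) coincides with the final hidden state h_n
print(torch.equal(out[-1].data, hidden[0][0].data))   # expected: True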
In [30]:
def prepare_sequence(seq, to_ix):
    """Convert a sequence of tokens into a Variable of word indices."""
    idxs = [to_ix[w] for w in seq]
    tensor = torch.LongTensor(idxs)
    return autograd.Variable(tensor)
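A quick usage example for prepare_sequence with a toy index (the words and indices here are made up for illustration):
In [ ]:
# sketch: prepare_sequence maps tokens to indices and wraps them in a Variable
toy_ix = {'hello': 0, 'world': 1}
print(prepare_sequence(['hello', 'world', 'hello'], toy_ix))   # LongTensor [0, 1, 0]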
In [31]:
training_data = [
    ('The dog ate the apple'.split(), ['DET', 'NN', 'V', 'DET', 'NN']),
    ('Everybody read that book'.split(), ['NN', 'V', 'DET', 'NN'])
]
In [32]:
training_data
Out[32]:
In [35]:
word_to_idx = {}
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_idx:
            word_to_idx[word] = len(word_to_idx)
print(word_to_idx)
In [36]:
tag_to_idx = {'DET': 0, 'NN': 1, 'V': 2}
In [37]:
tag_to_idx
Out[37]:
In [38]:
EMBEDDING_DIM = 6
HIDDEN_DIM = 6
In [86]:
class LSTMTagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        return (autograd.Variable(torch.zeros(1, 1, self.hidden_dim)),
                autograd.Variable(torch.zeros(1, 1, self.hidden_dim)))

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        # The whole sentence is fed at once, so lstm_out holds the output at every
        # time step and self.hidden holds the final hidden state (= the last output).
        # The batch size is fixed at 1 (only one sentence is fed in).
        lstm_out, self.hidden = self.lstm(embeds.view(len(sentence), 1, -1), self.hidden)
        # print('lstm_out:', lstm_out.size())
        # print('self.hidden:', self.hidden[0].size(), self.hidden[1].size())
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        # print('tag_space:', tag_space.size())
        tag_scores = F.log_softmax(tag_space, dim=1)
        # print('tag_scores:', tag_scores.size())
        return tag_scores
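To make the view calls in forward easier to follow, here is a small sketch (not part of the original notebook) running a throwaway model on the first training sentence, which has 5 words and 3 possible tags:
In [ ]:
# sketch: output shape of LSTMTagger.forward for a 5-word sentence
_m = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_idx), len(tag_to_idx))
_scores = _m(prepare_sequence(training_data[0][0], word_to_idx))
print(_scores.size())   # expected: torch.Size([5, 3]) = (len(sentence), tagset_size)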
In [87]:
print(len(word_to_idx), len(tag_to_idx))
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_idx), len(tag_to_idx))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)
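Why NLLLoss here: the model ends with log_softmax, and NLLLoss applied to log-probabilities is equivalent to cross-entropy applied to the raw scores. A tiny standalone sketch with random demo tensors (not the model's outputs):
In [ ]:
# sketch: NLLLoss(log_softmax(x)) == cross_entropy(x)
scores_demo = autograd.Variable(torch.randn(4, 3))               # 4 tokens, 3 tags
targets_demo = autograd.Variable(torch.LongTensor([0, 2, 1, 1]))
print(nn.NLLLoss()(F.log_softmax(scores_demo, dim=1), targets_demo))
print(F.cross_entropy(scores_demo, targets_demo))                 # same value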
In [88]:
training_data
Out[88]:
In [89]:
inputs = prepare_sequence(training_data[0][0], word_to_idx)
In [90]:
inputs
Out[90]:
In [91]:
tag_scores = model(inputs)
In [92]:
tag_scores
Out[92]:
In [104]:
for epoch in range(300):
    for sentence, tags in training_data:
        model.zero_grad()
        # a new sequence starts here, so the accumulated hidden-state history
        # must be cleared
        model.hidden = model.init_hidden()
        # convert the word sequence (strings) into a sequence of indices
        sentence_in = prepare_sequence(sentence, word_to_idx)
        targets = prepare_sequence(tags, tag_to_idx)
        tag_scores = model(sentence_in)
        # one sequence (one sentence) per update: compute the loss and backprop
        # (how to extend this to mini-batches? see the sketch after this cell)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
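On the mini-batch question in the loop above: one common approach (a sketch, not part of the original tutorial) is to pad the index sequences to a common length, sort them by length, and pack them with torch.nn.utils.rnn.pack_padded_sequence before the LSTM; the loss then has to ignore the padded positions.
In [ ]:
# sketch: batching two sentences of different length via padding + packing
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

batch = autograd.Variable(torch.LongTensor([[0, 1, 2, 3, 4],     # 'The dog ate the apple'
                                            [5, 6, 7, 8, 0]]))   # 'Everybody read that book' + pad
lengths = [5, 4]                                                  # true lengths, sorted descending
emb = model.word_embeddings(batch)                                # (batch, max_len, EMBEDDING_DIM)
packed = pack_padded_sequence(emb, lengths, batch_first=True)
packed_out, (h_n, c_n) = model.lstm(packed)                       # hidden state defaults to zeros
lstm_out, _ = pad_packed_sequence(packed_out, batch_first=True)   # (batch, max_len, HIDDEN_DIM)
print(lstm_out.size())
# note: index 0 is reused as a dummy pad here; a real setup would reserve a
# dedicated padding index and mask the padded positions out of the loss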
In [105]:
inputs = prepare_sequence(training_data[0][0], word_to_idx)
tag_scores = model(inputs)
In [106]:
tag_scores
Out[106]:
In [116]:
import numpy as np
result = np.argmax(tag_scores.data.numpy(), axis=1)
print(result)
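To read the prediction, the indices can be mapped back to tag names by inverting tag_to_idx (a small sketch); the first training sentence should ideally come out as DET NN V DET NN.
In [ ]:
# sketch: convert predicted indices back to tag strings
idx_to_tag = {i: t for t, i in tag_to_idx.items()}
print([idx_to_tag[int(i)] for i in result])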