In [20]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.optim import SGD
from torch.nn import CrossEntropyLoss
from time import time
import copy
In [50]:
# Hyperparameters shared by both the nn.RNN and nn.RNNCell experiments below.
# seq_len: time steps per sequence; batch_size: sequences per batch;
# hidden_size: doubles as both input and hidden dimension; num_layers: RNN depth.
seq_len, batch_size, hidden_size, num_layers = 10, 3, 5, 1
In [51]:
# Random input sequences and dummy all-ones targets, moved to the GPU.
# inputs: (batch_size, seq_len, hidden_size); labels: (batch_size, seq_len).
# FIX: torch.autograd.Variable is deprecated since PyTorch 0.4 — plain
# tensors carry autograd state directly, so the wrapper is dropped.
inputs = torch.randn(batch_size, seq_len, hidden_size).cuda()
labels = torch.ones(batch_size, seq_len).long().cuda()
inputs.size(), labels.size()
Out[51]:
In [52]:
# Single-layer RNN mapping hidden_size -> hidden_size; batch_first=True means
# inputs are laid out as (batch, seq, feature).
rnn = nn.RNN(hidden_size, hidden_size, num_layers, batch_first=True).cuda()
# Initial hidden state has shape (num_layers, batch, hidden).
# FIX: use num_layers rather than a hard-coded 1, so the shape stays correct
# if num_layers is changed in the config cell; Variable wrapper dropped
# (deprecated since PyTorch 0.4).
h_n = torch.randn(num_layers, batch_size, hidden_size).cuda()
loss_fn = CrossEntropyLoss()
opt = SGD(rnn.parameters(), lr=0.01)
In [55]:
# Train the nn.RNN for 1000 steps: the whole sequence is processed in a single
# forward call, then cross-entropy is summed over the time dimension.
# FIX: the notebook export stripped all indentation; the loop structure is
# reconstructed here (loss typo "lables" also corrected).
start = time()
for i in range(1000):
    loss = 0
    # out: [batch_size, seq_len, hidden_size]; last_h is the final hidden state.
    out, last_h = rnn(inputs, h_n)
    # labels: [batch_size, seq_len] — CrossEntropyLoss wants (N, C) vs (N,),
    # so accumulate the loss one time step at a time.
    for j in range(seq_len):
        loss += loss_fn(out[:, j, :], labels[:, j])
    opt.zero_grad()
    loss.backward()
    opt.step()
    if (i + 1) % 100 == 0:
        print(loss)
# Wall-clock time for the full 1000 iterations, for comparison with the
# RNNCell version below.
print(f'{time() - start:.2f}')
In [57]:
# RNNCell variant of the experiment above: the recurrence will be unrolled
# manually, one time step per call. Fresh loss and optimizer bound to the
# cell's parameters.
rnncell = nn.RNNCell(hidden_size, hidden_size)
rnncell = rnncell.cuda()
loss_fn = CrossEntropyLoss()
opt = SGD(params=rnncell.parameters(), lr=0.01)
In [58]:
# Fresh random inputs/targets and an initial hidden state for the cell.
# inputs: (batch, seq, hidden); labels: (batch, seq); h: (batch, hidden).
# FIX: torch.autograd.Variable is deprecated since PyTorch 0.4 — the
# wrapper is dropped in favor of plain tensors.
inputs = torch.randn(batch_size, seq_len, hidden_size).cuda()
labels = torch.ones(batch_size, seq_len).long().cuda()
h = torch.randn(batch_size, hidden_size).cuda()
In [62]:
# Train the RNNCell for 1000 steps, unrolling the recurrence by hand.
# FIX (bug): the original indexed labels[:, idx] — `idx` was never defined,
# which raises NameError on the first step; the time index is `j`.
# FIX (bug): the original seeded the recurrence with an *uninitialized*
# tensor from h.data.new(batch_size, hidden_size); we start from the
# prepared initial hidden state `h`, matching the nn.RNN version's use of
# an explicit initial state.
# FIX: indentation (stripped by the notebook export) reconstructed.
start = time()
for i in range(1000):
    loss = 0
    # Reset the hidden state at the start of every unroll.
    h_step = h
    for j in range(seq_len):
        h_step = rnncell(inputs[:, j, :], h_step)
        loss += loss_fn(h_step, labels[:, j])
    opt.zero_grad()
    loss.backward()
    opt.step()
    if (i + 1) % 100 == 0:
        print(loss)
# Wall-clock time for the full 1000 iterations, for comparison with nn.RNN.
print(f'{time() - start:.2f}')
In [ ]: