In [20]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.optim import SGD
from torch.nn import CrossEntropyLoss

from time import time
import copy

In [50]:
seq_len = 10
batch_size = 3
hidden_size = 5
num_layers = 1

Prepare Data


In [51]:
inputs = Variable(torch.randn(batch_size, seq_len, hidden_size)).cuda()
labels = Variable(torch.ones(batch_size, seq_len).long()).cuda()

inputs.size(), labels.size()


Out[51]:
(torch.Size([3, 10, 5]), torch.Size([3, 10]))
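Note that CrossEntropyLoss expects Long class indices as targets, so labels is a [batch, seq] tensor of indices (all 1s in this toy setup) rather than one-hot vectors; hidden_size (5) doubles as the number of classes here, which lets the RNN's raw outputs act directly as logits.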

Define model, loss function, and optimizer


In [52]:
rnn = nn.RNN(hidden_size, hidden_size, num_layers, batch_first=True).cuda()
h_n = Variable(torch.randn(1, batch_size, hidden_size)).cuda()  # initial hidden state: [num_layers, batch_size, hidden_size]

loss_fn = CrossEntropyLoss()
opt = SGD(rnn.parameters(), lr=0.01)
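
A quick shape check (not part of the timed run below) makes the two return values concrete: with batch_first=True, out holds every timestep's hidden state as [batch, seq, hidden], while the final hidden state keeps the [num_layers, batch, hidden] layout regardless of batch_first.

In [ ]:
# out collects the hidden state at every timestep; last_h is only the final one
out, last_h = rnn(inputs, h_n)
out.size(), last_h.size()   # (torch.Size([3, 10, 5]), torch.Size([1, 3, 5]))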

RNN module


In [55]:
start = time()

for i in range(1000):
    loss = 0
    
    out, last_h = rnn(inputs, h_n)
    
    # out: [batch_size, seq_len, hidden_size]
    # labels: [batch_size, seq_len]
    
    for j in range(seq_len):
        loss += loss_fn(out[:,j,:], labels[:,j])
        
    opt.zero_grad()
    loss.backward()
    opt.step()
    
    if (i+1) % 100 == 0:
        print(loss)

print(f'{time() - start:.2f}')


Variable containing:
 4.6870
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.4893
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.4324
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.4055
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3898
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3793
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3719
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3664
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3621
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3586
[torch.cuda.FloatTensor of size 1 (GPU 0)]

7.64
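
The Python loop over timesteps can also be folded into a single loss call by flattening the batch and time dimensions. A minimal sketch: this computes the mean over all batch*seq positions, so the scalar equals the looped sum of per-step means divided by seq_len, and gradients only change by that constant factor.

In [ ]:
# sketch: flatten [batch, seq, hidden] -> [batch*seq, hidden] and score all
# timesteps in one CrossEntropyLoss call instead of a Python loop
out, last_h = rnn(inputs, h_n)
flat_out = out.contiguous().view(-1, hidden_size)  # [30, 5]
flat_labels = labels.view(-1)                      # [30]
loss = loss_fn(flat_out, flat_labels)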

RNNCell


In [57]:
rnncell = nn.RNNCell(hidden_size, hidden_size).cuda()
loss_fn = CrossEntropyLoss()
opt = SGD(rnncell.parameters(), lr=0.01)

In [58]:
inputs = Variable(torch.randn(batch_size, seq_len, hidden_size)).cuda()
labels = Variable(torch.ones(batch_size, seq_len).long()).cuda()
h = Variable(torch.randn(batch_size, hidden_size)).cuda()  # RNNCell takes a 2-D hidden state: [batch_size, hidden_size]

In [62]:
start = time()
for i in range(1000):
    loss = 0
    
    # start every epoch from the same initial hidden state
    h_next = h

    for j in range(seq_len):
        h_next = rnncell(inputs[:, j, :], h_next)
        loss += loss_fn(h_next, labels[:, j])
        
    opt.zero_grad()
    loss.backward()
    opt.step()

    if (i+1) % 100 == 0:
        print(loss)

print(f'{time() - start:.2f}')


Variable containing:
 4.3610
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3583
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3560
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3540
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3522
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3507
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3494
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3482
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3471
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.3461
[torch.cuda.FloatTensor of size 1 (GPU 0)]

12.03
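
The comparison comes out as expected: the fused nn.RNN version finishes in 7.64 s against 12.03 s for nn.RNNCell, since the module processes the whole sequence in a single (cuDNN-backed) call while the cell pays a Python-level call per timestep. The per-step cell is still the right tool when you need to intervene at every step, e.g. for scheduled sampling or a custom state update.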

In [ ]: