RNN(LSTM)를 이용한 Minist 예제


In [1]:
%matplotlib inline

1. 입력DataLoader 설정

  • train 데이터로 loader를 지정 (dataset은 Minist, batch 사이즈 50, shuffle를 실행)
  • test 데이터로 loader를 지정 (dataset은 Minist, batch 사이즈 1000)

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np

is_cuda = torch.cuda.is_available() # cuda 사용가능시, True

batch_size = 50
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, transform=transforms.ToTensor()),
    batch_size=1000)


Files already downloaded

2. 사전 설정

* model
* loss
* opimizer

In [3]:
##################
# option variables 
##################
learning_rate = 1e-2
training_epoch = 2
batch_size = 100
# neural network properties 
n_hidden = 128  # number of latent 
n_layers = 2  # number of latent 
n_sequence = 28   # Image pixel number in row

class MnistModel(nn.Module):
    def __init__(self):
        super(MnistModel, self).__init__()
        # input is 28x28
        self.rnn = nn.LSTM(n_sequence, n_hidden, n_layers, batch_first=True)
        self.fc = nn.Linear(n_hidden, 10)
        
    def forward(self, x):
        # Set initial states 
        h0 = Variable(torch.zeros(n_layers, x.size(0), n_hidden))
        c0 = Variable(torch.zeros(n_layers, x.size(0), n_hidden))
        if is_cuda :  h0, c0 = h0.cuda(), c0.cuda()
        x, _ = self.rnn(x, (h0, c0))
        x = self.fc(x[:, -1 ,:])
        return F.log_softmax(x)
    
model = MnistModel()
if is_cuda :  model.cuda()
loss_fn = nn.NLLLoss() 
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

3. Trainning loop

* (입력 생성)
* model 생성
* loss 생성
* zeroGrad
* backpropagation
* optimizer step (update model parameter)

In [4]:
# trainning
model.train()
train_loss = []
train_accu = []

for epoch in range(3):
    for i, (image, target) in enumerate(train_loader):
        image = image.squeeze()
        image, target = Variable(image), Variable(target) # 입력image Target 설정 
        if is_cuda :  image, target = image.cuda(), target.cuda() 
        output = model(image) # model 생성
        loss = loss_fn(output, target) #loss 생성
        optimizer.zero_grad() # zero_grad
        loss.backward() # calc backward grad
        optimizer.step() # update parameter
        
        pred = output.data.max(1)[1]
        accuracy = pred.eq(target.data).sum()/batch_size
        
        train_loss.append(loss.data[0])
        train_accu.append(accuracy)

        if i % 300 == 0:
            print(i, loss.data[0])


0 2.305699348449707
300 0.22002992033958435
600 0.39025765657424927
900 0.07198648154735565
0 0.07134982198476791
300 0.08292707800865173
600 0.14626017212867737
900 0.12322460114955902
0 0.029253769665956497
300 0.10819914191961288
600 0.13862520456314087
900 0.0466989129781723

In [5]:
plt.plot(train_accu)


Out[5]:
[<matplotlib.lines.Line2D at 0x7fa0e36b9940>]

In [6]:
plt.plot(train_loss)


Out[6]:
[<matplotlib.lines.Line2D at 0x7fa0e35e80b8>]

4. Predict & Evaluate


In [7]:
model.eval()
correct = 0
for image, target in test_loader:
    image = image.squeeze()
    if is_cuda :  image, target = image.cuda(), target.cuda() 
    image, target = Variable(image, volatile=True), Variable(target)
    output = model(image)
    prediction = output.data.max(1)[1]
    correct += prediction.eq(target.data).sum()

print('\nTest set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))


Test set: Accuracy: 97.64%

6. plot images which failed to predict


In [8]:
model.eval()
image, target = iter(test_loader).next() #test_loader로 부터 한번만 dataset을 호출

if is_cuda :  image, target = image.cuda(), target.cuda() 
image, target = Variable(image, volatile=True), Variable(target)
output = model(image.squeeze())

## 이미지, 참값, 예측값을 numpy array로 변환 
images = image.data.cpu().numpy()
cls_true = target.data.cpu().numpy().squeeze()
prediction = output.data.max(1)[1].cpu().numpy().squeeze()

# 예측값이 참값과 틀린것을 확인
incorrect = (prediction != cls_true)

# 예측이 틀린 것만을 추출
images = images[incorrect]
cls_true = cls_true[incorrect]
prediction = prediction[incorrect]

# 에러율을 표지
print('error : {:.1%}, number ={:}'.format(incorrect.sum()/len(incorrect), incorrect.sum()))

# 틀린 것들의 이미지를 표시
tensorImg = torch.Tensor(images)
plt.imshow(torchvision.utils.make_grid(tensorImg).numpy().transpose((1,2,0)))
plt.show()

# 틀린 것들의 예측치를 표시
print('prediction :')
pred_resized = np.pad(prediction, (0, 8 - len(prediction)%8), 'constant', constant_values=(0, 0))
print(pred_resized.reshape(-1,8))
print('\n')

# 틀린 것들의 참값을 표시
print('True :')
true_resized = np.pad(cls_true, (0, 8 - len(cls_true)%8), 'constant', constant_values=(0, 0))
print(true_resized.reshape(-1,8))


error : 2.4%, number =24
prediction :
[[5 4 6 3 3 2 9 4]
 [1 1 3 3 0 2 4 6]
 [3 2 4 9 9 6 6 4]
 [0 0 0 0 0 0 0 0]]


True :
[[9 9 1 9 5 1 8 9]
 [2 7 1 8 6 3 6 8]
 [1 6 9 8 5 4 5 7]
 [0 0 0 0 0 0 0 0]]

In [ ]: