In this MNIST example, we will modify the dimensions of the last fully connected (linear) layers of the network and retrain it. There are a few ways to do this, listed below; we will implement method 3, which corresponds to finetuning.
Method 1: modify the network class definition itself. The original layers

self.fc1 = nn.Linear(64*7*7, 1024)
self.fc2 = nn.Linear(1024, 10)

are changed to

self.fc1 = nn.Linear(64*7*7, 512)
self.fc2 = nn.Linear(512, 10)

and training is then run again.
Method 2: replace the layers on an instance of the model.

model = MnistModel()
model.fc1 = nn.Linear(64*7*7, 512)
model.fc2 = nn.Linear(512, 10)

Training is then run again.
Method 3 (finetuning): load the pretrained parameters into the original architecture first, then replace the last two layers and pass only their parameters to the optimizer.

model = MnistModel()

# load the saved parameters of MnistModel
checkpoint = torch.load(checkpoint_filename)
model.load_state_dict(checkpoint)

# replace the last two layers of the model
model.fc1 = nn.Linear(64*7*7, 512)
model.fc2 = nn.Linear(512, 10)

### specify the parameters to update
fc_parameters = [
    {'params': model.fc1.parameters()},
    {'params': model.fc2.parameters()}
]

### pass only these parameters to the optimizer
optimizer = torch.optim.Adam(fc_parameters, lr=0.0001)
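Before retraining, it can be useful to check how many parameters will actually be updated. The helper below is a minimal sketch that is not part of the original notebook; it simply compares the number of trainable parameters (requires_grad=True) with the total, so you can confirm that only the new fc layers remain trainable once the pretrained layers are frozen.

# Sketch (assumed helper, not from the original notebook):
# compare trainable vs. total parameter counts.
def count_parameters(m):
    trainable = sum(p.data.numel() for p in m.parameters() if p.requires_grad)
    total = sum(p.data.numel() for p in m.parameters())
    return trainable, total

# e.g. print(count_parameters(model)) after the finetuning setup below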
In [1]:
%matplotlib inline
In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
is_cuda = torch.cuda.is_available()  # True when CUDA is available
checkpoint_filename = 'minist.ckpt'
batch_size = 50
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
    batch_size=100)
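As a quick sanity check (not part of the original notebook), one batch can be drawn from the loader to confirm the expected shapes before building the model.

# Sketch: with batch_size=50 and MNIST images, this should print
# torch.Size([50, 1, 28, 28]) torch.Size([50])
images, labels = next(iter(train_loader))
print(images.size(), labels.size())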
In [8]:
class MnistModel(nn.Module):
    def __init__(self):
        super(MnistModel, self).__init__()
        # input is 28x28
        # padding=2 for same padding
        self.conv1 = nn.Conv2d(1, 32, 5, padding=2)
        # feature map size is 14*14 by pooling
        # padding=2 for same padding
        self.conv2 = nn.Conv2d(32, 64, 5, padding=2)
        # feature map size is 7*7 by pooling
        self.fc1 = nn.Linear(64*7*7, 1024)
        self.fc2 = nn.Linear(1024, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, 64*7*7)  # reshape Variable
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)
model = MnistModel()

# load the saved parameters of MnistModel
checkpoint = torch.load(checkpoint_filename)
model.load_state_dict(checkpoint)

### freeze the pretrained parameters so they are not updated
for param in model.parameters():
    param.requires_grad = False

# replace the last two layers; the new layers are trainable by default
model.fc1 = nn.Linear(64*7*7, 512)
model.fc2 = nn.Linear(512, 10)

### pass only the new fc layers' parameters to the optimizer
fc_parameters = [
    {'params': model.fc1.parameters()},
    {'params': model.fc2.parameters()}
]
optimizer = torch.optim.Adam(fc_parameters, lr=0.0001)

if is_cuda: model.cuda()

loss_fn = nn.NLLLoss()
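To verify the setup, the lines below (not in the original notebook) list the parameters that are still trainable; after freezing the pretrained weights and replacing the last two layers, only the fc1 and fc2 weights and biases should be printed.

# Sketch: only the replaced fc1/fc2 parameters should have requires_grad=True.
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, tuple(param.size()))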
In [9]:
# training
model.train()
train_loss = []
train_accu = []
for epoch in range(3):
    for i, (image, target) in enumerate(train_loader):
        if is_cuda: image, target = image.cuda(), target.cuda()
        image, target = Variable(image), Variable(target)  # wrap input image and target
        output = model(image)           # forward pass
        loss = loss_fn(output, target)  # compute loss
        optimizer.zero_grad()           # clear previous gradients
        loss.backward()                 # backpropagate
        optimizer.step()                # update the fc parameters
        pred = output.data.max(1)[1]
        accuracy = pred.eq(target.data).sum() / batch_size
        train_loss.append(loss.data[0])
        train_accu.append(accuracy)
        if i % 300 == 0:
            print(i, loss.data[0])
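After training, the finetuned parameters can be saved in the same way the original checkpoint was created. This is a minimal sketch rather than part of the original notebook, and the filename minist_finetuned.ckpt is only an example.

# Sketch: save the finetuned weights (the filename is a placeholder).
torch.save(model.state_dict(), 'minist_finetuned.ckpt')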
In [10]:
plt.plot(train_accu)
Out[10]:
(plot of train_accu: per-batch training accuracy)
In [11]:
plt.plot(train_loss)
Out[11]:
(plot of train_loss: per-batch training loss)
In [12]:
model.eval()
correct = 0
for image, target in test_loader:
    if is_cuda: image, target = image.cuda(), target.cuda()
    image, target = Variable(image, volatile=True), Variable(target)  # no gradients needed at test time
    output = model(image)
    prediction = output.data.max(1)[1]
    correct += prediction.eq(target.data).sum()
print('\nTest set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))