In [6]:
# PyTorch library
import torch
import torch.nn.init
from torch.autograd import Variable
torch.manual_seed(777) # reproducibility
Out[6]:
In [7]:
import torchvision.utils as utils
import torchvision.datasets as dsets
import torchvision.transforms as transforms
In [27]:
# Other Python libraries
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import random
In [9]:
# MNIST dataset
mnist_train = dsets.MNIST(root='data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)
mnist_test = dsets.MNIST(root='data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)
In [28]:
# plot one example
print(mnist_train.train_data.size())    # torch.Size([60000, 28, 28])
print(mnist_train.train_labels.size())  # torch.Size([60000])
idx = 0
plt.imshow(mnist_train.train_data[idx, :, :].numpy(), cmap='gray')
plt.title('%i' % mnist_train.train_labels[idx])
Out[28]:
In [10]:
# Hyper-parameters
batch_size = 100
# dataset loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=1)
In [19]:
def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# For example, fetch a single batch from data_loader
batch_images, batch_labels = next(iter(data_loader))
print(batch_images.size())
print(batch_labels.size())
# show images and print labels
imshow(utils.make_grid(batch_images))
batch_labels.numpy()
# Typical usage: read images and labels with a for loop.
# for batch_images, batch_labels in data_loader:
#     print(batch_images.size())
#     print(batch_labels)
# Using enumerate also yields the batch index along with the images and labels.
# for i, (batch_images, batch_labels) in enumerate(data_loader):
#     print(batch_images.size())
#     print(batch_labels)
Out[19]:
In [48]:
# Neural Network
linear1 = torch.nn.Linear(784, 512, bias=True)
linear2 = torch.nn.Linear(512, 10, bias=True)
relu = torch.nn.ReLU()
#sigmoid = torch.nn.Sigmoid()
# model
model = torch.nn.Sequential(linear1, relu, linear2)
#model.load_state_dict(torch.load('NN.pkl')) # Load the Trained Model
print(model)
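torch.nn.init is imported at the top of the notebook but never used; a minimal sketch of explicit weight initialization, assuming Xavier uniform is the desired scheme (newer PyTorch spells this xavier_uniform_ with a trailing underscore):
In [ ]:
# Sketch: explicit Xavier-uniform initialization of both linear layers
torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform(linear2.weight)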
In [41]:
# Softmax is applied internally when the cost is computed
# (CrossEntropyLoss combines LogSoftmax and NLLLoss)
cost_func = torch.nn.CrossEntropyLoss()
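The comment above can be checked directly: CrossEntropyLoss is LogSoftmax followed by NLLLoss. A minimal sketch on made-up toy values:
In [ ]:
# Sketch: CrossEntropyLoss equals LogSoftmax + NLLLoss (toy values)
logits = Variable(torch.randn(3, 10))            # toy raw scores
targets = Variable(torch.LongTensor([1, 0, 4]))  # toy class indices
ce = torch.nn.CrossEntropyLoss()(logits, targets)
nll = torch.nn.NLLLoss()(torch.nn.LogSoftmax(dim=1)(logits), targets)
print(ce.data[0], nll.data[0])  # the two values match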
In [51]:
# Hyper-parameters
learning_rate = 0.001
training_epochs = 5
# Adam Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train model
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(mnist_train) // batch_size
    for i, (batch_images, batch_labels) in enumerate(data_loader):
        # reshape each image into a [batch_size x 784] matrix
        X = Variable(batch_images.view(-1, 28 * 28))
        Y = Variable(batch_labels)  # labels are not one-hot encoded
        optimizer.zero_grad()       # zero the gradient buffers
        Y_prediction = model(X)     # forward propagation
        cost = cost_func(Y_prediction, Y)  # compute cost
        cost.backward()             # compute gradients
        optimizer.step()            # update parameters
        avg_cost += cost / total_batch
    print("[Epoch: {:>4}] cost = {:>.9}".format(epoch + 1, avg_cost.data[0]))
print('Learning Finished!')
torch.save(model.state_dict(), 'NN.pkl') # Save the Model
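To reuse the saved weights later (as the commented-out load_state_dict line in the model cell suggests), rebuild the same architecture and load the state dict; a minimal sketch:
In [ ]:
# Sketch: restoring the trained model from 'NN.pkl'
restored = torch.nn.Sequential(torch.nn.Linear(784, 512, bias=True),
                               torch.nn.ReLU(),
                               torch.nn.Linear(512, 10, bias=True))
restored.load_state_dict(torch.load('NN.pkl'))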
In [50]:
model.state_dict()
Out[50]:
In [43]:
# Test the Model
correct = 0
total = 0
for images, labels in mnist_test:
    images = Variable(images.view(-1, 28 * 28))
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)
    total += 1
    correct += (predicted == labels).sum()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
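The loop above scores one test image at a time; a minimal sketch of a batched alternative using a second DataLoader (the batch size of 100 and shuffle=False are arbitrary choices, not from the original):
In [ ]:
# Sketch: batched evaluation over the test set
test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=100,
                                          shuffle=False)
correct, total = 0, 0
for images, labels in test_loader:
    outputs = model(Variable(images.view(-1, 28 * 28)))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
print('Accuracy: %d %%' % (100 * correct / total))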
In [49]:
# Get one and predict
r = random.randint(0, len(mnist_test) - 1)
X_single_data = Variable(mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float())
Y_single_data = Variable(mnist_test.test_labels[r:r + 1])
single_prediction = model(X_single_data)
plt.imshow(X_single_data.data.view(28, 28).numpy(), cmap='gray')
print("Label: ", Y_single_data.data)
print("Prediction: ", torch.max(single_prediction.data, 1)[1])
In [30]:
for i in range(20):
    weight = model[0].weight[i, :].data.view(28, 28)
    weight = (weight - torch.min(weight)) / (torch.max(weight) - torch.min(weight))
    plt.imshow(weight.numpy(), cmap='gray')
    plt.show()
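The same 20 weight rows can also be shown in a single figure with utils.make_grid (already imported); a minimal sketch, normalizing with one global min/max instead of per-image:
In [ ]:
# Sketch: all 20 first-layer weight images in one 4x5 grid
w = model[0].weight.data[:20].contiguous().view(20, 1, 28, 28)
w = (w - w.min()) / (w.max() - w.min())  # global min-max normalization
grid = utils.make_grid(w, nrow=5)
plt.imshow(grid.numpy().transpose(1, 2, 0))
plt.show()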