In [1]:
import torch
In [2]:
x = torch.Tensor(5, 3)  # construct an uninitialized 5x3 tensor
In [3]:
print(x)
In [7]:
len(x)
Out[7]:
In [8]:
x.shape
Out[8]:
In [9]:
y = torch.rand(5,3)
In [10]:
print(y)
In [12]:
print(x + y)
In [13]:
print(torch.add(x, y))
In [14]:
result = torch.Tensor(5, 3)
print(result)
In [15]:
torch.add(x, y, out=result)
print(result)
In [17]:
print('before y:', y)
y.add_(x)
print('after y:', y)
In [22]:
x.t_()
Out[22]:
x.copy_(y) and x.t_() are operations that change x in place (methods ending in an underscore mutate the tensor they are called on).
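The cells above demonstrate add_() and t_() but not copy_(); a tiny sketch of it, using the torch already imported above and two example tensors introduced only for illustration:
a = torch.zeros(2, 2)
b = torch.ones(2, 2)
a.copy_(b)  # copies b's values into a, mutating a in place
print(a)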
In [23]:
# NumPy-style indexing can be used
print(x[:, 1])
In [25]:
print(x[:,:])
In [26]:
a = torch.ones(5)
print(a)
In [27]:
b = a.numpy()
print(b)
In [32]:
a.add_(1)
print(a)
print(b)
# a and b are linked (they share the same underlying memory)
In [29]:
print(b)
In [30]:
a.add_(2)
print(b)
In [33]:
id(a)
Out[33]:
In [34]:
id(b)
Out[34]:
In [35]:
import numpy as np
In [36]:
a = np.ones(5)
b = torch.from_numpy(a)
In [37]:
np.add(a, 1, out=a)
print(a)
print(b)
In [39]:
%%time
if torch.cuda.is_available():
    x = x.cuda()
    y = y.cuda()
    x + y
In [40]:
torch.cuda.is_available()
Out[40]:
In [45]:
torch.cuda.current_device()
Out[45]:
In [47]:
torch.cuda.device_count()
Out[47]:
autograd.Variable
- wraps a Tensor
- just calling the .backward() API computes all the gradients automatically
- Variables and Functions are interconnected and build up an acyclic graph of computation
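As a quick illustration of the wrapping described above (a minimal sketch using the same old Variable API; the names v and w are introduced here only for this example):
import torch
from torch.autograd import Variable

v = Variable(torch.ones(3), requires_grad=True)
print(v.data)     # the wrapped Tensor
print(v.grad)     # None until backward() has been called
print(v.grad_fn)  # None, because v is a leaf Variable created by the user

w = (v * 3).sum() # w results from operations on v, so it has a grad_fn
print(w.grad_fn)
w.backward()      # fills v.grad with dw/dv = 3 for every element
print(v.grad)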
In [49]:
import torch
from torch.autograd import Variable
In [57]:
x = Variable(torch.ones(2, 2), requires_grad=True)
print(x)
In [58]:
y = x + 2
print(y)
In [59]:
print(y.grad_fn)
# y was created as the result of an operation, so it has a grad_fn
In [60]:
z = y * y * 3
out = z.mean()
print(z, out)
In [62]:
print(x.grad)
In [63]:
out.backward()
In [64]:
print(x.grad)
In [65]:
x = torch.randn(3)
x = Variable(x, requires_grad=True)
y = x * 2
while y.data.norm() < 1000:
    y = y * 2
print(y)
In [66]:
gradients = torch.FloatTensor([0.1, 1.0, 0.0001])
# y is not a scalar, so backward() needs a vector of gradients to weight each output
y.backward(gradients)
print(x.grad)
In [68]:
dtype = torch.FloatTensor
N, D_in, H, D_out = 64, 1000, 100, 10
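The cell above only fixes the tensor type and the layer sizes (N, D_in, H, D_out); the manual two-layer network those constants are usually paired with is not shown. A minimal sketch of that network with hand-written backprop, assuming illustrative names x, y, w1, w2 for random data and weights (they shadow the earlier small tensors):
x = torch.randn(N, D_in).type(dtype)    # random input batch
y = torch.randn(N, D_out).type(dtype)   # random targets
w1 = torch.randn(D_in, H).type(dtype)   # first-layer weights
w2 = torch.randn(H, D_out).type(dtype)  # second-layer weights

learning_rate = 1e-6
for t in range(500):
    # forward pass: linear -> ReLU -> linear
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 0:
        print(t, loss)

    # manually backprop the squared-error loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # gradient-descent update
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2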
A typical training procedure for a neural network:
- Define the neural network that has some learnable parameters (or weights)
- Iterate over a dataset of inputs
- Process input through the network
- Compute the loss (how far is the output from being correct)
- Propagate gradients back into the network's parameters
- Update the weights of the network, typically using a simple update rule:
  weight = weight - learning_rate * gradient
  (a minimal sketch of one such training step follows)
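To tie the listed steps together before the full example below, here is a minimal, self-contained sketch of one training step, written against the same old Variable API used throughout this notebook; the toy nn.Linear model and the random input/target tensors are assumptions for illustration only:
import torch
import torch.nn as nn
from torch.autograd import Variable

model = nn.Linear(10, 2)              # toy network with learnable weights
input = Variable(torch.randn(4, 10))  # a batch of 4 inputs
target = Variable(torch.randn(4, 2))  # dummy targets
criterion = nn.MSELoss()

output = model(input)                 # process input through the network
loss = criterion(output, target)      # compute the loss
model.zero_grad()                     # clear any stale gradients
loss.backward()                       # propagate gradients back into the parameters

learning_rate = 0.01
for p in model.parameters():          # weight = weight - learning_rate * gradient
    p.data.sub_(learning_rate * p.grad.data)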
In [73]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))  # max pooling over a 2x2 window
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

net = Net()
In [74]:
print(net)
In [75]:
params = list(net.parameters())
In [76]:
print(len(params))
print(params[0].size())
In [77]:
input = Variable(torch.randn(1, 1, 32, 32))
out = net(input)
print(out)
In [78]:
net.zero_grad()
out.backward(torch.randn(1, 10))
In [79]:
output = net(input)
target = Variable(torch.arange(1, 11)) # a dummy target, for example
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)
In [80]:
print(loss.grad_fn) # MSELoss
print(loss.grad_fn.next_functions[0][0]) # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU
In [81]:
net.zero_grad() # zeroes the gradient buffers of all parameters
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)
In [82]:
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)
In [83]:
import torch.optim as optim
# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)
# in your training loop:
optimizer.zero_grad() # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step() # Does the update
In [84]:
import torch
import torchvision
import torchvision.transforms as transforms
In [85]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
In [86]:
import matplotlib.pyplot as plt
import numpy as np
# functions to show an image
def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
# get some random training images
dataiter = iter(trainloader)
images, labels = dataiter.next()
# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
In [89]:
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
In [90]:
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        # wrap them in Variable (the .cuda() variants require net.cuda() first,
        # which is never called in this notebook, so keep everything on the CPU)
        inputs, labels = Variable(inputs), Variable(labels)
        # inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.data[0]
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')
In [91]:
dataiter = iter(testloader)
images, labels = dataiter.next()
# print images
# imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))
In [92]:
outputs = net(Variable(images))
In [93]:
_, predicted = torch.max(outputs.data, 1)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]]
                              for j in range(4)))
In [94]:
correct = 0
total = 0
for data in testloader:
    images, labels = data
    outputs = net(Variable(images))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
In [95]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
for data in testloader:
    images, labels = data
    outputs = net(Variable(images))
    _, predicted = torch.max(outputs.data, 1)
    c = (predicted == labels).squeeze()
    for i in range(4):
        label = labels[i]
        class_correct[label] += c[i]
        class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))