In [3]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
from torchvision import datasets, transforms
In [2]:
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,)),  # MNIST images have a single channel
                                ])
# Download and load the training data
trainset = datasets.MNIST("MNIST_data/", download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
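Before building the model, it helps to sanity-check one batch from the loader. A minimal sketch (assumes the cell above has run, so trainloader exists):
In [ ]:
images, labels = next(iter(trainloader))
print(images.shape, labels.shape)  # expect torch.Size([64, 1, 28, 28]) and torch.Size([64])
plt.imshow(images[0].squeeze(), cmap="gray")
plt.title(f"label: {labels[0].item()}")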
In [11]:
# Define a feed-forward network
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10)  # raw logits instead of softmax output
)
# Define the loss
criterion = nn.CrossEntropyLoss()
# Get data
images, labels = next(iter(trainloader))
# Flatten images
images = images.view(images.shape[0], -1)
In [12]:
# forward pass to get the logits
logits = model(images)
# pass the logits to criterion to get the loss
loss = criterion(logits, labels)
print(loss)
In [21]:
# Define a feed-forward network
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.LogSoftmax(dim=1)  # dim=1: softmax over the 10 class scores in each row
)
# Define the loss
criterion = nn.NLLLoss()
# Get data
images, labels = next(iter(trainloader))
# Flatten images
images = images.view(images.shape[0], -1)
In [23]:
# forward pass to get the log-probabilities
log_probs = model(images)
# pass the log-probabilities to the criterion to get the loss
loss = criterion(log_probs, labels)
print(loss)
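nn.CrossEntropyLoss is equivalent to nn.LogSoftmax followed by nn.NLLLoss, so the two approaches above should give the same loss on the same inputs. A quick check on random data (a sketch, not part of the original notebook):
In [ ]:
logits = torch.randn(64, 10)
targets = torch.randint(0, 10, (64,))
ce = nn.CrossEntropyLoss()(logits, targets)
nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)
print(torch.allclose(ce, nll))  # expect True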
In [38]:
x = torch.randn(2, 2, requires_grad=True)
print(x)
In [39]:
y = x ** 2
print(y)
print(y.grad_fn)
In [40]:
z = y.mean()
print(z)
In [41]:
print(x.grad)
Calling z.backward() calculates the gradients of z with respect to x:
In [42]:
z.backward()
In [48]:
print("grad: ", x.grad)
print("x:", x)
print("x/2: ", x / 2) # equal to gradients mathamatically = x / 2
In [54]:
print("Before backward pass: \n", model[0].weight.grad)
loss.backward()
print("After backward pass: \n", model[0].weight.grad)
In [56]:
from torch import optim
# Optimizers require the parameters to optimize and the learning rate
optimizer = optim.SGD(model.parameters(), lr=0.01)
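Under the hood, plain SGD just subtracts the gradient scaled by the learning rate from each parameter. A rough manual equivalent of optimizer.step() (a sketch for illustration; in practice keep using optim.SGD):
In [ ]:
def manual_sgd_step(parameters, lr=0.01):
    # same update as optim.SGD with no momentum or weight decay: w <- w - lr * w.grad
    with torch.no_grad():
        for p in parameters:
            if p.grad is not None:
                p -= lr * p.grad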
The general training process with PyTorch:
In [63]:
print("Initial weights: \n", model[0].weight)
images, labels = next(iter(trainloader))
images = images.view(64, 784)
# !Important: Clear the gradients. otherwise, the gradients will be accumulated
optimizer.zero_grad()
# Forward pass
output = model.forward(images)
loss = criterion(output, labels)
# Backward pass
loss.backward()
print("Gradient: \n", model[0].weight.grad)
# Take a update step with the optimizer
optimizer.step()
print("Updated weights: \n", model[0].weight)
In [68]:
epochs = 5
for e in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten
        images = images.view(images.shape[0], -1)
        # !Important: clear the gradients, otherwise they accumulate across iterations
        optimizer.zero_grad()
        # Forward pass
        output = model(images)
        loss = criterion(output, labels)
        # Backward pass
        loss.backward()
        # Take an update step with the optimizer
        optimizer.step()
        running_loss += loss.item()
    else:
        print(f"Training loss: {running_loss/len(trainloader)}")
In [88]:
images, labels = next(iter(trainloader))
img = images[0].view(1, -1)
# turn off gradient tracking to speed up inference
with torch.no_grad():
    log_probs = model(img)
    # the model outputs log-probabilities, so exponentiate to get probabilities
    probs = torch.exp(log_probs)
In [95]:
plt.imshow(img.view(1, 28, 28).squeeze(), cmap='gray')
Out[95]: (the 28×28 digit image is displayed)
In [90]:
print(probs.numpy())
print("predict:", probs.argmax(dim=1).item())