Based on the code from the PyTorch examples repository (pytorch/examples).
In [60]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.autograd import Variable
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['image.cmap'] = 'gray'
%matplotlib inline
In [61]:
# input batch size for training (default: 64)
batch_size = 64
# input batch size for testing (default: 1000)
test_batch_size = 1000
# number of epochs to train (default: 10)
epochs = 10
# learning rate (default: 0.01)
lr = 0.01
# SGD momentum (default: 0.5)
momentum = 0.5
# disables CUDA training
no_cuda = True
# random seed (default: 1)
seed = 1
# how many batches to wait before logging training status
log_interval = 10
# Setting seed for reproducibility.
torch.manual_seed(seed)
cuda = not no_cuda and torch.cuda.is_available()
print("CUDA: {}".format(cuda))
Setting up the data loaders
In [62]:
if cuda:
    torch.cuda.manual_seed(seed)
cudakwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # Precalculated mean and std of MNIST.
])
train_set = datasets.MNIST(
    root='data',
    train=True,
    transform=mnist_transform,
    download=True,
)
test_set = datasets.MNIST(
    root='data',
    train=False,
    transform=mnist_transform,
    download=True,
)
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True,
    **cudakwargs
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=test_batch_size,
    shuffle=True,
    **cudakwargs
)
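As a quick sanity check, one batch can be pulled from the loader to confirm the expected shapes (the tensor sizes in the comments assume the default batch_size of 64):
images, labels = next(iter(train_loader))
print(images.size())   # torch.Size([64, 1, 28, 28]) -- a batch of normalized MNIST images
print(labels.size())   # torch.Size([64])            -- the corresponding digit labels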
The function train implements one epoch of training: it loops over the training data, fetches a batch, computes the model output, and compares the prediction to the target with the negative log-likelihood loss torch.nn.functional.nll_loss().
In [63]:
def train(model, loader, optimizer, epoch, log_interval=100):
    model.train()  # Set model to training mode.
    for batch_idx, (data, target) in enumerate(loader):  # Get the next batch.
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()              # Set gradients to zero, to avoid accumulation.
        output = model(data)               # Pass data through the network.
        loss = F.nll_loss(output, target)  # Calculate the loss.
        loss.backward()                    # Compute gradients.
        optimizer.step()                   # Update weights.
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loader.dataset),
                100. * batch_idx / len(loader), loss.data[0]))
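For intuition, nll_loss simply picks out the log-probability assigned to the target class, negates it, and averages over the batch. A minimal illustration with made-up values:
logits = Variable(torch.randn(4, 10))               # 4 samples, 10 classes (random, for illustration only)
log_probs = F.log_softmax(logits, dim=1)            # what the model's readout produces
targets = Variable(torch.LongTensor([3, 0, 7, 1]))  # one target class per sample
print(F.nll_loss(log_probs, targets))               # mean of -log_probs[i, targets[i]]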
Implementing the test procedure
In [64]:
def test(model, loader):
    model.eval()  # Set model to evaluation mode (disables dropout).
    test_loss = 0
    correct = 0
    for data, target in loader:
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]  # The model already outputs log-probabilities.
        pred = output.data.max(1)[1]  # Get the index of the max log-probability.
        correct += pred.eq(target.data).cpu().sum()
    test_loss /= len(loader)  # The loss function already averages over the batch size.
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(loader.dataset),
        100. * correct / len(loader.dataset)))
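The line pred = output.data.max(1)[1] takes the argmax over the class dimension: max(1) returns a (values, indices) pair and [1] selects the indices. A small illustration with made-up scores:
scores = torch.Tensor([[0.1, 2.0, -1.0],
                       [1.5, 0.2,  0.3]])
values, indices = scores.max(1)   # maximum over the class dimension
print(indices)                    # predicted class per row: 1 and 0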
In [65]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv2d_1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2d_2 = nn.Conv2d(32, 32, kernel_size=3)
        self.dense_1 = nn.Linear(3872, 64)
        self.dense_2 = nn.Linear(64, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv2d_1(x), kernel_size=2))
        x = F.relu(self.conv2d_2(x))
        x = F.dropout(x, training=self.training)
        x = x.view(-1, 3872)  # Flatten: 32 channels * 11 * 11 = 3872 features.
        x = F.relu(self.dense_1(x))
        x = F.dropout(x, training=self.training)
        x = self.dense_2(x)
        return F.log_softmax(x, dim=1)
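The input size 3872 of dense_1 follows from the image size: a 28x28 input becomes 26x26 after the first 3x3 convolution, 13x13 after 2x2 max pooling, and 11x11 after the second 3x3 convolution, with 32 channels, i.e. 32 * 11 * 11 = 3872. A quick shape check with a dummy batch of two zero images:
dummy = Variable(torch.zeros(2, 1, 28, 28))  # two fake MNIST-sized images
print(Net()(dummy).size())                   # expected: torch.Size([2, 10])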
In [66]:
model = Net()
if cuda:
    model.cuda()
In [67]:
for p in model.parameters():
    print(p.data.shape)
In [68]:
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
for epoch in range(1, 3):
    train(model, train_loader, optimizer, epoch)
In [69]:
test(model, test_loader)
In [70]:
class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)

model_seq = nn.Sequential(OrderedDict([
    ('conv2d_1', nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)),
    ('relu_1', nn.ReLU()),
    ('max_pooling2d_1', nn.MaxPool2d(kernel_size=2)),
    ('conv2d_2', nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)),
    ('relu_2', nn.ReLU()),
    ('dropout_1', nn.Dropout(p=0.25)),
    ('flatten_1', Flatten()),
    ('dense_1', nn.Linear(3872, 64)),
    ('relu_3', nn.ReLU()),
    ('dropout_2', nn.Dropout(p=0.5)),
    ('dense_2', nn.Linear(64, 10)),
    ('readout', nn.LogSoftmax(dim=1))
]))
if cuda:
    model_seq.cuda()
In [71]:
model.__call__
Out[71]:
In [72]:
model.forward
Out[72]:
In [73]:
for p in model_seq.parameters():
    print(p.data.shape)
In [102]:
for m in model_seq.modules():
    print(m)
In [107]:
for m in model_seq.children():
    print(m)
In [115]:
model_seq.named_modules
Out[115]:
In [118]:
for l in model_seq:
    print(l)
In [117]:
model_seq[0]
Out[117]:
In [103]:
model_seq._modules
Out[103]:
In [74]:
optimizer = torch.optim.SGD(model_seq.parameters(), lr=lr)
In [75]:
for epoch in range(1, 3):
    train(model_seq, train_loader, optimizer, epoch)
In [76]:
test(model_seq, test_loader)
In [119]:
model_file = 'example_torch_mnist_model'
if cuda: model_file += '_gpu'
In [120]:
name = model_file + '.pth'
In [121]:
torch.save(model_seq.state_dict(), name)
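A state_dict is just an ordered mapping from parameter names to tensors, which is why it can be loaded into a freshly constructed model of the same architecture. Because the layer names in model_seq match the attribute names used in Net, a Net instance can load the weights saved from model_seq below. The stored names can be inspected like this:
for key in model_seq.state_dict().keys():
    print(key)   # e.g. 'conv2d_1.weight', 'conv2d_1.bias', ..., 'dense_2.bias'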
In [90]:
model2 = Net()
if cuda:
    model2.cuda()
model2.load_state_dict(torch.load(name))
In [91]:
test(model2, test_loader)
In [92]:
name = model_file + '.pth.tar'
In [93]:
model.eval()
torch.save(model, name)
In [94]:
model3 = torch.load(name)
In [95]:
test(model3, test_loader)