In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['image.cmap'] = 'gray'
%matplotlib inline

Loading the data


In [2]:
# input batch size for training (default: 64)
batch_size = 64

# input batch size for testing (default: 1000)
test_batch_size = 1000

# number of epochs to train (default: 10)
epochs = 10

# learning rate (default: 0.01)
lr = 0.01

# SGD momentum (default: 0.5)
momentum = 0.5

# disables CUDA training
no_cuda = True

# random seed (default: 1)
seed = 1

# how many batches to wait before logging training status
log_interval = 10

# Setting seed for reproducibility.
torch.manual_seed(seed)

cuda = not no_cuda and torch.cuda.is_available()
print("CUDA: {}".format(cuda))


CUDA: False

In [3]:
if cuda:
    torch.cuda.manual_seed(seed)
cudakwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)) # Precalculated mean and std of MNIST.
])

train_set = datasets.MNIST(
    root='data',
    train=True,
    transform=mnist_transform,
    download=True,
)

test_set = datasets.MNIST(
    root='data',
    train=False,
    transform=mnist_transform,
    download=True,
)

train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True,
    **cudakwargs
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=test_batch_size,
    shuffle=True,
    **cudakwargs
)
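
The mean (0.1307) and std (0.3081) used above are the commonly quoted MNIST statistics. As a sanity check, they can be recomputed from the raw training images; the snippet below is a minimal sketch (it loads the whole training set into memory at once, which is fine for MNIST):

# Sketch: recompute the normalization statistics from the raw images.
raw_set = datasets.MNIST(root='data', train=True,
                         transform=transforms.ToTensor(), download=True)
raw_loader = torch.utils.data.DataLoader(raw_set, batch_size=len(raw_set))
images, _ = next(iter(raw_loader))  # shape: (60000, 1, 28, 28)
print(images.mean(), images.std())  # ~0.1307, ~0.3081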

In [4]:
def test(model, loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for data, target in loader:
            if cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # The model returns log-probabilities, so use nll_loss here;
            # cross_entropy would apply log_softmax a second time.
            test_loss += F.nll_loss(output, target).item()
            pred = output.max(1)[1]  # get the index of the max log-probability
            correct += pred.eq(target).sum().item()

    # nll_loss already averages over the batch, so dividing by the number
    # of batches gives the average loss per batch.
    test_loss /= len(loader)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(loader.dataset),
        100. * correct / len(loader.dataset)))

Method 1

If only the state_dict was saved, we still need the source code that defines the model class.


In [6]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv2d_1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2d_2 = nn.Conv2d(32, 32, kernel_size=3)
        # 28x28 input -> conv (26x26) -> pool (13x13) -> conv (11x11),
        # so the flattened feature size is 32 * 11 * 11 = 3872.
        self.dense_1 = nn.Linear(3872, 64)
        self.dense_2 = nn.Linear(64, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv2d_1(x), kernel_size=2))
        x = F.relu(self.conv2d_2(x))
        x = F.dropout(x, training=self.training)
        x = x.view(-1, 3872)
        x = F.relu(self.dense_1(x))
        x = F.dropout(x, training=self.training)
        x = self.dense_2(x)
        return F.log_softmax(x, dim=1)
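
A quick way to confirm the flattened size of 3872 (a sketch, not part of the original notebook) is to push a dummy batch through the convolutional layers:

# Sketch: verify the flattened feature size with a dummy input.
net = Net()
dummy = torch.zeros(1, 1, 28, 28)
out = F.relu(net.conv2d_2(F.max_pool2d(net.conv2d_1(dummy), kernel_size=2)))
print(out.shape)  # torch.Size([1, 32, 11, 11]) -> 32 * 11 * 11 = 3872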

In [18]:
# Create the model.
model = Net()

In [19]:
model.load_state_dict(
    torch.load('torch_mnist.pth')
)
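
For reference, a state_dict checkpoint like torch_mnist.pth would typically have been produced at the end of training with something along these lines (an assumption; the training notebook is not shown here):

# Presumably how the checkpoint was written after training:
torch.save(model.state_dict(), 'torch_mnist.pth')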

In [20]:
test(model, test_loader)


Test set: Average loss: 0.0856, Accuracy: 9732/10000 (97%)

Method 2

If the whole model (architecture plus parameters) was saved, we should in theory only need to load one file. However, this will still fail if the Net class has not been defined first: torch.load unpickles the saved object, and unpickling needs the class definition to be available.
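
For reference, such a full-model checkpoint is typically written by passing the model object itself to torch.save (again an assumption about how torch_mnist.pth.tar was produced):

# Presumably how the full model was written after training:
torch.save(model, 'torch_mnist.pth.tar')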


In [7]:
model = torch.load('torch_mnist.pth.tar')
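
If the checkpoint had been saved on a GPU and is being loaded on a CPU-only machine, torch.load accepts a map_location argument to remap the tensors, e.g.:

# Remap GPU-saved tensors to the CPU while loading (if needed):
model = torch.load('torch_mnist.pth.tar', map_location='cpu')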

In [8]:
test(model, test_loader)


Test set: Average loss: 0.0856, Accuracy: 9732/10000 (97%)