In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.autograd import Variable
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['image.cmap'] = 'gray'
%matplotlib inline

In [6]:
# input batch size for training (default: 64)
batch_size = 64

# input batch size for testing (default: 1000)
test_batch_size = 1000

# number of epochs to train (default: 10)
epochs = 10

# learning rate (default: 0.01)
lr = 0.01

# SGD momentum (default: 0.5)
momentum = 0.5

# disables CUDA training
no_cuda = True

# random seed (default: 1)
seed = 1

# how many batches to wait before logging training status
log_interval = 10

# Setting seed for reproducibility.
torch.manual_seed(seed)

cuda = not no_cuda and torch.cuda.is_available()
print("CUDA: {}".format(cuda))


CUDA: False

In [7]:
if cuda:
    torch.cuda.manual_seed(seed)
cudakwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)) # Precalculated mean and std of the MNIST training pixels.
])

train_set = datasets.MNIST(
    root='data',
    train=True,
    transform=mnist_transform,
    download=True,
)

test_set = datasets.MNIST(
    root='data',
    train=False,
    transform=mnist_transform,
    download=True,
)

train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True,
    **cudakwargs
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=test_batch_size,
    shuffle=True,
    **cudakwargs
)
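
The values 0.1307 and 0.3081 are the mean and standard deviation of the raw MNIST training pixels. As a quick sketch of where they come from, they can be re-derived by loading the training set once without normalization (the raw_set / raw_loader names below are just for illustration):

In [ ]:
raw_set = datasets.MNIST(root='data', train=True,
                         transform=transforms.ToTensor(), download=True)
raw_loader = torch.utils.data.DataLoader(raw_set, batch_size=len(raw_set))
images, _ = next(iter(raw_loader))    # a single batch holding all 60,000 training images
print('mean: {}, std: {}'.format(images.mean(), images.std()))    # roughly 0.1307 and 0.3081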

Loading the model.

Here we will focus only on nn.Sequential models, as they are the easiest to work with. Generalizing the methods described here to an arbitrary nn.Module requires more work (a small sketch follows below).

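For comparison, a hand-written nn.Module does not offer positional indexing, but its submodules can still be reached by name through named_modules() or named_children(). A minimal sketch (the SmallNet class below is purely illustrative and unrelated to the model used in this notebook):

In [ ]:
class SmallNet(nn.Module):
    def __init__(self):
        super(SmallNet, self).__init__()
        self.conv = nn.Conv2d(1, 8, kernel_size=3)
        self.fc = nn.Linear(8 * 26 * 26, 10)

    def forward(self, x):
        x = F.relu(self.conv(x))
        x = x.view(x.size(0), -1)    # flatten everything but the batch dimension
        return F.log_softmax(self.fc(x))

for name, module in SmallNet().named_modules():
    print('{}\t{}'.format(name, type(module).__name__))    # the root module itself, then 'conv' and 'fc'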

In [41]:
class Flatten(nn.Module):
    """Collapse every dimension except the batch dimension."""
    def forward(self, x):
        return x.view(x.size(0), -1)

    def __str__(self):
        return 'Flatten()'

model = nn.Sequential(OrderedDict([
    ('conv2d_1', nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3)),
    ('relu_1', nn.ReLU()),
    ('max_pooling2d_1', nn.MaxPool2d(kernel_size=2)),
    ('conv2d_2', nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)),
    ('relu_2', nn.ReLU()),
    ('dropout_1', nn.Dropout(p=0.25)),
    ('flatten_1', Flatten()),
    ('dense_1', nn.Linear(3872, 64)),
    ('relu_3', nn.ReLU()),
    ('dropout_2', nn.Dropout(p=0.5)),
    ('dense_2', nn.Linear(64, 10)),
    ('readout', nn.LogSoftmax())
]))

model.load_state_dict(torch.load('example_torch_mnist_model.pth'))
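
As a small sanity check (a sketch, assuming the checkpoint matches the architecture above), the loaded model can be evaluated on a single test batch:

In [ ]:
model.eval()                                    # switch the dropout layers to evaluation mode
data, target = next(iter(test_loader))
data, target = Variable(data), Variable(target)
output = model(data)
pred = output.data.max(1)[1]                    # index of the max log-probability
correct = pred.eq(target.data.view_as(pred)).cpu().sum()
print('correct on one test batch: {}/{}'.format(correct, target.size(0)))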

Accessing the layers

A torch.nn.Sequential module is itself an iterable and subscriptable container over all of its child modules.


In [42]:
for i, layer in enumerate(model):
    print('{}\t{}'.format(i, layer))


0	Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
1	ReLU ()
2	MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
3	Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
4	ReLU ()
5	Dropout (p = 0.25)
6	Flatten()
7	Linear (3872 -> 64)
8	ReLU ()
9	Dropout (p = 0.5)
10	Linear (64 -> 10)
11	LogSoftmax ()
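
Because the container is subscriptable, a single layer can be fetched by position, and since the model was built from an OrderedDict the same layer is also reachable as a named attribute:

In [ ]:
conv = model[0]          # fetch a layer by position
same = model.conv2d_1    # the OrderedDict key doubles as an attribute name
print(conv is same)      # both references point to the same module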

Moreover, .modules() and .children() provide generators over the layers: .modules() yields the container itself and then every submodule recursively, while .children() yields only the direct submodules.


In [22]:
for m in model.modules():
    print(m)


Sequential (
  (conv2d_1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (relu_1): ReLU ()
  (max_pooling2d_1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv2d_2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (relu_2): ReLU ()
  (dropout_1): Dropout (p = 0.25)
  (flatten_1): Flatten (
  )
  (dense_1): Linear (3872 -> 64)
  (relu_3): ReLU ()
  (dropout_2): Dropout (p = 0.5)
  (dense_2): Linear (64 -> 10)
  (readout): LogSoftmax ()
)
Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
ReLU ()
MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
ReLU ()
Dropout (p = 0.25)
Flatten (
)
Linear (3872 -> 64)
ReLU ()
Dropout (p = 0.5)
Linear (64 -> 10)
LogSoftmax ()

In [23]:
for c in model.children():
    print(c)


Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
ReLU ()
MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
ReLU ()
Dropout (p = 0.25)
Flatten (
)
Linear (3872 -> 64)
ReLU ()
Dropout (p = 0.5)
Linear (64 -> 10)
LogSoftmax ()
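
The names given in the OrderedDict are not lost either: the named_children() and named_modules() variants yield (name, module) pairs, which is handy for looking up a specific layer programmatically. A short sketch:

In [ ]:
for name, child in model.named_children():
    print('{}\t{}'.format(name, child.__class__.__name__))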

Getting the weights.


In [26]:
conv2d_1_weight = model[0].weight.data.numpy()
conv2d_1_weight.shape


Out[26]:
(32, 1, 3, 3)

In [27]:
for i in range(32):
    plt.imshow(conv2d_1_weight[i, 0])
    plt.show()

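Showing 32 separate figures is unwieldy; as an alternative sketch, the kernels can be laid out on one grid of subplots:

In [ ]:
fig, axes = plt.subplots(4, 8, figsize=(12, 6))
for i, ax in enumerate(axes.flat):
    ax.imshow(conv2d_1_weight[i, 0])    # i-th 3x3 kernel of the first conv layer
    ax.set_title(str(i))
    ax.axis('off')
plt.show()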

Getting layer properties

The layer objects themselves expose most of their properties as attributes.


In [30]:
conv2d_1 = model[0]

In [32]:
conv2d_1.kernel_size


Out[32]:
(3, 3)

In [34]:
conv2d_1.stride


Out[34]:
(1, 1)

In [33]:
conv2d_1.dilation


Out[33]:
(1, 1)

In [35]:
conv2d_1.in_channels, conv2d_1.out_channels


Out[35]:
(1, 32)

In [36]:
conv2d_1.padding


Out[36]:
(0, 0)

In [31]:
conv2d_1.output_padding


Out[31]:
(0, 0)

In [43]:
dropout_1 = model[5]

In [44]:
dropout_1.p


Out[44]:
0.25

In [45]:
dense_1 = model[7]

In [46]:
dense_1.in_features, dense_1.out_features


Out[46]:
(3872, 64)
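
Putting the pieces together, the same attributes can be read off for every layer to build a lightweight architecture summary. A sketch using only the attributes shown above:

In [ ]:
for name, layer in model.named_children():
    props = {}
    if isinstance(layer, nn.Conv2d):
        props = {'in': layer.in_channels, 'out': layer.out_channels,
                 'kernel': layer.kernel_size, 'stride': layer.stride,
                 'padding': layer.padding}
    elif isinstance(layer, nn.Linear):
        props = {'in': layer.in_features, 'out': layer.out_features}
    elif isinstance(layer, nn.Dropout):
        props = {'p': layer.p}
    elif isinstance(layer, nn.MaxPool2d):
        props = {'kernel': layer.kernel_size, 'stride': layer.stride}
    print('{}\t{}'.format(name, props))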