Backprop demo on a single-unit linear MLP

This notebook demonstrates how backpropagation works on a single-unit MLP in PyTorch.


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

A single neuron/unit MLP with a linear (identity) activation.


In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(1, 1)  # a single linear unit: y = w*x + b
    
    def forward(self, x):
        x = self.fc1(x)
        return x

Build a network and train it to model the function y = 2*x + 1.

Perform supervised learning using the following dataset:

Step    x      y
   0    0.0    1.0
   1    1.0    3.0

At each step, perform backprop, apply one SGD update (learning rate 0.1), and print the gradients and the updated parameters.
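
For reference, here is the math that backprop carries out for this single linear unit under the MSE loss (a short derivation; $w$ is the weight, $b$ the bias, $\eta$ the learning rate):

$$\hat{y} = w x + b, \qquad L = (\hat{y} - y)^2$$

$$\frac{\partial L}{\partial w} = 2(\hat{y} - y)\,x, \qquad \frac{\partial L}{\partial b} = 2(\hat{y} - y)$$

$$w \leftarrow w - \eta\,\frac{\partial L}{\partial w}, \qquad b \leftarrow b - \eta\,\frac{\partial L}{\partial b}$$

For example, at step 0 (w=1, b=0, x=0, y=1): $\hat{y}=0$, $L=1$, $\partial L/\partial b = -2$, $\partial L/\partial w = 0$, so with $\eta = 0.1$ the bias moves to 0.2 and the weight stays at 1.0, matching the printout below.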


In [11]:
net = Net()

# for ease of backprop demo, we set weight=1.0 and bias=0.0
nn.init.constant_(net.fc1.weight, 1)
nn.init.constant_(net.fc1.bias, 0)
print("---------------------0-----------------------")
print("0. Bias grad: ", net.fc1.bias.grad)
print("0. Weights grad: ", net.fc1.weight.grad)
print("0. Bias: ", net.fc1.bias[0].item())
print("0. Weights: ", net.fc1.weight[0].item())

# x=0, y=1.
input = torch.tensor([0.])
print("0. Input: ", input.item())
input = input.unsqueeze(0)  # add a batch dimension: shape [1] -> [1, 1]
output = net(input)
print("0. Predicted Output: ", output.item())
target = torch.tensor([1.])
target = target.view(1, -1)

# Use MSE Loss
criterion = nn.MSELoss()
loss = criterion(output, target)
print("0. Loss: ", loss.item())

# Use SGD optimizer with learning rate of 0.1
optimizer = optim.SGD(net.parameters(), lr=0.1)
# Clear optimizer gradient
optimizer.zero_grad()
# Perform backprop to compute dL/dw and dL/db
loss.backward()
# Update weight and bias
optimizer.step()
print("\n---------------------1-----------------------")
print("1. Bias grad: ", net.fc1.bias.grad.item())
print("1. Weights grad: ",net.fc1.weight.grad.item())
print("1. Bias: ", net.fc1.bias[0].item())
print("1. Weights: ",net.fc1.weight[0].item())

# x=1.0, y=3.0
input = torch.tensor([1.])
print("1. Input: ", input.item())
input = input.unsqueeze(0)  # add a batch dimension: shape [1] -> [1, 1]
output = net(input)
print("1. Predicted Output: ", output.item())
target = torch.tensor([3.])
target = target.view(1, -1)

optimizer.zero_grad()
loss = criterion(output, target)
print("1. Loss: ", loss.item())
loss.backward()
optimizer.step()
print("\n---------------------2-----------------------")
print("2. Bias grad: ", net.fc1.bias.grad.item())
print("2. Weights grad: ",net.fc1.weight.grad.item())
print("2. Bias: ", net.fc1.bias[0].item())
print("2. Weights: ",net.fc1.weight[0].item())


---------------------0-----------------------
0. Bias grad:  None
0. Weights grad:  None
0. Bias:  0.0
0. Weights:  1.0
0. Input:  0.0
0. Predicted Output:  0.0
0. Loss:  1.0

---------------------1-----------------------
1. Bias grad:  -2.0
1. Weights grad:  0.0
1. Bias:  0.20000000298023224
1. Weights:  1.0
1. Input:  1.0
1. Predicted Output:  1.2000000476837158
1. Loss:  3.239999771118164

---------------------2-----------------------
2. Bias grad:  -3.5999999046325684
2. Weights grad:  -3.5999999046325684
2. Bias:  0.5600000023841858
2. Weights:  1.3600000143051147
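
As a cross-check, the two SGD steps above can be replayed with plain Python arithmetic using the gradient formulas derived earlier (a minimal sketch, independent of the network object; it should reproduce the printed gradients, losses, and parameters up to floating-point noise):


In [ ]:
# Manual replay of the two training steps, using
# dL/dw = 2*(w*x + b - y)*x and dL/db = 2*(w*x + b - y)
w, b, lr = 1.0, 0.0, 0.1

for step, (x, y) in enumerate([(0.0, 1.0), (1.0, 3.0)]):
    y_hat = w * x + b              # forward pass of the single linear unit
    loss = (y_hat - y) ** 2        # MSE loss for one sample
    grad_w = 2 * (y_hat - y) * x   # dL/dw
    grad_b = 2 * (y_hat - y)       # dL/db
    w -= lr * grad_w               # SGD update
    b -= lr * grad_b
    print(step, "loss:", loss, "grad_w:", grad_w, "grad_b:", grad_b, "-> w:", w, "b:", b)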
