In this lab, you will compare PyTorch's default initialization, He initialization, and Uniform initialization on the MNIST dataset
You'll need the following libraries:
In [19]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import torch.nn.functional as F
import matplotlib.pylab as plt
import numpy as np
torch.manual_seed(0)
define the neural network class with He initialization
In [31]:
class Net_He(nn.Module):
    def __init__(self, Layers):
        super(Net_He, self).__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            linear = nn.Linear(input_size, output_size)
            # He (Kaiming) initialization, suited to ReLU activations
            torch.nn.init.kaiming_uniform_(linear.weight, nonlinearity='relu')
            # append the initialized layer
            self.hidden.append(linear)

    def forward(self, x):
        L = len(self.hidden)
        for l, linear_transform in zip(range(L), self.hidden):
            if l < L - 1:
                # ReLU on every hidden layer
                x = F.relu(linear_transform(x))
            else:
                # no activation on the output layer (logits for CrossEntropyLoss)
                x = linear_transform(x)
        return x
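As a quick sanity check, the sketch below (not part of the original lab; the layer sizes are arbitrary) applies kaiming_uniform_ to a single layer and compares the observed weight range with the theoretical He bound sqrt(6 / fan_in):
In [ ]:
# He uniform draws weights from U(-bound, bound) with bound = sqrt(6 / fan_in)
layer = nn.Linear(28 * 28, 100)
torch.nn.init.kaiming_uniform_(layer.weight, nonlinearity='relu')
bound = np.sqrt(6 / (28 * 28))
print(layer.weight.min().item(), layer.weight.max().item())  # close to -bound and +bound
print(bound)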
define the neural network class with Uniform initialization
In [32]:
class Net_Uniform(nn.Module):
    def __init__(self, Layers):
        super(Net_Uniform, self).__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            linear = nn.Linear(input_size, output_size)
            # initialize the weights from the Uniform distribution U(0, 1)
            linear.weight.data.uniform_(0, 1)
            self.hidden.append(linear)

    def forward(self, x):
        L = len(self.hidden)
        for l, linear_transform in zip(range(L), self.hidden):
            if l < L - 1:
                x = F.relu(linear_transform(x))
            else:
                x = linear_transform(x)
        return x
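Drawing weights from U(0, 1) gives every weight a positive mean of 0.5, so pre-activations grow roughly with the fan-in at every layer. The hedged sketch below (arbitrary sizes, not from the lab) shows the activation scale exploding after only a few such layers:
In [ ]:
# repeated layers with U(0, 1) weights: the activation scale blows up quickly
x = torch.randn(1, 100)
for layer_number in range(4):
    w = torch.empty(100, 100).uniform_(0, 1)
    x = F.relu(x @ w.t())
    print(layer_number, x.abs().mean().item())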
define the neural network class with PyTorch's default initialization
In [33]:
class Net(nn.Module):
    def __init__(self, Layers):
        super(Net, self).__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            # leave the layer with PyTorch's default initialization
            linear = nn.Linear(input_size, output_size)
            self.hidden.append(linear)

    def forward(self, x):
        L = len(self.hidden)
        for l, linear_transform in zip(range(L), self.hidden):
            if l < L - 1:
                x = F.relu(linear_transform(x))
            else:
                x = linear_transform(x)
        return x
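For reference, nn.Linear's default initialization draws weights from roughly U(-1/sqrt(fan_in), 1/sqrt(fan_in)). The sketch below (not part of the lab) checks this empirically:
In [ ]:
# default nn.Linear weights fall in roughly U(-1/sqrt(fan_in), 1/sqrt(fan_in))
layer = nn.Linear(28 * 28, 100)
print(layer.weight.min().item(), layer.weight.max().item())
print(1 / np.sqrt(28 * 28))  # expected bound: 1/28, about 0.0357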
define a function to train the model; it returns a Python dictionary storing the training loss and the accuracy on the validation data
In [34]:
def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    useful_stuff = {'training_loss': [], 'validation_accuracy': []}
    for epoch in range(epochs):
        for i, (x, y) in enumerate(train_loader):
            # clear the gradients
            optimizer.zero_grad()
            # make a prediction (logits)
            z = model(x.view(-1, 28 * 28))
            # calculate the loss
            loss = criterion(z, y)
            # calculate the gradients of the parameters
            loss.backward()
            # update the parameters
            optimizer.step()
            useful_stuff['training_loss'].append(loss.item())
        correct = 0
        for x, y in validation_loader:
            # perform a prediction on the validation data
            yhat = model(x.view(-1, 28 * 28))
            _, label = torch.max(yhat, 1)
            correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_loader.dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff
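The accuracy computation relies on torch.max(yhat, 1) returning the index of the largest logit in each row. A tiny sketch with made-up logits illustrates this:
In [ ]:
# torch.max over dim=1 returns (values, indices); the indices are the predicted classes
yhat = torch.tensor([[0.1, 2.0, -1.0],
                     [0.5, 0.2, 3.0]])
_, label = torch.max(yhat, 1)
print(label)  # tensor([1, 2])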
load the training dataset by setting the parameter train to True, and convert it to a tensor by placing a transform object in the argument transform
In [35]:
train_dataset=dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
load the validation dataset by setting the parameter train to False, and convert it to a tensor by placing a transform object in the argument transform
In [36]:
validation_dataset=dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
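To confirm the transform worked, the sketch below (not part of the lab) inspects a single sample; ToTensor yields a 1x28x28 float tensor:
In [ ]:
# each sample is a (1, 28, 28) tensor plus an integer class label
image, label = train_dataset[0]
print(image.shape, label)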
create the criterion function
In [37]:
criterion=nn.CrossEntropyLoss()
create the training-data loader and the validation-data loader objects
In [38]:
train_loader=torch.utils.data.DataLoader(dataset=train_dataset,batch_size=2000,shuffle=True)
validation_loader=torch.utils.data.DataLoader(dataset=validation_dataset,batch_size=5000,shuffle=False)
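A quick check of the loader output shapes (the batch sizes come from the cell above); note that the train function flattens each image into a 784-dimensional vector:
In [ ]:
# one training batch: 2000 images of shape (1, 28, 28) and 2000 labels
x, y = next(iter(train_loader))
print(x.shape, y.shape)
print(x.view(-1, 28 * 28).shape)  # flattened for the fully connected layers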
create a list that contains the layer sizes
In [40]:
input_dim=28*28
output_dim=10
layers=[input_dim,100,200,100,output_dim]
print the model parameters
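A minimal sketch for this step, assuming the Net class defined above (the throwaway model is built only to display the parameter shapes):
In [ ]:
# build a model from the layer list and print each parameter's name and shape
model_check = Net(layers)
for name, param in model_check.named_parameters():
    print(name, param.shape)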
train the network using PyTorch's default initialization
In [41]:
model=Net(layers)
learning_rate=0.01
optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate)
training_results=train(model,criterion, train_loader,validation_loader, optimizer, epochs=30)
train the network using He initialization
In [42]:
model_He=Net_He(layers)
optimizer=torch.optim.SGD(model_He.parameters(),lr=learning_rate)
training_results_He=train(model_He,criterion, train_loader,validation_loader, optimizer, epochs=30)
train the network using Uniform initialization
In [43]:
model_Uniform=Net_Uniform(layers)
optimizer=torch.optim.SGD(model_Uniform.parameters(),lr=learning_rate)
training_results_Uniform=train(model_Uniform,criterion, train_loader,validation_loader, optimizer, epochs=30)
compare the training loss for each initialization
In [44]:
plt.plot(training_results_He['training_loss'], label='He')
plt.plot(training_results['training_loss'], label='default')
plt.plot(training_results_Uniform['training_loss'], label='Uniform')
plt.ylabel('loss')
plt.xlabel('iterations')
plt.title('training loss vs iterations')
plt.legend()
compare the validation accuracy for each model
In [45]:
plt.plot(training_results_He['validation_accuracy'], label='He')
plt.plot(training_results['validation_accuracy'], label='default')
plt.plot(training_results_Uniform['validation_accuracy'], label='Uniform')
plt.ylabel('validation accuracy')
plt.xlabel('epochs')
plt.legend()
Joseph Santarcangelo has a PhD in Electrical Engineering. His research focused on using machine learning, signal processing, and computer vision to determine how videos impact human cognition.
Other contributors: Michelle Carey