In this lab, you will test the Sigmoid, Tanh, and ReLU activation functions on the MNIST dataset.
You'll need the following libraries:
In [1]:
!conda install -y torchvision
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
Define the neural network module or class:
In [2]:
class Net(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)

    def forward(self, x):
        x = torch.sigmoid(self.linear1(x))
        x = self.linear2(x)
        return x
Define the class with the Tanh activation function:
In [3]:
class NetTanh(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(NetTanh, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)

    def forward(self, x):
        x = torch.tanh(self.linear1(x))
        x = self.linear2(x)
        return x
Define the class for the ReLU activation function:
In [4]:
class NetRelu(nn.Module):
    def __init__(self, D_in, H, D_out):
        super(NetRelu, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = self.linear2(x)
        return x
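As a quick sanity check, you can push a random batch through one of these networks and confirm the output shape. The cell below is an illustrative addition, not part of the original lab; the sizes match the MNIST setup used later (784 inputs, 100 hidden neurons, 10 classes):
In [ ]:
# illustrative sanity check: one forward pass through NetRelu
x = torch.randn(4, 28 * 28)        # a fake batch of 4 flattened 28x28 images
net = NetRelu(28 * 28, 100, 10)
print(net(x).shape)                # expected: torch.Size([4, 10])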
Define a function to train the model. In this case, the function returns a Python dictionary to store the training loss and accuracy on the validation data.
In [5]:
def train(model, criterion, train_loader, validation_loader, optimizer, epochs=100):
    useful_stuff = {'training_loss': [], 'validation_accuracy': []}
    for epoch in range(epochs):
        for i, (x, y) in enumerate(train_loader):
            # clear the gradients
            optimizer.zero_grad()
            # make a prediction (logits)
            z = model(x.view(-1, 28 * 28))
            # calculate the loss
            loss = criterion(z, y)
            # calculate the gradients of the parameters
            loss.backward()
            # update the parameters
            optimizer.step()
            # record the loss for this iteration
            useful_stuff['training_loss'].append(loss.item())
        correct = 0
        with torch.no_grad():
            for x, y in validation_loader:
                # perform a prediction on the validation data
                yhat = model(x.view(-1, 28 * 28))
                _, label = torch.max(yhat, 1)
                correct += (label == y).sum().item()
        accuracy = 100 * (correct / len(validation_loader.dataset))
        useful_stuff['validation_accuracy'].append(accuracy)
    return useful_stuff
Load the training dataset by setting the parameter train to True, and convert it to a tensor by placing a transform object in the argument transform.
In [6]:
train_dataset=dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
Load the testing dataset (used here as validation data) by setting the parameter train to False, and convert it to a tensor by placing a transform object in the argument transform.
In [7]:
validation_dataset=dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
Create the criterion function:
In [8]:
criterion=nn.CrossEntropyLoss()
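Note that nn.CrossEntropyLoss applies LogSoftmax and NLLLoss internally, which is why the forward methods above return raw logits rather than probabilities. A minimal illustration with made-up logits (an optional addition to the lab):
In [ ]:
# CrossEntropyLoss takes raw logits and integer class targets
logits = torch.tensor([[2.0, 0.5, -1.0]])  # one sample, three classes (made-up values)
target = torch.tensor([0])                 # index of the correct class
print(criterion(logits, target))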
Create the training-data loader and the validation-data loader objects:
In [9]:
train_loader=torch.utils.data.DataLoader(dataset=train_dataset,batch_size=2000,shuffle=True)
validation_loader=torch.utils.data.DataLoader(dataset=validation_dataset,batch_size=5000,shuffle=False)
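If you want to verify what the loaders yield, you can inspect a single batch; each MNIST image is a 1x28x28 tensor, which is why the training loop flattens x to 784 features. This inspection cell is an optional addition, not part of the original lab:
In [ ]:
# inspect one training batch (shapes only; no training happens here)
x, y = next(iter(train_loader))
print(x.shape)                    # torch.Size([2000, 1, 28, 28])
print(x.view(-1, 28 * 28).shape)  # torch.Size([2000, 784])
print(y.shape)                    # torch.Size([2000])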
Create the model with 100 neurons in the hidden layer:
In [11]:
input_dim=28*28
hidden_dim=100
output_dim=10
model=Net(input_dim,hidden_dim,output_dim)
Print the model parameters:
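A minimal sketch of such a cell, listing each parameter tensor and its shape:
In [ ]:
# list each parameter's name and shape
for name, param in model.named_parameters():
    print(name, param.shape)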
Train the network by using the Sigmoid activation function:
In [12]:
learning_rate=0.01
optimizer=torch.optim.SGD(model.parameters(),lr=learning_rate)
training_results=train(model,criterion, train_loader,validation_loader, optimizer, epochs=30)
Train the network by using the Tanh activation function:
In [ ]:
model_Tanh=NetTanh(input_dim,hidden_dim,output_dim)
optimizer=torch.optim.SGD(model_Tanh.parameters(),lr=learning_rate)
training_results_tanh=train(model_Tanh,criterion, train_loader,validation_loader, optimizer, epochs=30)
Train the network by using the ReLU activation function:
In [ ]:
modelRelu=NetRelu(input_dim,hidden_dim,output_dim)
optimizer=torch.optim.SGD(modelRelu.parameters(),lr=learning_rate)
training_results_relu=train(modelRelu,criterion, train_loader,validation_loader, optimizer, epochs=30)
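Before plotting, you can report the last recorded validation accuracy for each model. This reporting cell is an optional addition, and the actual numbers vary from run to run:
In [ ]:
# final validation accuracy for each activation (values vary between runs)
print('sigmoid:', training_results['validation_accuracy'][-1])
print('tanh:', training_results_tanh['validation_accuracy'][-1])
print('relu:', training_results_relu['validation_accuracy'][-1])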
Compare the training loss for each activation:
In [ ]:
plt.plot(training_results_tanh['training_loss'], label='tanh')
plt.plot(training_results['training_loss'], label='sigmoid')
plt.plot(training_results_relu['training_loss'], label='relu')
plt.ylabel('loss')
plt.xlabel('iterations')
plt.title('training loss vs. iterations')
plt.legend()
Compare the validation accuracy for each model:
In [ ]:
plt.plot(training_results_tanh['validation_accuracy'], label='tanh')
plt.plot(training_results['validation_accuracy'], label='sigmoid')
plt.plot(training_results_relu['validation_accuracy'], label='relu')
plt.ylabel('validation accuracy')
plt.xlabel('epochs')
plt.legend()
Joseph Santarcangelo has a PhD in Electrical Engineering. His research focused on using machine learning, signal processing, and computer vision to determine how videos impact human cognition.
Other contributors: Michelle Carey, Mavis Zhou
Copyright © 2018 cognitiveclass.ai. This notebook and its source code are released under the terms of the MIT License.