In this lab, we will use a Convolutional Neural Network to classify handwritten digits from the MNIST database.
In [9]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import matplotlib.pylab as plt
import numpy as np
plot_channels: plots the kernel parameters of each channel
In [10]:
def plot_channels(W):
    # number of output channels
    n_out = W.shape[0]
    # number of input channels
    n_in = W.shape[1]
    w_min = W.min().item()
    w_max = W.max().item()
    fig, axes = plt.subplots(n_out, n_in)
    fig.subplots_adjust(hspace=0.1)
    out_index = 0
    in_index = 0
    # plot outputs as rows, inputs as columns
    for ax in axes.flat:
        if in_index > n_in - 1:
            out_index = out_index + 1
            in_index = 0
        ax.imshow(W[out_index, in_index, :, :], vmin=w_min, vmax=w_max, cmap='seismic')
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        in_index = in_index + 1
    plt.show()
plot_parameters: plots the kernel parameters of each channel
In [11]:
def plot_parameters(W, number_rows=1, name="", i=0):
    W = W.data[:, i, :, :]
    n_filters = W.shape[0]
    w_min = W.min().item()
    w_max = W.max().item()
    fig, axes = plt.subplots(number_rows, n_filters // number_rows)
    fig.subplots_adjust(hspace=0.4)
    for i, ax in enumerate(axes.flat):
        if i < n_filters:
            # Set the label for the sub-plot.
            ax.set_xlabel("kernel:{0}".format(i + 1))
            # Plot the image.
            ax.imshow(W[i, :], vmin=w_min, vmax=w_max, cmap='seismic')
            ax.set_xticks([])
            ax.set_yticks([])
    plt.suptitle(name, fontsize=10)
    plt.show()
plot_activations: plots the activations of the convolutional layers
In [43]:
def plot_activations(A, number_rows=1, name="", i=0):
    A = A[0, :, :, :].detach().numpy()
    n_activations = A.shape[0]
    A_min = A.min().item()
    A_max = A.max().item()
    fig, axes = plt.subplots(number_rows, n_activations // number_rows)
    fig.subplots_adjust(hspace=0.4)
    for i, ax in enumerate(axes.flat):
        if i < n_activations:
            # Set the label for the sub-plot.
            ax.set_xlabel("activation:{0}".format(i + 1))
            # Plot the image.
            ax.imshow(A[i, :], vmin=A_min, vmax=A_max, cmap='seismic')
            ax.set_xticks([])
            ax.set_yticks([])
    # show the title passed via name (the original ignored this parameter)
    plt.suptitle(name, fontsize=10)
    plt.show()
show_data: plots a data sample
In [13]:
def show_data(data_sample):
    plt.imshow(data_sample[0].numpy().reshape(28, 28), cmap='gray')
    plt.title('y = ' + str(data_sample[1].item()))
Load the training dataset by setting the parameter train to True. The parameter transform defines the method used to convert the image to a tensor.
In [14]:
train_dataset=dsets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
train_dataset
Out[14]:
Load the testing dataset by setting the parameter train to False. The parameter transform defines the method used to convert the image to a tensor.
In [15]:
validation_dataset=dsets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())
validation_dataset
Out[15]:
We can see that the data type of the label is long:
In [16]:
train_dataset[0][1].type()
Out[16]:
Each element in the rectangular tensor corresponds to a number representing a pixel intensity, as the sketch below demonstrates.
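As a quick check (an added sketch, not part of the original lab), you can inspect a few raw pixel intensities directly; after the ToTensor transform the values lie in [0, 1]:
image, label = train_dataset[0]
# the image is a 1x28x28 tensor of pixel intensities
print(image.shape)             # torch.Size([1, 28, 28])
# print a 5x5 patch from the middle of the digit
print(image[0, 10:15, 10:15])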
Print out the label of the fourth sample:
In [17]:
train_dataset[3][1]
Out[17]:
Plot the fourth sample
In [18]:
show_data(train_dataset[3])
The fourth sample is a "1".
Plot the third sample.
In [19]:
show_data(train_dataset[2])
Build a Convolutional Neural Network class with two convolutional layers and one fully connected layer. The size of the final output matrix must be pre-determined; a sanity check of this size follows the class definition. The parameters in the constructor are the number of output channels for the first and second layers.
In [20]:
class CNN(nn.Module):
    def __init__(self, out_1=16, out_2=32):
        super(CNN, self).__init__()
        # first convolutional layer
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=out_1, kernel_size=5, padding=2)
        # activation function
        self.relu1 = nn.ReLU()
        # max pooling
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        # second convolutional layer
        self.cnn2 = nn.Conv2d(in_channels=out_1, out_channels=out_2, kernel_size=5, stride=1, padding=2)
        # activation function
        self.relu2 = nn.ReLU()
        # max pooling
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        # fully connected layer
        self.fc1 = nn.Linear(out_2 * 7 * 7, 10)

    def forward(self, x):
        # first convolutional layer
        out = self.cnn1(x)
        # activation function
        out = self.relu1(out)
        # max pooling
        out = self.maxpool1(out)
        # second convolutional layer
        out = self.cnn2(out)
        # activation function
        out = self.relu2(out)
        # max pooling
        out = self.maxpool2(out)
        # flatten the output
        out = out.view(out.size(0), -1)
        # fully connected layer
        out = self.fc1(out)
        return out

    def activations(self, x):
        # returns the intermediate activations; not needed for training, only for visualization
        z1 = self.cnn1(x)
        a1 = self.relu1(z1)
        out = self.maxpool1(a1)
        z2 = self.cnn2(out)
        a2 = self.relu2(z2)
        out = self.maxpool2(a2)
        out = out.view(out.size(0), -1)
        return z1, a1, z2, a2, out
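As a sanity check on the out_2*7*7 input size of the fully connected layer, the short sketch below (an addition to the lab) traces the spatial dimensions through the network. Each 5x5 convolution uses padding=2, so it preserves the 28x28 spatial size, and each 2x2 max pool halves it: 28 -> 14 -> 7.
# Trace a dummy MNIST-shaped input through the layers (sketch only).
x = torch.randn(1, 1, 28, 28)
m = CNN(out_1=16, out_2=32)
h = m.maxpool1(m.relu1(m.cnn1(x)))
print(h.shape)   # torch.Size([1, 16, 14, 14])
h = m.maxpool2(m.relu2(m.cnn2(h)))
print(h.shape)   # torch.Size([1, 32, 7, 7]) -> 32*7*7 = 1568 features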
There are 16 output channels for the first layer and 32 output channels for the second layer.
In [21]:
model=CNN(out_1=16,out_2=32)
Plot the kernel parameters before training; the kernels are initialized randomly.
In [22]:
plot_parameters(model.state_dict()['cnn1.weight'],number_rows=4,name="1st layer kernels before training ")
plot_parameters(model.state_dict()['cnn2.weight'],number_rows=4,name='2nd layer kernels before training' )
Define the loss function. Note that nn.CrossEntropyLoss applies LogSoftmax internally, which is why the forward method returns raw logits.
In [23]:
criterion=nn.CrossEntropyLoss()
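As a small illustrative check (an addition, with made-up numbers), nn.CrossEntropyLoss takes raw scores and integer class labels:
# Hypothetical mini-batch: 2 samples, 10 classes, long-typed targets.
logits = torch.randn(2, 10)        # raw, un-normalized scores
targets = torch.tensor([3, 7])     # class indices
print(criterion(logits, targets))  # scalar loss (LogSoftmax + NLLLoss combined)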
Define the optimizer (stochastic gradient descent):
In [24]:
learning_rate=0.1
optimizer=torch.optim.SGD(model.parameters(), lr=learning_rate)
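For reference (an added sketch, not part of the original lab), plain SGD without momentum updates each parameter by subtracting the learning rate times its gradient; optimizer.step() is equivalent to:
# Manual one-step equivalent of optimizer.step() for plain SGD (illustration only).
with torch.no_grad():
    for p in model.parameters():
        if p.grad is not None:
            p -= learning_rate * p.grad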
Define the data loaders for the training and validation datasets:
In [25]:
train_loader=torch.utils.data.DataLoader(dataset=train_dataset,batch_size=100)
validation_loader=torch.utils.data.DataLoader(dataset=validation_dataset,batch_size=5000)
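A quick sketch (an addition) to confirm the batch shapes the training loader produces:
# Peek at one training batch.
x, y = next(iter(train_loader))
print(x.shape)  # torch.Size([100, 1, 28, 28])
print(y.shape)  # torch.Size([100])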
Train the model and determine the validation accuracy (technically the test accuracy, since the MNIST test split is used here). This may take a long time.
In [26]:
n_epochs = 10
loss_list = []
accuracy_list = []
N_test = len(validation_dataset)

for epoch in range(n_epochs):
    for x, y in train_loader:
        # clear the gradient
        optimizer.zero_grad()
        # make a prediction
        z = model(x)
        # calculate the loss
        loss = criterion(z, y)
        # calculate gradients of parameters
        loss.backward()
        # update parameters
        optimizer.step()
    correct = 0
    # perform a prediction on the validation data
    for x_test, y_test in validation_loader:
        z = model(x_test)
        _, yhat = torch.max(z.data, 1)
        correct += (yhat == y_test).sum().item()
    accuracy = correct / N_test
    accuracy_list.append(accuracy)
    loss_list.append(loss.item())
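After training you may want to persist the learned parameters. A minimal sketch (the file name is an assumption, not from the original lab):
# Save and reload the trained weights (hypothetical file name).
torch.save(model.state_dict(), 'cnn_mnist.pt')
model2 = CNN(out_1=16, out_2=32)
model2.load_state_dict(torch.load('cnn_mnist.pt'))
model2.eval()  # switch to evaluation mode before inference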
Plot the training loss and the accuracy on the validation data:
In [27]:
fig, ax1 = plt.subplots()
color = 'tab:red'
ax1.plot(loss_list,color=color)
ax1.set_xlabel('epoch',color=color)
ax1.set_ylabel('total loss',color=color)
ax1.tick_params(axis='y', labelcolor=color)
ax2 = ax1.twinx()
color = 'tab:blue'
ax2.set_ylabel('accuracy', color=color)
ax2.plot( accuracy_list, color=color)
ax2.tick_params(axis='y', labelcolor=color)
fig.tight_layout()
View the learned kernel parameters of the convolutional layers:
In [41]:
plot_channels(model.state_dict()['cnn1.weight'])
plot_channels(model.state_dict()['cnn2.weight'])
Consider the following sample
In [28]:
show_data(train_dataset[1])
Determine the activations
In [29]:
out=model.activations(train_dataset[1][0].view(1,1,28,28))
In [30]:
A=out[1]
A.shape
Out[30]:
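For reference (an added note), the shapes of all five returned tensors follow from the architecture traced earlier:
# out[0] = z1: (1, 16, 28, 28)  first convolution
# out[1] = a1: (1, 16, 28, 28)  after the first ReLU
# out[2] = z2: (1, 32, 14, 14)  second convolution (input pooled to 14x14)
# out[3] = a2: (1, 32, 14, 14)  after the second ReLU
# out[4]:      (1, 1568)        flattened after the second pooling
for t in out:
    print(t.shape)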
Plot out the first set of activations
In [33]:
plot_activations(out[0],number_rows=4,name="first feature map")
The image below is the result after applying the ReLU activation function.
In [34]:
plot_activations(out[1],number_rows=4,name="first feature map after relu")
The image below is the activation map after the second convolutional layer.
In [53]:
plot_activations(out[2],number_rows=32//4,name="second feature map")
The image below is the activation map after applying the second ReLU.
In [54]:
plot_activations(out[3],number_rows=4,name="second feature map after relu")
We can see the results for the third sample:
In [55]:
show_data(train_dataset[2])
In [117]:
out=model.activations(train_dataset[2][0].view(1,1,28,28))
In [118]:
plot_activations(out[0],number_rows=4,name="first feature map")
In [119]:
plot_activations(out[1],number_rows=4,name="first feature map after relu")
In [123]:
plot_activations(out[2],number_rows=4,name="second feature map ")
In [122]:
plot_activations(out[3],number_rows=4,name="second feature map after relu")
Plot the first five misclassified samples:
In [60]:
count = 0
for x, y in torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=1):
    z = model(x)
    _, yhat = torch.max(z, 1)
    if yhat != y:
        show_data((x, y))
        plt.show()
        print("yhat:", yhat)
        count += 1
    if count >= 5:
        break
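As a further diagnostic (an added sketch, not part of the original lab), a simple confusion matrix over the validation set built with plain torch:
# Rows are the true digits, columns the predicted digits.
confusion = torch.zeros(10, 10, dtype=torch.long)
for x, y in validation_loader:
    _, yhat = torch.max(model(x), 1)
    for t, p in zip(y, yhat):
        confusion[t.item(), p.item()] += 1
print(confusion)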
Joseph Santarcangelo has a PhD in Electrical Engineering. His research focused on using machine learning, signal processing, and computer vision to determine how videos impact human cognition.
Other contributors: Michelle Carey, Mavis Zhou
Thanks to Magnus Erik Hvass Pedersen, whose tutorials helped me understand Convolutional Neural Networks.
Copyright © 2018 cognitiveclass.ai. This notebook and its source code are released under the terms of the MIT License.