In this notebook we build a convolutional neural network from scratch using just NumPy. If you are building a real image classifier, we strongly recommend using libraries like Keras or PyTorch. This code exists purely to illustrate the calculations behind a conv net.
In [0]:
import numpy as np
import sys
Convolution is the basic operation of a convolutional neural net. The function conv_() performs the convolution operation over the image. We demonstrate the convolution with stride = 1.
In [0]:
def conv_(img, conv_filter):
    """Convolve a single-channel image with one 2-D filter (stride 1).

    Slides the filter over every position where it fits entirely inside
    `img`, writes the response at the window's anchor position, then crops
    away the untouched zero border so only valid responses remain.

    Returns a 2-D feature map smaller than `img` by (filter size - 1).
    """
    size = conv_filter.shape[1]
    half_lo = np.uint16(np.floor(size / 2.0))  # rows/cols before the anchor
    half_hi = np.uint16(np.ceil(size / 2.0))   # rows/cols from the anchor onward
    feature = np.zeros(img.shape)
    # Anchor positions where the whole window stays inside the image.
    row_anchors = np.uint16(np.arange(size / 2.0, img.shape[0] - size / 2.0 + 1))
    col_anchors = np.uint16(np.arange(size / 2.0, img.shape[1] - size / 2.0 + 1))
    for r in row_anchors:
        for c in col_anchors:
            window = img[r - half_lo:r + half_hi, c - half_lo:c + half_hi]
            # Cross-correlation: element-wise product of window and filter, summed.
            feature[r, c] = np.sum(window * conv_filter)
    # Strip the zero border the sliding window never reached.
    margin = np.uint16(size / 2.0)
    return feature[margin:feature.shape[0] - margin,
                   margin:feature.shape[1] - margin]
def conv(img, conv_filter):
    """Convolve `img` with a bank of filters and stack the feature maps.

    img:         2-D (grayscale) or 3-D (H x W x channels) array.
    conv_filter: bank shaped (num_filters, size, size) or
                 (num_filters, size, size, channels).

    Returns an array (out_H, out_W, num_filters). Exits the process when
    the filter bank is malformed (channel mismatch, non-square, even size).
    """
    multi_channel = len(img.shape) > 2 or len(conv_filter.shape) > 3
    if multi_channel and img.shape[-1] != conv_filter.shape[-1]:
        # Image depth must equal filter depth.
        print("Error: Number of channels in both image and filter must match.")
        sys.exit()
    if conv_filter.shape[1] != conv_filter.shape[2]:
        print('Error: Filter must be a square matrix. I.e. number of rows and columns must match.')
        sys.exit()
    if conv_filter.shape[1] % 2 == 0:
        # Odd sizes give the filter a well-defined center pixel.
        print('Error: Filter must have an odd size. I.e. number of rows and columns must be odd.')
        sys.exit()
    out_rows = img.shape[0] - conv_filter.shape[1] + 1
    out_cols = img.shape[1] - conv_filter.shape[1] + 1
    feature_maps = np.zeros((out_rows, out_cols, conv_filter.shape[0]))
    for filter_num in range(conv_filter.shape[0]):
        curr_filter = conv_filter[filter_num, :]
        if len(curr_filter.shape) > 2:
            # Multi-channel filter: convolve each channel and sum the maps.
            conv_map = sum(conv_(img[:, :, ch], curr_filter[:, :, ch])
                           for ch in range(curr_filter.shape[-1]))
        else:
            # Single-channel filter applied directly to the 2-D image.
            conv_map = conv_(img, curr_filter)
        feature_maps[:, :, filter_num] = conv_map
    return feature_maps
In [0]:
def pooling(feature_map, size=2, stride=2):
    """Max-pool each channel of `feature_map` with a (size x size) window.

    feature_map: 3-D array (rows, cols, channels).
    size:        pooling window edge length.
    stride:      window step in both directions.

    Returns an array (out_rows, out_cols, channels) with
    out_dim = (dim - size) // stride + 1.

    Fixes over the original notebook code:
      * the window max is taken per channel — the original sliced the whole
        3-D slab, so every channel received the max across ALL channels;
      * the output is sized to exactly the number of window positions — the
        original formula ((dim - size + 1) / stride + 1) left trailing rows
        and columns of zeros.
    """
    out_rows = (feature_map.shape[0] - size) // stride + 1
    out_cols = (feature_map.shape[1] - size) // stride + 1
    pool_out = np.zeros((out_rows, out_cols, feature_map.shape[-1]))
    for map_num in range(feature_map.shape[-1]):
        r2 = 0
        for r in np.arange(0, feature_map.shape[0] - size + 1, stride):
            c2 = 0
            for c in np.arange(0, feature_map.shape[1] - size + 1, stride):
                # Max over this channel's window only.
                pool_out[r2, c2, map_num] = np.max(
                    feature_map[r:r + size, c:c + size, map_num])
                c2 = c2 + 1
            r2 = r2 + 1
    return pool_out
The ReLU activation function is implemented in the relu() function. It is used as the activation function for the convolutional layers. The function loops through each element in the feature map and keeps the original value if it is larger than 0; otherwise it returns 0.
In [0]:
def relu(feature_map):
    """Element-wise ReLU: negative activations become 0, the rest pass through.

    feature_map: 3-D array (rows, cols, channels).
    Returns a new float array of the same shape.
    """
    activated = np.zeros(feature_map.shape)
    # Clamp one channel at a time; np.maximum zeroes out negatives in bulk.
    for ch in range(feature_map.shape[-1]):
        activated[:, :, ch] = np.maximum(feature_map[:, :, ch], 0)
    return activated
In [10]:
import skimage.data
import skimage.color  # explicit: importing skimage.data does not guarantee skimage.color is loaded
import matplotlib
from matplotlib import pyplot as plt
# Load a sample RGB image and convert it to grayscale (2-D, values in [0, 1]).
img = skimage.data.coffee()
img = skimage.color.rgb2gray(img)
# First conv layer: two hand-crafted 3x3 edge detectors
# (vertical-edge and horizontal-edge filters).
l1_filter = np.zeros((2, 3, 3))
l1_filter[0, :, :] = np.array([[[-1, 0, 1],
                                [-1, 0, 1],
                                [-1, 0, 1]]])
l1_filter[1, :, :] = np.array([[[1, 1, 1],
                                [0, 0, 0],
                                [-1, -1, -1]]])
l1_feature_map = conv(img, l1_filter)
l1_feature_map_relu = relu(l1_feature_map)
l1_feature_map_relu_pool = pooling(l1_feature_map_relu, 2, 2)
# Second conv layer: 3 random 5x5 filters over the pooled layer-1 maps.
l2_filter = np.random.rand(3, 5, 5, l1_feature_map_relu_pool.shape[-1])
l2_feature_map = conv(l1_feature_map_relu_pool, l2_filter)
l2_feature_map_relu = relu(l2_feature_map)
l2_feature_map_relu_pool = pooling(l2_feature_map_relu, 2, 2)
# Third conv layer: a single random 7x7 filter over the pooled layer-2 maps.
l3_filter = np.random.rand(1, 7, 7, l2_feature_map_relu_pool.shape[-1])
l3_feature_map = conv(l2_feature_map_relu_pool, l3_filter)
l3_feature_map_relu = relu(l3_feature_map)
l3_feature_map_relu_pool = pooling(l3_feature_map_relu, 2, 2)
In [6]:
# Graphing results: show the grayscale input image without axis ticks.
fig0, ax0 = plt.subplots(nrows=1, ncols=1)
image_artist = ax0.imshow(img)
image_artist.set_cmap("gray")
ax0.set_title("Input Image")
ax0.set_xticks([])
ax0.set_yticks([])
plt.show()
In [7]:
# Layer 1: one row per stage (raw maps, after ReLU, after pooling),
# one column per filter. Titles follow the original "L1-Map<n><stage>" scheme.
fig1, ax1 = plt.subplots(nrows=3, ncols=2)
l1_stages = [(l1_feature_map, ""),
             (l1_feature_map_relu, "ReLU"),
             (l1_feature_map_relu_pool, "ReLUPool")]
for row, (maps, suffix) in enumerate(l1_stages):
    for col in range(2):
        axis = ax1[row, col]
        axis.imshow(maps[:, :, col]).set_cmap("gray")
        axis.set_xticks([])
        axis.set_yticks([])
        axis.set_title("L1-Map{}{}".format(col + 1, suffix))
plt.show()
In [8]:
# Layer 2: one row per stage (raw maps, after ReLU, after pooling),
# one column per filter. Titles follow the original "L2-Map<n><stage>" scheme.
fig2, ax2 = plt.subplots(nrows=3, ncols=3)
l2_stages = [(l2_feature_map, ""),
             (l2_feature_map_relu, "ReLU"),
             (l2_feature_map_relu_pool, "ReLUPool")]
for row, (maps, suffix) in enumerate(l2_stages):
    for col in range(3):
        axis = ax2[row, col]
        axis.imshow(maps[:, :, col]).set_cmap("gray")
        axis.set_xticks([])
        axis.set_yticks([])
        axis.set_title("L2-Map{}{}".format(col + 1, suffix))
plt.show()
In [9]:
# Layer 3: a single filter, so one panel per stage in a single row.
fig3, ax3 = plt.subplots(nrows=1, ncols=3)
l3_stages = [(l3_feature_map, "L3-Map1"),
             (l3_feature_map_relu, "L3-Map1ReLU"),
             (l3_feature_map_relu_pool, "L3-Map1ReLUPool")]
for idx, (maps, title) in enumerate(l3_stages):
    ax3[idx].imshow(maps[:, :, 0]).set_cmap("gray")
    ax3[idx].set_xticks([])
    ax3[idx].set_yticks([])
    ax3[idx].set_title(title)
plt.show()
Credits for this code goes to: https://towardsdatascience.com/building-convolutional-neural-network-using-numpy-from-scratch-b30aac50e50a