Convolution


In [1]:
import numpy as np
np.random.seed(0)

In [2]:
image = np.random.randint(0, 10, size=(6, 6))

In [3]:
image


Out[3]:
array([[5, 0, 3, 3, 7, 9],
       [3, 5, 2, 4, 7, 6],
       [8, 8, 1, 6, 7, 7],
       [8, 1, 5, 9, 8, 9],
       [4, 3, 0, 3, 5, 0],
       [2, 3, 8, 1, 3, 3]])

In [4]:
# Vertical edge detection
kernel = np.array([[1, 0, -1], [1, 0, -1], [1, 0, -1]])

In [5]:
kernel


Out[5]:
array([[ 1,  0, -1],
       [ 1,  0, -1],
       [ 1,  0, -1]])

In [6]:
from scipy.signal import convolve

In [7]:
# Negate the kernel: scipy's convolve rotates the kernel 180 degrees
# before sliding it, and for this antisymmetric kernel negation equals
# that 180-degree rotation -- so this undoes the flip and yields the
# cross-correlation we want.
kernel_flipped = np.negative(kernel)
convolve(image, kernel_flipped, 'valid')


Out[7]:
array([[ 10,   0, -15,  -9],
       [ 11,  -5, -14,  -3],
       [ 14,  -6, -14,   2],
       [  1,  -6,  -3,   1]])

In [8]:
image_example = np.array([
    [3, 0, 1, 2, 7, 4],
    [1, 5, 8, 9, 3, 1],
    [2, 7, 2, 5, 1, 3],
    [0, 1, 3, 1, 7, 8],
    [4, 2, 1, 6, 2, 8],
    [2, 4, 5, 2, 3, 9]
])

In [9]:
# Kernel is negated?
kernel_flipped = np.negative(kernel)
convolve(image_example, kernel_flipped, mode='valid')


Out[9]:
array([[ -5,  -4,   0,   8],
       [-10,  -2,   2,   3],
       [  0,  -2,  -4,  -7],
       [ -3,  -2,  -3, -16]])

Naive implementation


In [10]:
def convolution_naive(image, kernel):
    """Valid-mode 2-D cross-correlation of `kernel` over `image`.

    The kernel is slid over every position where it fits entirely
    inside the image; each output entry is the sum of the elementwise
    products of the kernel with the window under it.

    Output shape: (H - kh + 1, W - kw + 1) for an (H, W) image and a
    (kh, kw) kernel.
    """
    kernel_h, kernel_w = kernel.shape
    out_h = image.shape[0] - kernel_h + 1
    out_w = image.shape[1] - kernel_w + 1
    output = np.zeros((out_h, out_w))

    for row in range(out_h):
        for col in range(out_w):
            # Elementwise product of the current window with the kernel,
            # summed -- equivalent to the four-deep scalar loop.
            window = image[row:row + kernel_h, col:col + kernel_w]
            output[row, col] = np.sum(window * kernel)

    return output

In [11]:
convolution_naive(image, kernel)


Out[11]:
array([[ 10.,   0., -15.,  -9.],
       [ 11.,  -5., -14.,  -3.],
       [ 14.,  -6., -14.,   2.],
       [  1.,  -6.,  -3.,   1.]])

In [12]:
convolution_naive(image_example, kernel)


Out[12]:
array([[ -5.,  -4.,   0.,   8.],
       [-10.,  -2.,   2.,   3.],
       [  0.,  -2.,  -4.,  -7.],
       [ -3.,  -2.,  -3., -16.]])

Test the implementation against SciPy's convolve on random inputs


In [29]:
for _ in range(1000):
    # Draw a random (rows, cols) pair in [5, 20) for the test image.
    shape = np.random.randint(5, 20, (1, 2))
    test_image = np.random.randint(1, 10, shape[0])
    # kernel_flipped is the negated kernel, which for this antisymmetric
    # kernel equals the 180-degree rotation scipy's convolve undoes --
    # so both calls compute the same cross-correlation.
    ours = convolution_naive(test_image, kernel)
    reference = convolve(test_image, kernel_flipped, mode='valid')
    np.testing.assert_equal(ours, reference)

print('All tests passed')


All tests passed

Vertical edge detection


In [30]:
image = np.zeros((6, 6))
image[:, 0:3] = 10
image


Out[30]:
array([[ 10.,  10.,  10.,   0.,   0.,   0.],
       [ 10.,  10.,  10.,   0.,   0.,   0.],
       [ 10.,  10.,  10.,   0.,   0.,   0.],
       [ 10.,  10.,  10.,   0.,   0.,   0.],
       [ 10.,  10.,  10.,   0.,   0.,   0.],
       [ 10.,  10.,  10.,   0.,   0.,   0.]])

In [31]:
convolution_naive(image, kernel)


Out[31]:
array([[  0.,  30.,  30.,   0.],
       [  0.,  30.,  30.,   0.],
       [  0.,  30.,  30.,   0.],
       [  0.,  30.,  30.,   0.]])

Horizontal edge detection


In [33]:
kernel_horizontal = np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]])
kernel_horizontal


Out[33]:
array([[ 1,  1,  1],
       [ 0,  0,  0],
       [-1, -1, -1]])

In [34]:
image = np.array([
    [10, 10, 10, 0, 0, 0],
    [10, 10, 10, 0, 0, 0],
    [10, 10, 10, 0, 0, 0],
    [0, 0, 0, 10, 10, 10],
    [0, 0, 0, 10, 10, 10],
    [0, 0, 0, 10, 10, 10],
])

In [36]:
convolution_naive(image, kernel_horizontal)


Out[36]:
array([[  0.,   0.,   0.,   0.],
       [ 30.,  10., -10., -30.],
       [ 30.,  10., -10., -30.],
       [  0.,   0.,   0.,   0.]])

In [37]:
convolution_naive(image, kernel)


Out[37]:
array([[  0.,  30.,  30.,   0.],
       [  0.,  10.,  10.,   0.],
       [  0., -10., -10.,   0.],
       [  0., -30., -30.,   0.]])

Sobel filter


In [38]:
kernel_sobel = np.array([[1, 0, -1], [2, 0, -2], [1, 0, -1]])
kernel_sobel


Out[38]:
array([[ 1,  0, -1],
       [ 2,  0, -2],
       [ 1,  0, -1]])

Padding

output size = n + 2p - f + 1, where n is the input size, p the padding and f the filter size.

same padding


In [39]:
def padding_for_same(filter_size):
    """Per-side padding so a 'same' convolution preserves the input
    size: p = (f - 1) / 2.

    Returns a non-integer for even filter sizes -- one reason
    odd-sized filters are the convention.
    """
    half_window = filter_size - 1
    return half_window / 2

In [40]:
padding_for_same(3)


Out[40]:
1.0

In [41]:
# Even-sized filters are problematic: the required padding is not an
# integer, so it cannot be split symmetrically between the two sides.
# Odd-sized filters are the convention for exactly this reason.
padding_for_same(4)


Out[41]:
1.5

Strided convolutions


In [74]:
def convolution_strided_naive(image, kernel, stride):
    """Strided, valid-mode 2-D cross-correlation of `kernel` over `image`.

    Parameters
    ----------
    image : 2-D array
        Input image, at least as large as the kernel in both axes.
    kernel : 2-D array
        Filter to slide over the image.
    stride : int
        Step between consecutive windows; must be a positive integer.

    Returns
    -------
    2-D float array of shape (floor((H - kh) / stride) + 1,
    floor((W - kw) / stride) + 1).

    Raises
    ------
    ValueError
        If `stride` is not a positive integer.
    """
    # A stride < 1 would make the output-size formula meaningless
    # (and stride == 0 would read the same window forever).
    if stride < 1:
        raise ValueError('stride must be a positive integer')

    kernel_h, kernel_w = kernel.shape
    # Integer floor division replaces the original int(float / stride + 1)
    # round-trip -- same result for these sizes, without the float detour.
    out_h = (image.shape[0] - kernel_h) // stride + 1
    out_w = (image.shape[1] - kernel_w) // stride + 1
    output = np.zeros((out_h, out_w))

    for i in range(out_h):
        for j in range(out_w):
            top = i * stride
            left = j * stride
            window = image[top:top + kernel_h, left:left + kernel_w]
            output[i, j] = np.sum(window * kernel)

    return output

In [75]:
image = np.array([
    [2, 3, 7, 4, 6, 2, 9],
    [6, 6, 9, 8, 7, 4, 3],
    [3, 4, 8, 3, 8, 9, 7],
    [7, 8, 3, 6, 6, 3, 4],
    [4, 2, 1, 8, 3, 4, 6],
    [3, 2, 4, 1, 9, 8, 3],
    [0, 1, 3, 9, 2, 1, 4]
])

In [76]:
kernel = np.array([[3, 4, 4], [1, 0, 2], [-1, 0, 3]])

In [77]:
convolution_strided_naive(image, kernel, stride=2)


Out[77]:
array([[  91.,  100.,   88.],
       [  69.,   91.,  117.],
       [  44.,   72.,   74.]])

Flipping the kernel

⚠ What we've done so far is technically called cross-correlation and not convolution. So what's really done in most of deep learning literature is actually cross-correlation and not convolution.

https://www.coursera.org/learn/convolutional-neural-networks/lecture/wfUhx/strided-convolutions


In [ ]:
def mirror(x):
    """Flip both axes of `x` (180-degree rotation), then transpose.

    Bug fix: the original body read the global `kernel` instead of the
    parameter `x`, so the argument was silently ignored.

    NOTE(review): the textbook kernel flip for true convolution is the
    180-degree rotation alone (np.flip(x)); the extra transpose here
    matches the notebook's displayed Out[130], but confirm it is the
    intended operation.
    """
    return np.flipud(np.fliplr(x)).T

In [129]:
kernel = np.array([
    [3, 4, 5],
    [1, 0, 2],
    [-1, 9, 7]
])

In [130]:
mirror(kernel)


Out[130]:
array([[ 7,  2,  5],
       [ 9,  0,  4],
       [-1,  1,  3]])

The reason this flipping is done in signal processing and some branches of math is that it gives convolution the associativity property, which is lost with our previous notation. However, deep learning practitioners do not care about associativity, so they skip the flip (it keeps the code simple and achieves the same result).


In [138]:
def convolution_3d_naive(image, kernel):
    """Valid-mode cross-correlation of a 3-D kernel over a 3-D image.

    At each spatial position the product is summed over the kernel's
    full height, width and depth, so the result collapses to a single
    2-D feature map of shape (H - kh + 1, W - kw + 1).
    """
    assert image.ndim == 3, "we're 3d now bro"
    assert kernel.ndim == 3, "we're 3d now bro"

    kernel_h, kernel_w, kernel_d = kernel.shape
    out_h = image.shape[0] - kernel_h + 1
    out_w = image.shape[1] - kernel_w + 1
    output = np.zeros((out_h, out_w))

    for row in range(out_h):
        for col in range(out_w):
            # Sum over the whole 3-D window; only the first kernel_d
            # channels are read, exactly as the original scalar loop did.
            window = image[row:row + kernel_h, col:col + kernel_w, :kernel_d]
            output[row, col] = np.sum(window * kernel)

    return output

In [139]:
image = np.random.randint(1, 10, (6, 6, 3))
kernel = np.random.randint(1, 10, (3, 3, 3))

In [141]:
convolution_3d_naive(image, kernel)


Out[141]:
array([[ 674.,  653.,  656.,  662.],
       [ 516.,  604.,  617.,  695.],
       [ 623.,  493.,  581.,  621.],
       [ 637.,  588.,  593.,  521.]])