Convolution


In [1]:
import numpy as np
np.random.seed(0)

In [2]:
image = np.random.randint(0, 10, size=(6, 6))

In [3]:
image


Out[3]:
array([[5, 0, 3, 3, 7, 9],
       [3, 5, 2, 4, 7, 6],
       [8, 8, 1, 6, 7, 7],
       [8, 1, 5, 9, 8, 9],
       [4, 3, 0, 3, 5, 0],
       [2, 3, 8, 1, 3, 3]])

In [4]:
# Vertical edge detection
kernel = np.array([[1, 0, -1], [1, 0, -1], [1, 0, -1]])

In [5]:
kernel


Out[5]:
array([[ 1,  0, -1],
       [ 1,  0, -1],
       [ 1,  0, -1]])

In [6]:
from scipy.signal import convolve

In [7]:
# Negate the kernel: scipy's convolve rotates the kernel 180 degrees
# before sliding it, and for this antisymmetric kernel negation equals
# that 180-degree rotation -- so this undoes the flip and yields the
# cross-correlation we want.
kernel_flipped = np.negative(kernel)
convolve(image, kernel_flipped, 'valid')


Out[7]:
array([[ 10,   0, -15,  -9],
       [ 11,  -5, -14,  -3],
       [ 14,  -6, -14,   2],
       [  1,  -6,  -3,   1]])

In [8]:
image_example = np.array([
    [3, 0, 1, 2, 7, 4],
    [1, 5, 8, 9, 3, 1],
    [2, 7, 2, 5, 1, 3],
    [0, 1, 3, 1, 7, 8],
    [4, 2, 1, 6, 2, 8],
    [2, 4, 5, 2, 3, 9]
])

In [9]:
# Kernel is negated?
kernel_flipped = np.negative(kernel)
convolve(image_example, kernel_flipped, mode='valid')


Out[9]:
array([[ -5,  -4,   0,   8],
       [-10,  -2,   2,   3],
       [  0,  -2,  -4,  -7],
       [ -3,  -2,  -3, -16]])

Naive implementation


In [10]:
def convolution_naive(image, kernel):
    """Valid-mode 2-D cross-correlation of `kernel` over `image`.

    The kernel is slid over every position where it fits entirely
    inside the image; each output entry is the sum of the elementwise
    products of the kernel with the window under it.

    Output shape: (H - kh + 1, W - kw + 1) for an (H, W) image and a
    (kh, kw) kernel.
    """
    kernel_h, kernel_w = kernel.shape
    out_h = image.shape[0] - kernel_h + 1
    out_w = image.shape[1] - kernel_w + 1
    output = np.zeros((out_h, out_w))

    for row in range(out_h):
        for col in range(out_w):
            # Elementwise product of the current window with the kernel,
            # summed -- equivalent to the four-deep scalar loop.
            window = image[row:row + kernel_h, col:col + kernel_w]
            output[row, col] = np.sum(window * kernel)

    return output

In [11]:
convolution_naive(image, kernel)


Out[11]:
array([[ 10.,   0., -15.,  -9.],
       [ 11.,  -5., -14.,  -3.],
       [ 14.,  -6., -14.,   2.],
       [  1.,  -6.,  -3.,   1.]])

In [12]:
convolution_naive(image_example, kernel)


Out[12]:
array([[ -5.,  -4.,   0.,   8.],
       [-10.,  -2.,   2.,   3.],
       [  0.,  -2.,  -4.,  -7.],
       [ -3.,  -2.,  -3., -16.]])

Test the implementation against SciPy's convolve on random inputs


In [29]:
for _ in range(1000):
    # Draw a random (rows, cols) pair in [5, 20) for the test image.
    shape = np.random.randint(5, 20, (1, 2))
    test_image = np.random.randint(1, 10, shape[0])
    # kernel_flipped is the negated kernel, which for this antisymmetric
    # kernel equals the 180-degree rotation scipy's convolve undoes --
    # so both calls compute the same cross-correlation.
    ours = convolution_naive(test_image, kernel)
    reference = convolve(test_image, kernel_flipped, mode='valid')
    np.testing.assert_equal(ours, reference)

print('All tests passed')


All tests passed

Vertical edge detection


In [30]:
image = np.zeros((6, 6))
image[:, 0:3] = 10
image


Out[30]:
array([[ 10.,  10.,  10.,   0.,   0.,   0.],
       [ 10.,  10.,  10.,   0.,   0.,   0.],
       [ 10.,  10.,  10.,   0.,   0.,   0.],
       [ 10.,  10.,  10.,   0.,   0.,   0.],
       [ 10.,  10.,  10.,   0.,   0.,   0.],
       [ 10.,  10.,  10.,   0.,   0.,   0.]])

In [31]:
convolution_naive(image, kernel)


Out[31]:
array([[  0.,  30.,  30.,   0.],
       [  0.,  30.,  30.,   0.],
       [  0.,  30.,  30.,   0.],
       [  0.,  30.,  30.,   0.]])

Horizontal edge detection


In [33]:
kernel_horizontal = np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]])
kernel_horizontal


Out[33]:
array([[ 1,  1,  1],
       [ 0,  0,  0],
       [-1, -1, -1]])

In [34]:
image = np.array([
    [10, 10, 10, 0, 0, 0],
    [10, 10, 10, 0, 0, 0],
    [10, 10, 10, 0, 0, 0],
    [0, 0, 0, 10, 10, 10],
    [0, 0, 0, 10, 10, 10],
    [0, 0, 0, 10, 10, 10],
])

In [36]:
convolution_naive(image, kernel_horizontal)


Out[36]:
array([[  0.,   0.,   0.,   0.],
       [ 30.,  10., -10., -30.],
       [ 30.,  10., -10., -30.],
       [  0.,   0.,   0.,   0.]])

In [37]:
convolution_naive(image, kernel)


Out[37]:
array([[  0.,  30.,  30.,   0.],
       [  0.,  10.,  10.,   0.],
       [  0., -10., -10.,   0.],
       [  0., -30., -30.,   0.]])

Sobel filter


In [38]:
kernel_sobel = np.array([[1, 0, -1], [2, 0, -2], [1, 0, -1]])
kernel_sobel


Out[38]:
array([[ 1,  0, -1],
       [ 2,  0, -2],
       [ 1,  0, -1]])

Padding

output size = n + 2p - f + 1, where n is the input size, p the padding and f the filter size.

same padding


In [39]:
def padding_for_same(filter_size):
    """Per-side padding so a 'same' convolution preserves the input
    size: p = (f - 1) / 2.

    Returns a non-integer for even filter sizes -- one reason
    odd-sized filters are the convention.
    """
    half_window = filter_size - 1
    return half_window / 2

In [40]:
padding_for_same(3)


Out[40]:
1.0

In [41]:
# Even-sized filters are problematic: the required padding is not an
# integer, so it cannot be split symmetrically between the two sides.
# Odd-sized filters are the convention for exactly this reason.
padding_for_same(4)


Out[41]:
1.5

Strided convolutions


In [74]:
def convolution_strided_naive(image, kernel, stride):
    """Strided, valid-mode 2-D cross-correlation of `kernel` over `image`.

    Parameters
    ----------
    image : 2-D array
        Input image, at least as large as the kernel in both axes.
    kernel : 2-D array
        Filter to slide over the image.
    stride : int
        Step between consecutive windows; must be a positive integer.

    Returns
    -------
    2-D float array of shape (floor((H - kh) / stride) + 1,
    floor((W - kw) / stride) + 1).

    Raises
    ------
    ValueError
        If `stride` is not a positive integer.
    """
    # A stride < 1 would make the output-size formula meaningless
    # (and stride == 0 would read the same window forever).
    if stride < 1:
        raise ValueError('stride must be a positive integer')

    kernel_h, kernel_w = kernel.shape
    # Integer floor division replaces the original int(float / stride + 1)
    # round-trip -- same result for these sizes, without the float detour.
    out_h = (image.shape[0] - kernel_h) // stride + 1
    out_w = (image.shape[1] - kernel_w) // stride + 1
    output = np.zeros((out_h, out_w))

    for i in range(out_h):
        for j in range(out_w):
            top = i * stride
            left = j * stride
            window = image[top:top + kernel_h, left:left + kernel_w]
            output[i, j] = np.sum(window * kernel)

    return output

In [75]:
image = np.array([
    [2, 3, 7, 4, 6, 2, 9],
    [6, 6, 9, 8, 7, 4, 3],
    [3, 4, 8, 3, 8, 9, 7],
    [7, 8, 3, 6, 6, 3, 4],
    [4, 2, 1, 8, 3, 4, 6],
    [3, 2, 4, 1, 9, 8, 3],
    [0, 1, 3, 9, 2, 1, 4]
])

In [76]:
kernel = np.array([[3, 4, 4], [1, 0, 2], [-1, 0, 3]])

In [77]:
convolution_strided_naive(image, kernel, stride=2)


Out[77]:
array([[  91.,  100.,   88.],
       [  69.,   91.,  117.],
       [  44.,   72.,   74.]])

Flipping the kernel

⚠ What we've done so far is technically called cross-correlation and not convolution. So what's really done in most of deep learning literature is actually cross-correlation and not convolution.

https://www.coursera.org/learn/convolutional-neural-networks/lecture/wfUhx/strided-convolutions


In [ ]:
def mirror(x):
    """Flip both axes of `x` (180-degree rotation), then transpose.

    Bug fix: the original body read the global `kernel` instead of the
    parameter `x`, so the argument was silently ignored.

    NOTE(review): the textbook kernel flip for true convolution is the
    180-degree rotation alone (np.flip(x)); the extra transpose here
    matches the notebook's displayed Out[130], but confirm it is the
    intended operation.
    """
    return np.flipud(np.fliplr(x)).T

In [129]:
kernel = np.array([
    [3, 4, 5],
    [1, 0, 2],
    [-1, 9, 7]
])

In [130]:
mirror(kernel)


Out[130]:
array([[ 7,  2,  5],
       [ 9,  0,  4],
       [-1,  1,  3]])

The reason this flipping is done in signal processing and some branches of math is that it gives convolution the associativity property, which is lost with our previous notation. However, deep learning practitioners do not care about associativity, so they skip the flip (it keeps the code simple and achieves the same result).


In [138]:
def convolution_3d_naive(image, kernel):
    """Valid-mode cross-correlation of a 3-D kernel over a 3-D image.

    At each spatial position the product is summed over the kernel's
    full height, width and depth, so the result collapses to a single
    2-D feature map of shape (H - kh + 1, W - kw + 1).
    """
    assert image.ndim == 3, "we're 3d now bro"
    assert kernel.ndim == 3, "we're 3d now bro"

    kernel_h, kernel_w, kernel_d = kernel.shape
    out_h = image.shape[0] - kernel_h + 1
    out_w = image.shape[1] - kernel_w + 1
    output = np.zeros((out_h, out_w))

    for row in range(out_h):
        for col in range(out_w):
            # Sum over the whole 3-D window; only the first kernel_d
            # channels are read, exactly as the original scalar loop did.
            window = image[row:row + kernel_h, col:col + kernel_w, :kernel_d]
            output[row, col] = np.sum(window * kernel)

    return output

In [139]:
image = np.random.randint(1, 10, (6, 6, 3))
kernel = np.random.randint(1, 10, (3, 3, 3))

In [141]:
convolution_3d_naive(image, kernel)


Out[141]:
array([[ 674.,  653.,  656.,  662.],
       [ 516.,  604.,  617.,  695.],
       [ 623.,  493.,  581.,  621.],
       [ 637.,  588.,  593.,  521.]])