In [1]:
import numpy as np
np.random.seed(0)
In [2]:
image = np.random.randint(0, 10, size=(6, 6))
In [3]:
image
Out[3]:
In [4]:
# Vertical edge detection
kernel = np.array([[1, 0, -1], [1, 0, -1], [1, 0, -1]])
In [5]:
kernel
Out[5]:
In [6]:
from scipy.signal import convolve
In [7]:
# Flip the kernel: scipy's convolve does true convolution (180° kernel flip);
# for this antisymmetric kernel the flip happens to equal negation.
kernel_flipped = np.negative(kernel)
convolve(image, kernel_flipped, 'valid')
Out[7]:
In [8]:
image_example = np.array([
[3, 0, 1, 2, 7, 4],
[1, 5, 8, 9, 3, 1],
[2, 7, 2, 5, 1, 3],
[0, 1, 3, 1, 7, 8],
[4, 2, 1, 6, 2, 8],
[2, 4, 5, 2, 3, 9]
])
In [9]:
# Negation again stands in for the 180° kernel flip (valid for this kernel only)
kernel_flipped = np.negative(kernel)
convolve(image_example, kernel_flipped, mode='valid')
Out[9]:
In [10]:
def convolution_naive(image, kernel):
    """Valid (no padding, stride 1) cross-correlation of a 2-D image with a 2-D kernel.

    Parameters
    ----------
    image : 2-D array of shape (H, W)
    kernel : 2-D array of shape (kh, kw), with kh <= H and kw <= W

    Returns
    -------
    2-D float array of shape (H - kh + 1, W - kw + 1); entry (r, c) is the
    sum of the element-wise product of the kernel and the image window whose
    top-left corner is at (r, c).
    """
    k_rows, k_cols = kernel.shape
    out_rows = image.shape[0] - k_rows + 1
    out_cols = image.shape[1] - k_cols + 1
    output = np.zeros((out_rows, out_cols))
    for r in range(out_rows):
        for c in range(out_cols):
            # Element-wise product of the kernel with the window under it,
            # summed — equivalent to the four-deep scalar loop.
            window = image[r:r + k_rows, c:c + k_cols]
            output[r, c] = np.sum(window * kernel)
    return output
In [11]:
convolution_naive(image, kernel)
Out[11]:
In [12]:
convolution_naive(image_example, kernel)
Out[12]:
In [29]:
# Property test: convolution_naive must agree with scipy.signal.convolve.
# scipy performs true convolution (it flips the kernel first); kernel_flipped
# is the pre-flipped kernel, so comparing against it makes the two equivalent.
# NOTE(review): relies on the notebook globals `kernel` and `kernel_flipped`
# from earlier cells; for that antisymmetric kernel the 180° flip equals
# plain negation, which is why np.negative was sufficient.
for _ in range(1000):
# random (height, width) pair, each in [5, 20)
shape = np.random.randint(5, 20, (1, 2))
image_test = np.random.randint(1, 10, shape[0])
actual = convolution_naive(image_test, kernel)
expected = convolve(image_test, kernel_flipped, mode='valid')
np.testing.assert_equal(actual, expected)
print('All tests passed')
In [30]:
image = np.zeros((6, 6))
image[:, 0:3] = 10
image
Out[30]:
In [31]:
convolution_naive(image, kernel)
Out[31]:
In [33]:
kernel_horizontal = np.array([[1, 1, 1], [0, 0, 0], [-1, -1, -1]])
kernel_horizontal
Out[33]:
In [34]:
image = np.array([
[10, 10, 10, 0, 0, 0],
[10, 10, 10, 0, 0, 0],
[10, 10, 10, 0, 0, 0],
[0, 0, 0, 10, 10, 10],
[0, 0, 0, 10, 10, 10],
[0, 0, 0, 10, 10, 10],
])
In [36]:
convolution_naive(image, kernel_horizontal)
Out[36]:
In [37]:
convolution_naive(image, kernel)
Out[37]:
In [38]:
kernel_sobel = np.array([[1, 0, -1], [2, 0, -2], [1, 0, -1]])
kernel_sobel
Out[38]:
output size = n + 2·p − f + 1, where n is the input size, p the padding, and f the filter size.
In [39]:
def padding_for_same(filter_size):
    """Padding p that keeps the output the same size as the input: p = (f − 1) / 2.

    Deliberately returns a float — a non-integer result (any even
    filter_size) signals that 'same' padding is not cleanly possible,
    which is why odd-sized filters are the convention.
    """
    return 0.5 * (filter_size - 1)
In [40]:
padding_for_same(3)
Out[40]:
In [41]:
# even-shaped filters are problematic
# odd-shaped is the convention
padding_for_same(4)
Out[41]:
In [74]:
def convolution_strided_naive(image, kernel, stride):
    """Valid 2-D cross-correlation that moves the window `stride` pixels per step.

    Parameters
    ----------
    image : 2-D array of shape (H, W)
    kernel : 2-D array of shape (kh, kw)
    stride : positive int; step between consecutive windows in both axes

    Returns
    -------
    2-D float array of shape (floor((H - kh) / stride) + 1,
    floor((W - kw) / stride) + 1).
    """
    k_rows, k_cols = kernel.shape
    # Floor division matches the original int(.../stride + 1) for
    # non-negative sizes.
    n_rows = (image.shape[0] - k_rows) // stride + 1
    n_cols = (image.shape[1] - k_cols) // stride + 1
    output = np.zeros((n_rows, n_cols))
    for r in range(n_rows):
        top = r * stride
        for c in range(n_cols):
            left = c * stride
            patch = image[top:top + k_rows, left:left + k_cols]
            output[r, c] = np.sum(patch * kernel)
    return output
In [75]:
image = np.array([
[2, 3, 7, 4, 6, 2, 9],
[6, 6, 9, 8, 7, 4, 3],
[3, 4, 8, 3, 8, 9, 7],
[7, 8, 3, 6, 6, 3, 4],
[4, 2, 1, 8, 3, 4, 6],
[3, 2, 4, 1, 9, 8, 3],
[0, 1, 3, 9, 2, 1, 4]
])
In [76]:
kernel = np.array([[3, 4, 4], [1, 0, 2], [-1, 0, 3]])
In [77]:
convolution_strided_naive(image, kernel, stride=2)
Out[77]:
⚠ What we've implemented so far is technically cross-correlation, not convolution (true convolution flips the kernel first). Most of the deep-learning literature nevertheless calls this operation "convolution".
https://www.coursera.org/learn/convolutional-neural-networks/lecture/wfUhx/strided-convolutions
In [ ]:
def mirror(x):
    """Mirror a 2-D kernel: flip both axes (180° rotation) then transpose.

    Bug fix: the original body referenced the notebook global `kernel`
    instead of the argument `x`, so the parameter was silently ignored.

    NOTE(review): true convolution only needs the 180° rotation
    (flipud∘fliplr); the trailing .T is kept to preserve the notebook's
    demonstrated output — confirm it is intended.
    """
    return np.flipud(np.fliplr(x)).T
In [129]:
kernel = np.array([
[3, 4, 5],
[1, 0, 2],
[-1, 9, 7]
])
In [130]:
mirror(kernel)
Out[130]:
The reason this flip is done in signal processing and some branches of math is that it gives convolution the associativity property, which is lost with our previous notation. Deep-learning practitioners skip the flip because it keeps the code simple and, for learned filters, achieves the same result.
In [138]:
def convolution_3d_naive(image, kernel):
    """Valid cross-correlation of a 3-D (H, W, C) volume with a 3-D kernel.

    Sums over rows, columns AND channels, producing a single 2-D feature
    map — exactly what one conv-net filter does.

    Parameters
    ----------
    image : 3-D array of shape (H, W, C)
    kernel : 3-D array of shape (kh, kw, kc)

    Returns
    -------
    2-D float array of shape (H - kh + 1, W - kw + 1).
    """
    assert image.ndim == 3, "we're 3d now bro"
    assert kernel.ndim == 3, "we're 3d now bro"
    k_rows, k_cols, k_chan = kernel.shape
    n_rows = image.shape[0] - k_rows + 1
    n_cols = image.shape[1] - k_cols + 1
    output = np.zeros((n_rows, n_cols))
    for r in range(n_rows):
        for c in range(n_cols):
            # Slice only the first k_chan channels, matching the original
            # loop bound of kernel.shape[2].
            patch = image[r:r + k_rows, c:c + k_cols, :k_chan]
            output[r, c] = np.sum(patch * kernel)
    return output
In [139]:
image = np.random.randint(1, 10, (6, 6, 3))
kernel = np.random.randint(1, 10, (3, 3, 3))
In [141]:
convolution_3d_naive(image, kernel)
Out[141]: