import numpy as np
import torch
from torch.nn.functional import conv2d
from skimage.util.shape import view_as_windows

# Try with values that don't have transpose symmetry
input_matrix_ = np.array(
    [[3, 9, 0, 1],
     [2, 8, 1, 1],
     [3, 4, 8, 1],
     [2, 2, 2, 2]],
    dtype=np.float32
)
kernel_ = np.array(
    [[8, 9],
     [4, 4]],
    dtype=np.float32
)
bias_ = np.array([0.06], dtype=np.float32)
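
# Optional sanity check (suggested addition, not in the original gist): confirm the
# test values really lack transpose symmetry, so a transpose bug in any implementation
# below would change the output instead of silently cancelling out.
assert not np.array_equal(input_matrix_, input_matrix_.T)
assert not np.array_equal(kernel_, kernel_.T)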

# Convenience functions
to_t = lambda arr: torch.Tensor(arr)
to_np = lambda arr: np.array(arr)

def conv2d_naive(input_matrix, kernel, bias):
    kernel_shape = np.array(kernel.shape[:2])
    out_shape = np.array(input_matrix.shape[:2]) - kernel_shape + 1
    out = np.empty(out_shape, dtype=np.float32)
    # Suggested change 1: iterate over the correct bounds.
    # Your original code happened to work because range(input_shape - 1) matched
    # out_shape in this toy example. The "- 1" is not needed here: Python's range()
    # already stops *before* the stop value.
    for row in range(out_shape[0]):
        for col in range(out_shape[1]):
            patch = input_matrix[row:row + kernel_shape[0], col:col + kernel_shape[1]]
            # Minor suggested change: replace np.multiply with the clearer "*" operator
            out[row, col] = np.sum(patch * kernel)
    return out + bias
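
# Optional shape check (suggested addition, not in the original gist): with the 4x4
# input and 2x2 kernel above, the valid output is (4 - 2 + 1) x (4 - 2 + 1) = 3x3,
# so `row` and `col` each run over 0..2, exactly what range(out_shape[...]) gives.
assert conv2d_naive(input_matrix_, kernel_, bias_).shape == (3, 3)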

def conv2d_im2col(input_matrix, kernel, bias):
    kernel_shape = np.array(kernel.shape[:2])
    out_shape = np.array(input_matrix.shape[:2]) - kernel_shape + 1
    rows = []
    for row in range(out_shape[0]):
        for col in range(out_shape[1]):
            patch = input_matrix[row:row + kernel_shape[0], col:col + kernel_shape[1]]
            rows.append(patch.flatten())
    # Suggested change 2: for the matrix product to work, these values must *not* be
    # transposed. It worked in the video because of that example's matrix shapes, but
    # it does not work for arbitrary combinations.
    rows = np.array(rows)
    # Minor suggested change: use the matrix-multiply operator "@" rather than np.dot
    # for clarity (not strictly necessary)
    conv = (rows @ kernel.flatten()) + bias
    # Minor change: reshape the output to 2D (this wasn't done in the video)
    return conv.reshape(out_shape)
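
# Optional shape illustration for suggested change 2 (an added example, not in the
# original gist): `rows` stacks one flattened patch per output position, so here it
# is (9, 4) while kernel.flatten() is (4,), and (9, 4) @ (4,) yields a (9,) result.
# Transposing `rows` would attempt (4, 9) @ (4,), which fails with a shape-mismatch
# error unless the patch matrix happens to be square.
_rows_demo = view_as_windows(input_matrix_, kernel_.shape).reshape(-1, kernel_.size)
assert _rows_demo.shape == (9, 4)
assert (_rows_demo @ kernel_.flatten()).shape == (9,)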

def conv2d_strided(input_matrix, kernel, bias):
    kernel_shape = np.array(kernel.shape[:2])
    out_shape = np.array(input_matrix.shape[:2]) - kernel_shape + 1
    kernel_shape_flat = np.prod(kernel_shape)
    strides = view_as_windows(input_matrix, kernel_shape).reshape(-1, kernel_shape_flat)
    conv = (strides @ kernel.flatten()) + bias
    return conv.reshape(out_shape)
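
# Optional note on view_as_windows (suggested addition, not in the original gist):
# for a 4x4 input and a (2, 2) window it returns a (3, 3, 2, 2) strided view of the
# data (no copy of the input); the reshape above then flattens it into the same
# (9, 4) patch matrix that the im2col loop builds.
assert view_as_windows(input_matrix_, (2, 2)).shape == (3, 3, 2, 2)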

def conv2d_pytorch(input_matrix, kernel, bias):
    return conv2d(
        to_t(input_matrix).unsqueeze(0).unsqueeze(0),
        to_t(kernel).unsqueeze(0).unsqueeze(0),
        to_t(bias),
    )
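
# Optional shape note (suggested addition, not in the original gist): torch's conv2d
# expects a (batch, in_channels, H, W) input and a (out_channels, in_channels, kH, kW)
# weight, hence the two unsqueeze calls; the bias is 1D with one value per output
# channel. torch's conv2d is really cross-correlation (no kernel flip), which is why
# it matches the sliding-window implementations above.
assert conv2d_pytorch(input_matrix_, kernel_, bias_).shape == (1, 1, 3, 3)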

print(conv2d_im2col(input_matrix_, kernel_, bias_))
print(conv2d_naive(input_matrix_, kernel_, bias_))
print(conv2d_strided(input_matrix_, kernel_, bias_))
# Undo the "unsqueeze" ops by selecting only the salient values
# Convert back to numpy for easy comparison to the previous outputs
print(to_np(conv2d_pytorch(input_matrix_, kernel_, bias_))[0, 0, ...].round(2))
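
# Optional cross-check (suggested addition, not in the original gist): all four
# implementations should agree, up to float32 rounding in the PyTorch result.
_ref = conv2d_naive(input_matrix_, kernel_, bias_)
assert np.allclose(conv2d_im2col(input_matrix_, kernel_, bias_), _ref)
assert np.allclose(conv2d_strided(input_matrix_, kernel_, bias_), _ref)
assert np.allclose(to_np(conv2d_pytorch(input_matrix_, kernel_, bias_))[0, 0, ...], _ref, atol=1e-4)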
Video reference: https://www.youtube.com/watch?v=-Y4ST8eNySI