bwasti · October 2, 2022 21:25
diff --git a/dconv.py b/dconv.py
 # example of backward pass implemented with only forward functions

 import torch
 import torch.nn.functional as F


 def dconv(grad, x, w, stride, padding, groups):
    """Return ``(dx, dw)`` for ``torch.conv2d(x, w, stride, padding, groups)``.

    The backward pass is written with forward operations only (reshape, pad,
    flip, transpose and ``torch.conv2d``); autograd is never invoked. The
    forward convolution is assumed to use dilation 1.

    Args:
        grad: Gradient w.r.t. the convolution output,
            shape ``(N, C_out, H_out, W_out)``.
        x: Convolution input, shape ``(N, C_in, H, W)``.
        w: Convolution weight, shape ``(C_out, C_in // groups, kH, kW)``.
        stride: Integer stride, shared by both spatial dimensions.
        padding: Integer zero padding, shared by both spatial dimensions.
        groups: Number of convolution groups.

    Returns:
        Tuple ``(dx, dw)`` shaped like ``x`` and ``w`` respectively.
    """
    kernel_h, kernel_w = w.shape[-2:]
    in_h, in_w = x.shape[-2:]
    out_h, out_w = grad.shape[-2:]

    # differentiating w.r.t x
    dxgrad = grad
    if stride > 1:  # manually dilate the incoming gradient
        # Give every element its own (stride x stride) cell with the value in
        # the last position, then fold the cells back into the spatial dims.
        dxgrad = dxgrad.reshape(*dxgrad.shape, 1, 1)
        dxgrad = F.pad(dxgrad, (stride - 1, 0, stride - 1, 0)).transpose(3, 4)
        dxgrad = dxgrad.reshape(*grad.shape[:-2], stride * out_h, stride * out_w)

    # The dilation above leaves ``stride - 1`` leading zeros, so grad[o] sits
    # at index ``stride - 1 + stride * o``. A full correlation with the flipped
    # kernel needs it at ``k - 1 - padding + stride * o`` inside a buffer of
    # length ``in + k - 1``. Negative amounts crop instead of pad, which covers
    # kernels smaller than the stride and padding larger than the kernel.
    pad_top = kernel_h - stride - padding
    pad_left = kernel_w - stride - padding
    # The trailing amount also restores rows/columns the forward pass dropped
    # when (in + 2 * padding - k) is not a multiple of the stride.
    pad_bottom = in_h + padding - 1 - stride * (out_h - 1)
    pad_right = in_w + padding - 1 - stride * (out_w - 1)
    dxgrad = F.pad(dxgrad, (pad_left, pad_right, pad_top, pad_bottom))

    # Flip the kernel spatially and swap its in/out channel axes within each
    # group (for groups == 1 this is a plain transpose of dims 0 and 1).
    dxw = w.flip([2, 3])
    dxw = dxw.reshape(groups, dxw.shape[0] // groups, *dxw.shape[1:])
    dxw = dxw.transpose(1, 2)
    dxw = dxw.reshape(-1, *dxw.shape[2:])

    dx = torch.conv2d(dxgrad, dxw, groups=groups)

    # differentiating w.r.t w
    # Correlate the input with the output gradient: the batch axis plays the
    # role of input channels and the forward stride becomes the dilation.
    dwgrad = grad.transpose(0, 1)
    dwx = x.reshape(x.shape[0], groups, x.shape[1] // groups, *x.shape[2:])
    dwx = dwx.transpose(0, 2)
    dwx = dwx.reshape(dwx.shape[0], -1, *dwx.shape[3:])
    dw = torch.conv2d(dwx, dwgrad, padding=padding, dilation=stride, groups=groups)
    # Extra trailing taps appear when the forward pass dropped a remainder;
    # only the first (kH, kW) positions correspond to real kernel weights.
    dw = dw[..., :kernel_h, :kernel_w].transpose(0, 1)
    return dx, dw

 # sanity check against autograd: stride 2, padding 1, two groups
 x = torch.randn(7, 4, 5, 5).requires_grad_(True)
 w = torch.randn(6, 2, 3, 3).requires_grad_(True)

 # reference gradients: backward() fills x.grad and w.grad
 y = torch.conv2d(x, w, groups=2, padding=1, stride=2)
 grad = torch.randn(7, 6, 3, 3)
 y.backward(gradient=grad)

 # the forward-only implementation has to reproduce both of them
 dx, dw = dconv(grad, x, w, stride=2, padding=1, groups=2)
 torch.testing.assert_close(x.grad, dx)
 torch.testing.assert_close(w.grad, dw)
 print("pass")
	# example of backward pass implemented with only forward functions

	import torch
	import torch.nn.functional as F


	def dconv(grad, x, w, stride, padding, groups):
	    """Return ``(dx, dw)`` for ``torch.conv2d(x, w, stride, padding, groups)``.

	    The backward pass is written with forward operations only (reshape, pad,
	    flip, transpose and ``torch.conv2d``); autograd is never invoked. The
	    forward convolution is assumed to use dilation 1.

	    Args:
	        grad: Gradient w.r.t. the convolution output,
	            shape ``(N, C_out, H_out, W_out)``.
	        x: Convolution input, shape ``(N, C_in, H, W)``.
	        w: Convolution weight, shape ``(C_out, C_in // groups, kH, kW)``.
	        stride: Integer stride, shared by both spatial dimensions.
	        padding: Integer zero padding, shared by both spatial dimensions.
	        groups: Number of convolution groups.

	    Returns:
	        Tuple ``(dx, dw)`` shaped like ``x`` and ``w`` respectively.
	    """
	    kernel_h, kernel_w = w.shape[-2:]
	    in_h, in_w = x.shape[-2:]
	    out_h, out_w = grad.shape[-2:]

	    # differentiating w.r.t x
	    dxgrad = grad
	    if stride > 1:  # manually dilate the incoming gradient
	        # Give every element its own (stride x stride) cell with the value in
	        # the last position, then fold the cells back into the spatial dims.
	        dxgrad = dxgrad.reshape(*dxgrad.shape, 1, 1)
	        dxgrad = F.pad(dxgrad, (stride - 1, 0, stride - 1, 0)).transpose(3, 4)
	        dxgrad = dxgrad.reshape(*grad.shape[:-2], stride * out_h, stride * out_w)

	    # The dilation above leaves ``stride - 1`` leading zeros, so grad[o] sits
	    # at index ``stride - 1 + stride * o``. A full correlation with the flipped
	    # kernel needs it at ``k - 1 - padding + stride * o`` inside a buffer of
	    # length ``in + k - 1``. Negative amounts crop instead of pad, which covers
	    # kernels smaller than the stride and padding larger than the kernel.
	    pad_top = kernel_h - stride - padding
	    pad_left = kernel_w - stride - padding
	    # The trailing amount also restores rows/columns the forward pass dropped
	    # when (in + 2 * padding - k) is not a multiple of the stride.
	    pad_bottom = in_h + padding - 1 - stride * (out_h - 1)
	    pad_right = in_w + padding - 1 - stride * (out_w - 1)
	    dxgrad = F.pad(dxgrad, (pad_left, pad_right, pad_top, pad_bottom))

	    # Flip the kernel spatially and swap its in/out channel axes within each
	    # group (for groups == 1 this is a plain transpose of dims 0 and 1).
	    dxw = w.flip([2, 3])
	    dxw = dxw.reshape(groups, dxw.shape[0] // groups, *dxw.shape[1:])
	    dxw = dxw.transpose(1, 2)
	    dxw = dxw.reshape(-1, *dxw.shape[2:])

	    dx = torch.conv2d(dxgrad, dxw, groups=groups)

	    # differentiating w.r.t w
	    # Correlate the input with the output gradient: the batch axis plays the
	    # role of input channels and the forward stride becomes the dilation.
	    dwgrad = grad.transpose(0, 1)
	    dwx = x.reshape(x.shape[0], groups, x.shape[1] // groups, *x.shape[2:])
	    dwx = dwx.transpose(0, 2)
	    dwx = dwx.reshape(dwx.shape[0], -1, *dwx.shape[3:])
	    dw = torch.conv2d(dwx, dwgrad, padding=padding, dilation=stride, groups=groups)
	    # Extra trailing taps appear when the forward pass dropped a remainder;
	    # only the first (kH, kW) positions correspond to real kernel weights.
	    dw = dw[..., :kernel_h, :kernel_w].transpose(0, 1)
	    return dx, dw

	# sanity check against autograd: stride 2, padding 1, two groups
	x = torch.randn(7, 4, 5, 5).requires_grad_(True)
	w = torch.randn(6, 2, 3, 3).requires_grad_(True)

	# reference gradients: backward() fills x.grad and w.grad
	y = torch.conv2d(x, w, groups=2, padding=1, stride=2)
	grad = torch.randn(7, 6, 3, 3)
	y.backward(gradient=grad)

	# the forward-only implementation has to reproduce both of them
	dx, dw = dconv(grad, x, w, stride=2, padding=1, groups=2)
	torch.testing.assert_close(x.grad, dx)
	torch.testing.assert_close(w.grad, dw)
	print("pass")