arthurmensch · May 19, 2018 10:25
diff --git a/double_backward.py b/double_backward.py
 """
 Custom twice-differentiable autograd functions in PyTorch.

 Author: Arthur Mensch
 """


 import torch

 import torch.nn as nn
 from torch.autograd.gradcheck import gradgradcheck, gradcheck


 class CubeFunction(torch.autograd.Function):
    """
    Toy activation x -> x ** 3 implemented as a custom autograd function.

    The backward pass is delegated to ``CubeFunctionBackward``, which is an
    autograd function as well, so the gradient can be differentiated again.
    """

    @staticmethod
    def forward(ctx, X):
        """Return ``X`` cubed and keep ``X`` for the backward pass."""
        cubed = torch.pow(X, 3)
        ctx.save_for_backward(X)
        return cubed

    @staticmethod
    def backward(ctx, M):
        """Map the incoming gradient ``M`` to the gradient w.r.t. ``X``."""
        saved_input = ctx.saved_tensors[0]
        # Going through another Function keeps this step on the autograd graph.
        return CubeFunctionBackward.apply(saved_input, M)


 class CubeFunctionBackward(torch.autograd.Function):
    """
    Gradient of x -> x ** 3, written as its own autograd function.

    Maps ``(X, M)`` to ``M * 3 * X ** 2`` and provides the derivatives of that
    expression with respect to both inputs.
    """

    @staticmethod
    def forward(ctx, X, M):
        """Return ``M * 3 * X ** 2`` and save both inputs for ``backward``."""
        ctx.save_for_backward(X, M)
        scaled_upstream = M * 3
        return scaled_upstream * torch.pow(X, 2)

    @staticmethod
    def backward(ctx, V):
        """Return the gradients of the forward output w.r.t. ``X`` and ``M``."""
        saved_x, saved_m = ctx.saved_tensors
        # d/dX (M * 3 * X^2) = 6 * X * M, weighted by the incoming gradient V.
        grad_wrt_x = V * 6 * saved_x * saved_m
        # d/dM (M * 3 * X^2) = 3 * X^2, weighted by the incoming gradient V.
        grad_wrt_m = V * 3 * torch.pow(saved_x, 2)
        return grad_wrt_x, grad_wrt_m


 class Cube(nn.Module):
    """Module wrapper that applies ``CubeFunction`` to its input."""

    # No __init__ override: nn.Module's own constructor is all that is needed.

    def forward(self, X):
        """Return ``CubeFunction.apply(X)``."""
        cubed = CubeFunction.apply(X)
        return cubed


 class TrueCube(nn.Module):
    """
    Reference module computing x -> x ** 3 with PyTorch's built-in power,
    which is already twice differentiable.
    """

    # No __init__ override: nn.Module's own constructor is all that is needed.

    def forward(self, X):
        """Return ``X`` raised to the third power."""
        return torch.pow(X, 3)

 # Seed the global RNG so the run is reproducible (gradgradcheck generates
 # random grad_outputs when none are supplied).
 torch.manual_seed(0)

 X = torch.Tensor([3.])
 X.requires_grad_()
 print('x:', X)
 cube = Cube()

 # First-order gradient: backpropagate sum(f(x)) and read it from X.grad.
 Y = cube(X)
 print('f(x):', Y)
 S = torch.sum(Y)
 S.backward()
 print('<Grad (f)(x), 1>:', X.grad)

 # Second-order gradient: build the first gradient with create_graph=True so
 # it stays on the autograd graph, then backpropagate through it.
 X.grad.zero_()
 X.requires_grad_()
 Y = cube(X)
 S = torch.sum(Y)
 G, = torch.autograd.grad(S, (X, ), create_graph=True)
 S = G.sum()
 S.backward()
 print('Grad^2 (f) 1:', X.grad)

 # Numerical checks. gradcheck/gradgradcheck compare against finite
 # differences and are designed for double-precision inputs, so run them on a
 # detached float64 copy instead of the float32 tensor used above.
 X.grad.zero_()
 X_double = X.detach().double().requires_grad_()
 gradcheck(cube, (X_double, ), eps=1e-4, atol=1e-2)
 gradgradcheck(cube, (X_double, ), eps=1e-4, atol=1e-2)
	"""
	Custom twice-differentiable autograd functions in PyTorch.

	Author: Arthur Mensch
	"""


	import torch

	import torch.nn as nn
	from torch.autograd.gradcheck import gradgradcheck, gradcheck


	class CubeFunction(torch.autograd.Function):
	    """
	    Dummy activation function x -> x ** 3.

	    The backward pass calls ``CubeFunctionBackward``, another autograd
	    function, so the gradient itself can be differentiated.
	    """

	    @staticmethod
	    def forward(ctx, X):
	        """Save ``X`` for the backward pass and return ``X ** 3``."""
	        ctx.save_for_backward(X)
	        return X ** 3

	    @staticmethod
	    def backward(ctx, M):
	        """Return the gradient w.r.t. ``X`` given the incoming gradient ``M``."""
	        X, = ctx.saved_tensors
	        return CubeFunctionBackward.apply(X, M)


	class CubeFunctionBackward(torch.autograd.Function):
	    """
	    Gradient of x -> x ** 3 as an autograd function of ``(X, M)``.

	    ``forward`` returns ``M * 3 * X ** 2``; ``backward`` returns the
	    derivatives of that expression with respect to ``X`` and ``M``.
	    """

	    @staticmethod
	    def forward(ctx, X, M):
	        """Save both inputs and return ``M * 3 * X ** 2``."""
	        ctx.save_for_backward(X, M)
	        return M * 3 * X ** 2

	    @staticmethod
	    def backward(ctx, V):
	        """Return ``(grad w.r.t. X, grad w.r.t. M)`` weighted by ``V``."""
	        X, M = ctx.saved_tensors
	        # d/dX (M * 3 * X^2) = 6 * X * M ; d/dM (M * 3 * X^2) = 3 * X^2.
	        return V * 6 * X * M, V * 3 * X ** 2


	class Cube(nn.Module):
	    """Module wrapper that applies ``CubeFunction`` to its input."""

	    def __init__(self):
	        super().__init__()

	    def forward(self, X):
	        """Return ``CubeFunction.apply(X)``."""
	        return CubeFunction.apply(X)


	class TrueCube(nn.Module):
	    """
	    PyTorch's built-in x -> x ** 3 is already twice differentiable; this
	    module is provided as a reference.
	    """

	    def __init__(self):
	        super().__init__()

	    def forward(self, X):
	        """Return ``X ** 3``."""
	        return X ** 3

	# Seed the global RNG so the run is reproducible (gradgradcheck generates
	# random grad_outputs when none are supplied).
	torch.manual_seed(0)

	X = torch.Tensor([3.])
	X.requires_grad_()
	print('x:', X)
	cube = Cube()

	# First-order gradient: backpropagate sum(f(x)) and read it from X.grad.
	Y = cube(X)
	print('f(x):', Y)
	S = torch.sum(Y)
	S.backward()
	print('<Grad (f)(x), 1>:', X.grad)

	# Second-order gradient: build the first gradient with create_graph=True so
	# it stays on the autograd graph, then backpropagate through it.
	X.grad.zero_()
	X.requires_grad_()
	Y = cube(X)
	S = torch.sum(Y)
	G, = torch.autograd.grad(S, (X, ), create_graph=True)
	S = G.sum()
	S.backward()
	print('Grad^2 (f) 1:', X.grad)

	# Numerical checks. gradcheck/gradgradcheck compare against finite
	# differences and are designed for double-precision inputs, so run them on a
	# detached float64 copy instead of the float32 tensor used above.
	X.grad.zero_()
	X_double = X.detach().double().requires_grad_()
	gradcheck(cube, (X_double, ), eps=1e-4, atol=1e-2)
	gradgradcheck(cube, (X_double, ), eps=1e-4, atol=1e-2)