Created
August 17, 2018 19:55
-
-
Save yrevar/140c2f6b79789197324be00bda0042ae to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import torch\n", | |
"import numpy as np\n", | |
"from torch import FloatTensor\n", | |
"from torch.autograd import Variable, grad, gradcheck" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# PyTorch Automatic differentiation" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Scalar–Scalar Mapping ($f:\\mathbb R\\to\\mathbb R$)\n", | |
"### Derivatives\n", | |
"\n", | |
"**Scalar Sum** \n", | |
"$$\n", | |
"y = x + 42\\\\\n", | |
"\\frac{dy}{dx} = 1\n", | |
"$$" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"dy/dx: [1.]\n" | |
] | |
} | |
], | |
"source": [ | |
"x = Variable(torch.tensor([42.]), requires_grad=True) # Only Tensors of floating point dtype can require gradients\n", | |
"y = x + 42\n", | |
"y.backward()\n", | |
"print(\"dy/dx:\", x.grad.data.numpy())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**Scalar Multiplication** \n", | |
"$$\n", | |
"y = 42 * x\\\\\n", | |
"\\frac{dy}{dx} = 42\n", | |
"$$" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"dy/dx: [42.]\n" | |
] | |
} | |
], | |
"source": [ | |
"x = Variable(torch.tensor([42.]), requires_grad=True) # Only Tensors of floating point dtype can require gradients\n", | |
"y = x * 42\n", | |
"y.backward()\n", | |
"print(\"dy/dx:\", x.grad.data.numpy())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**Scalar Exponentiation** \n", | |
"$$\n", | |
"y = x^{0.42}\\\\\n", | |
"\\frac{dy}{dx} = 0.42x^{-0.58}\n", | |
"$$" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"dy/dx: [0.04805789]\n" | |
] | |
} | |
], | |
"source": [ | |
"x = Variable(torch.tensor([42.]), requires_grad=True) # Only Tensors of floating point dtype can require gradients\n", | |
"y = x**0.42\n", | |
"y.backward()\n", | |
"print(\"dy/dx:\", x.grad.data.numpy())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Vector-Scalar Mapping ($f:\\mathbb R^d\\to\\mathbb R$)\n", | |
"### Gradients\n", | |
"**Sum of vector elements**\n", | |
"$$y = \\sum_{x_i} (x_i + 42)\\\\\n", | |
"\\frac{\\partial y}{\\partial x_i} = 1\n", | |
"$$" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"x: tensor([[0., 0.],\n", | |
" [0., 0.]], requires_grad=True) \n", | |
"y: tensor([[42., 42.],\n", | |
" [42., 42.]], grad_fn=<AddBackward0>) \n", | |
"(partial y)/(partial x):\n", | |
" [[1. 1.]\n", | |
" [1. 1.]]\n" | |
] | |
} | |
], | |
"source": [ | |
"x = torch.zeros(2, 2, requires_grad=True)\n", | |
"y = x + 42\n", | |
"z = y.sum()\n", | |
"z.backward()\n", | |
"print(\"x:\", x, \"\\ny:\", y, \"\\n(partial y)/(partial x):\\n\", x.grad.data.numpy())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"$$y = \\sum_{x_i} (x_i * 42) = 42 * \\sum_{x_i}x_i\\\\\n", | |
"\\frac{\\partial y}{\\partial x_i} = 42\n", | |
"$$" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(partial y)/(partial x):\n", | |
" [[42. 42.]\n", | |
" [42. 42.]]\n" | |
] | |
} | |
], | |
"source": [ | |
"x = torch.zeros(2, 2, requires_grad=True)\n", | |
"y = x * 42\n", | |
"z = y.sum()\n", | |
"z.backward()\n", | |
"print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**Mean of vector elements**\n", | |
"$$y = \\frac{1}{|\\textbf{x}|}\\sum_{x_i} (x_i + 42)\\\\\n", | |
"\\frac{\\partial y}{\\partial x_i} = \\frac{1}{|\\textbf{x}|}\n", | |
"$$" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(partial y)/(partial x):\n", | |
" [[0.25 0.25]\n", | |
" [0.25 0.25]]\n" | |
] | |
} | |
], | |
"source": [ | |
"x = torch.zeros(2, 2, requires_grad=True)\n", | |
"y = x + 42\n", | |
"z = y.mean()\n", | |
"z.backward()\n", | |
"print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**Product of vector elements**\n", | |
"$$y = \\prod_{x_i} (x_i * 42) = 42^{|\\textbf{x}|} * \\prod_{x_i}x_i\\\\\n", | |
"\\frac{\\partial y}{\\partial x_i} = 42^{|\\textbf{x}|} * \\prod_{j \\neq i}x_j = 0 \\quad \\text{(here every } x_j = 0\\text{)}\n", | |
"$$" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(partial y)/(partial x):\n", | |
" [[0. 0.]\n", | |
" [0. 0.]]\n" | |
] | |
} | |
], | |
"source": [ | |
"x = torch.zeros(2, 2, requires_grad=True)\n", | |
"y = x * 42\n", | |
"z = y.prod()\n", | |
"z.backward()\n", | |
"print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**Dot Product of two vectors**\n", | |
"$$z = \\textbf{x} \\cdot \\textbf{y}\\\\\n", | |
"\\frac{\\partial z}{\\partial x_i} = y_i\n", | |
"$$" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(partial y)/(partial x):\n", | |
" [4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2.]\n" | |
] | |
} | |
], | |
"source": [ | |
"x = torch.arange(0, 20, 1, dtype=torch.float, requires_grad=True)\n", | |
"y = torch.tensor([4,2], dtype=torch.float).repeat(10)\n", | |
"z = x.dot(y)\n", | |
"z.backward()\n", | |
"print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Vector-Vector Mapping ($f:\\mathbb R^d\\to\\mathbb R^k$)\n", | |
"### Jacobian\n", | |
"\n", | |
"**Vector Sum**\n", | |
"$$\\textbf{y} = \\textbf{x} + 42$$" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Jacobian J(X,Y):\n", | |
" [[[1. 0. 0. 0.]\n", | |
" [0. 1. 0. 0.]\n", | |
" [0. 0. 1. 0.]\n", | |
" [0. 0. 0. 1.]]\n", | |
"\n", | |
" [[1. 0. 0. 0.]\n", | |
" [0. 1. 0. 0.]\n", | |
" [0. 0. 1. 0.]\n", | |
" [0. 0. 0. 1.]]\n", | |
"\n", | |
" [[1. 0. 0. 0.]\n", | |
" [0. 1. 0. 0.]\n", | |
" [0. 0. 1. 0.]\n", | |
" [0. 0. 0. 1.]]\n", | |
"\n", | |
" [[1. 0. 0. 0.]\n", | |
" [0. 1. 0. 0.]\n", | |
" [0. 0. 1. 0.]\n", | |
" [0. 0. 0. 1.]]]\n" | |
] | |
} | |
], | |
"source": [ | |
"def jacobian(inputs, outputs):\n", | |
" \"\"\"\n", | |
" Ref: https://discuss.pytorch.org/t/calculating-jacobian-in-a-differentiable-way/13275/2\n", | |
" \"\"\"\n", | |
" return torch.stack([grad([outputs[:, i].sum()], [inputs], retain_graph=True, create_graph=True)[0]\n", | |
" for i in range(outputs.size(1))], dim=-1)\n", | |
"\n", | |
"x = torch.ones(4, 4, requires_grad=True)\n", | |
"y = x + 42\n", | |
"print(\"Jacobian J(X,Y):\\n\", jacobian(x,y).data.numpy())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"**Accessing specific gradient**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"tensor([[1., 0., 0., 0.],\n", | |
" [0., 0., 0., 0.],\n", | |
" [0., 0., 0., 0.],\n", | |
" [0., 0., 0., 0.]])\n" | |
] | |
} | |
], | |
"source": [ | |
"x = torch.ones(4, 4, requires_grad=True)\n", | |
"y = x + 42\n", | |
"y[0,0].backward()\n", | |
"print(x.grad)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Misc example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"x: tensor([[1., 1.],\n", | |
" [1., 1.]], requires_grad=True) \n", | |
"y: tensor([[3., 3.],\n", | |
" [3., 3.]], grad_fn=<AddBackward0>) \n", | |
"z: tensor([[9., 9.],\n", | |
" [9., 9.]], grad_fn=<MulBackward0>) \n", | |
"out: tensor(9., grad_fn=<MeanBackward1>)\n", | |
"dout/dx:\n", | |
" tensor([[1.5000, 1.5000],\n", | |
" [1.5000, 1.5000]])\n" | |
] | |
} | |
], | |
"source": [ | |
"x = torch.ones(2, 2, requires_grad=True)\n", | |
"y = x + 2\n", | |
"z = y * y\n", | |
"out = z.mean() # out = (z1 + z2 + z3 + z4)/4\n", | |
"print(\"x:\",x,\"\\ny:\",y, \"\\nz:\", z, \"\\nout:\", out)\n", | |
"out.backward()\n", | |
"print(\"dout/dx:\\n\", x.grad) # dout/dx1 = dout/dz1 * dz1/dy1 * dy1/dx1 = 1/4 * 2(y) * 1; y = 3 = 1.5" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Recursive function ($f:\\mathbb R\\to\\mathbb R; f_{n+1}(x)=f_n(x^2), f_0(x)=x$)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def recursive_squaring(x, times=1):\n", | |
" if times == 0:\n", | |
" return x\n", | |
" return recursive_squaring(x*x, times-1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"x: tensor([2.], requires_grad=True) \n", | |
"out1 (x^2): tensor([4.], grad_fn=<PowBackward0>) \n", | |
"out2 (recursive x^2): tensor([4.], grad_fn=<MulBackward0>)\n", | |
"dout1/dx: tensor([4.])\n", | |
"dout2/dx: tensor([4.])\n" | |
] | |
} | |
], | |
"source": [ | |
"x = Variable(torch.Tensor([2]), requires_grad=True)\n", | |
"out1 = x**2\n", | |
"out2 = recursive_squaring(x,1)\n", | |
"print(\"x:\", x, \"\\nout1 (x^2):\", out1, \"\\nout2 (recursive x^2):\", out2)\n", | |
"out1.backward(retain_graph=True)\n", | |
"print(\"dout1/dx:\", x.grad)\n", | |
"x.grad.zero_()\n", | |
"out2.backward(retain_graph=True)\n", | |
"print(\"dout2/dx:\", x.grad)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"x: tensor([2.], requires_grad=True) \n", | |
"out1 (x^2): tensor([16.], grad_fn=<PowBackward0>) \n", | |
"out2 (recursive x^2): tensor([16.], grad_fn=<MulBackward0>)\n", | |
"dout1/dx: tensor([32.])\n", | |
"dout2/dx: tensor([32.])\n" | |
] | |
} | |
], | |
"source": [ | |
"x = Variable(torch.Tensor([2]), requires_grad=True)\n", | |
"out1 = x**4\n", | |
"out2 = recursive_squaring(x,2)\n", | |
"print(\"x:\", x, \"\\nout1 (x^2):\", out1, \"\\nout2 (recursive x^2):\", out2)\n", | |
"out1.backward(retain_graph=True)\n", | |
"print(\"dout1/dx:\", x.grad)\n", | |
"x.grad.zero_()\n", | |
"out2.backward(retain_graph=True)\n", | |
"print(\"dout2/dx:\", x.grad)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Recursive function returning multiple/tuple results ($f:\\mathbb R\\to\\mathbb R^k; f_n(x)=(x^{2^n}, 42x^{2^n})$)\n", | |
"(I had noticed earlier that TensorFlow eager execution didn't support this very well.)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def recursive_squaring2(x, times=1):\n", | |
" res = x\n", | |
" for _ in range(times):\n", | |
" res = res * res\n", | |
" return res, 42*res" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"x: tensor([2.], requires_grad=True) \n", | |
"out1 (x^2): tensor([16.], grad_fn=<PowBackward0>) \n", | |
"out2 (recursive x^2): tensor([16.], grad_fn=<MulBackward0>)\n", | |
"dout1/dx: tensor([32.])\n", | |
"dout2_1/dx: tensor([32.])\n", | |
"dout2_2/dx: tensor([1344.])\n" | |
] | |
} | |
], | |
"source": [ | |
"x = Variable(torch.Tensor([2]), requires_grad=True)\n", | |
"out1 = x**4\n", | |
"out2_1, out2_2 = recursive_squaring2(x,2)\n", | |
"print(\"x:\", x, \"\\nout1 (x^2):\", out1, \"\\nout2 (recursive x^2):\", out2)\n", | |
"out1.backward(retain_graph=True)\n", | |
"print(\"dout1/dx:\", x.grad)\n", | |
"x.grad.zero_()\n", | |
"out2_1.backward(retain_graph=True)\n", | |
"print(\"dout2_1/dx:\", x.grad)\n", | |
"x.grad.zero_()\n", | |
"out2_2.backward(retain_graph=True)\n", | |
"print(\"dout2_2/dx:\", x.grad) #32*42 = 1344" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "irl_python3", | |
"language": "python", | |
"name": "irl_python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment