yrevar · August 17, 2018 19:55
diff --git a/PyTorchAutoDiff.ipynb b/PyTorchAutoDiff.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import numpy as np\n",
    "from torch import FloatTensor\n",
    "from torch.autograd import Variable, grad, gradcheck"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PyTorch Automatic differentiation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Scalar–Scalar Mapping ($f:\\mathbb R\\to\\mathbb R$)\n",
    "### Derivatives\n",
    "\n",
    "**Scalar Sum **  \n",
    "$$\n",
    "y = x + 2\\\\\n",
    "\\frac{dy}{dx} = 1\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dy/dx: [1.]\n"
     ]
    }
   ],
   "source": [
    "x = Variable(torch.tensor([42.]), requires_grad=True) # Only Tensors of floating point dtype can require gradients\n",
    "y = x + 42\n",
    "y.backward()\n",
    "print(\"dy/dx:\", x.grad.data.numpy())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Scalar Multiplication**  \n",
    "$$\n",
    "y = 42 * x\\\\\n",
    "\\frac{dy}{dx} = 42\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dy/dx: [42.]\n"
     ]
    }
   ],
   "source": [
    "x = Variable(torch.tensor([42.]), requires_grad=True) # Only Tensors of floating point dtype can require gradients\n",
    "y = x * 42\n",
    "y.backward()\n",
    "print(\"dy/dx:\", x.grad.data.numpy())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Scalar Exponentiation**  \n",
    "$$\n",
    "y = x^{0.42}\\\\\n",
    "\\frac{dy}{dx} = 0.42x^{-0.58}\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dy/dx: [0.04805789]\n"
     ]
    }
   ],
   "source": [
    "x = Variable(torch.tensor([42.]), requires_grad=True) # Only Tensors of floating point dtype can require gradients\n",
    "y = x**0.42\n",
    "y.backward()\n",
    "print(\"dy/dx:\", x.grad.data.numpy())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Vector-Scalar Mapping ($f:\\mathbb R^d\\to\\mathbb R$)\n",
    "### Gradients\n",
    "**Sum of vector elements**\n",
    "$$y = \\sum_{x_i} (x_i + 42)\\\\\n",
    "\\frac{\\partial y}{\\partial x_i} = 1\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x: tensor([[0., 0.],\n",
      "        [0., 0.]], requires_grad=True) \n",
      "y: tensor([[42., 42.],\n",
      "        [42., 42.]], grad_fn=<AddBackward0>) \n",
      "(partial y)/(partial x):\n",
      " [[1. 1.]\n",
      " [1. 1.]]\n"
     ]
    }
   ],
   "source": [
    "x = torch.zeros(2, 2, requires_grad=True)\n",
    "y = x + 42\n",
    "z = y.sum()\n",
    "z.backward()\n",
    "print(\"x:\", x, \"\\ny:\", y, \"\\n(partial y)/(partial x):\\n\", x.grad.data.numpy())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "$$y = \\sum_{x_i} (x_i * 42) = 42 * \\sum_{x_i}x_i\\\\\n",
    "\\frac{\\partial y}{\\partial x_i} = 42\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(partial y)/(partial x):\n",
      " [[42. 42.]\n",
      " [42. 42.]]\n"
     ]
    }
   ],
   "source": [
    "x = torch.zeros(2, 2, requires_grad=True)\n",
    "y = x * 42\n",
    "z = y.sum()\n",
    "z.backward()\n",
    "print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Mean of vector elements**\n",
    "$$y = \\frac{1}{|\\textbf{x}|}\\sum_{x_i} (x_i + 42)\\\\\n",
    "\\frac{\\partial y}{\\partial x_i} = \\frac{1}{|\\textbf{x}|}\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(partial y)/(partial x):\n",
      " [[0.25 0.25]\n",
      " [0.25 0.25]]\n"
     ]
    }
   ],
   "source": [
    "x = torch.zeros(2, 2, requires_grad=True)\n",
    "y = x + 42\n",
    "z = y.mean()\n",
    "z.backward()\n",
    "print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Product of vector elements**\n",
    "$$y = \\sum_{x_i} (x_i * 42) = 42 * \\prod_{x_i}x_i\\\\\n",
    "\\frac{\\partial y}{\\partial x_i} = 0\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(partial y)/(partial x):\n",
      " [[0. 0.]\n",
      " [0. 0.]]\n"
     ]
    }
   ],
   "source": [
    "x = torch.zeros(2, 2, requires_grad=True)\n",
    "y = x * 42\n",
    "z = y.prod()\n",
    "z.backward()\n",
    "print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Dot Product of two vectors**\n",
    "$$z = \\textbf{x} \\cdot \\textbf{y}\\\\\n",
    "\\frac{\\partial z}{\\partial x_i} = y_i\n",
    "$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(partial y)/(partial x):\n",
      " [4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2.]\n"
     ]
    }
   ],
   "source": [
    "x = torch.arange(0, 20, 1, dtype=torch.float, requires_grad=True)\n",
    "y = torch.tensor([4,2], dtype=torch.float).repeat(10)\n",
    "z = x.dot(y)\n",
    "z.backward()\n",
    "print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Vector-Vector Mapping ($f:\\mathbb R^d\\to\\mathbb R^k$)\n",
    "### Jacobian\n",
    "\n",
    "**Vector Sum**\n",
    "$$\\textbf{y} = \\textbf{x} + 42$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Jacobian J(X,Y):\n",
      " [[[1. 0. 0. 0.]\n",
      "  [0. 1. 0. 0.]\n",
      "  [0. 0. 1. 0.]\n",
      "  [0. 0. 0. 1.]]\n",
      "\n",
      " [[1. 0. 0. 0.]\n",
      "  [0. 1. 0. 0.]\n",
      "  [0. 0. 1. 0.]\n",
      "  [0. 0. 0. 1.]]\n",
      "\n",
      " [[1. 0. 0. 0.]\n",
      "  [0. 1. 0. 0.]\n",
      "  [0. 0. 1. 0.]\n",
      "  [0. 0. 0. 1.]]\n",
      "\n",
      " [[1. 0. 0. 0.]\n",
      "  [0. 1. 0. 0.]\n",
      "  [0. 0. 1. 0.]\n",
      "  [0. 0. 0. 1.]]]\n"
     ]
    }
   ],
   "source": [
    "def jacobian(inputs, outputs):\n",
    "    \"\"\"\n",
    "    Ref: https://discuss.pytorch.org/t/calculating-jacobian-in-a-differentiable-way/13275/2\n",
    "    \"\"\"\n",
    "    return torch.stack([grad([outputs[:, i].sum()], [inputs], retain_graph=True, create_graph=True)[0]\n",
    "                        for i in range(outputs.size(1))], dim=-1)\n",
    "\n",
    "x = torch.ones(4, 4, requires_grad=True)\n",
    "y = x + 42\n",
    "print(\"Jacobian J(X,Y):\\n\", jacobian(x,y).data.numpy())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Accessing specific gradient**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[1., 0., 0., 0.],\n",
      "        [0., 0., 0., 0.],\n",
      "        [0., 0., 0., 0.],\n",
      "        [0., 0., 0., 0.]])\n"
     ]
    }
   ],
   "source": [
    "x = torch.ones(4, 4, requires_grad=True)\n",
    "y = x + 42\n",
    "y[0,0].backward()\n",
    "print(x.grad)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Misc example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x: tensor([[1., 1.],\n",
      "        [1., 1.]], requires_grad=True) \n",
      "y: tensor([[3., 3.],\n",
      "        [3., 3.]], grad_fn=<AddBackward0>) \n",
      "z: tensor([[9., 9.],\n",
      "        [9., 9.]], grad_fn=<MulBackward0>) \n",
      "out: tensor(9., grad_fn=<MeanBackward1>)\n",
      "dout/dx:\n",
      " tensor([[1.5000, 1.5000],\n",
      "        [1.5000, 1.5000]])\n"
     ]
    }
   ],
   "source": [
    "x = torch.ones(2, 2, requires_grad=True)\n",
    "y = x + 2\n",
    "z = y * y\n",
    "out = z.mean() # out = (z1 + z2 + z3 + z4)/4\n",
    "print(\"x:\",x,\"\\ny:\",y, \"\\nz:\", z, \"\\nout:\", out)\n",
    "out.backward()\n",
    "print(\"dout/dx:\\n\", x.grad) # dout/dx1 = dout/dz1 * dz1/dy1 * dy1/dx1 = 1/4 * 2(y) * 1; y = 3 = 1.5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Recursive function ($f:\\mathbb R\\to\\mathbb R; f(x_{n+1})=f(x_n)$)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def recursive_squaring(x, times=1):\n",
    "    if times == 0:\n",
    "        return x\n",
    "    return recursive_squaring(x*x, times-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x: tensor([2.], requires_grad=True) \n",
      "out1 (x^2): tensor([4.], grad_fn=<PowBackward0>) \n",
      "out2 (recursive x^2): tensor([4.], grad_fn=<MulBackward0>)\n",
      "dout1/dx: tensor([4.])\n",
      "dout2/dx: tensor([4.])\n"
     ]
    }
   ],
   "source": [
    "x = Variable(torch.Tensor([2]), requires_grad=True)\n",
    "out1 = x**2\n",
    "out2 = recursive_squaring(x,1)\n",
    "print(\"x:\", x, \"\\nout1 (x^2):\", out1, \"\\nout2 (recursive x^2):\", out2)\n",
    "out1.backward(retain_graph=True)\n",
    "print(\"dout1/dx:\", x.grad)\n",
    "x.grad.zero_()\n",
    "out2.backward(retain_graph=True)\n",
    "print(\"dout2/dx:\", x.grad)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x: tensor([2.], requires_grad=True) \n",
      "out1 (x^2): tensor([16.], grad_fn=<PowBackward0>) \n",
      "out2 (recursive x^2): tensor([16.], grad_fn=<MulBackward0>)\n",
      "dout1/dx: tensor([32.])\n",
      "dout2/dx: tensor([32.])\n"
     ]
    }
   ],
   "source": [
    "x = Variable(torch.Tensor([2]), requires_grad=True)\n",
    "out1 = x**4\n",
    "out2 = recursive_squaring(x,2)\n",
    "print(\"x:\", x, \"\\nout1 (x^2):\", out1, \"\\nout2 (recursive x^2):\", out2)\n",
    "out1.backward(retain_graph=True)\n",
    "print(\"dout1/dx:\", x.grad)\n",
    "x.grad.zero_()\n",
    "out2.backward(retain_graph=True)\n",
    "print(\"dout2/dx:\", x.grad)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Recursion function returning multiple/tuple results ($f:\\mathbb R\\to\\mathbb R^k; f(x_{n+1})=f(x_n)$)\n",
    "(I had noticed earlier that Tensorflow eager execution didn't support this very well.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def recursive_squaring2(x, times=1):\n",
    "    res = x\n",
    "    for _ in range(times):\n",
    "        res = res * res\n",
    "    return res, 42*res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x: tensor([2.], requires_grad=True) \n",
      "out1 (x^2): tensor([16.], grad_fn=<PowBackward0>) \n",
      "out2 (recursive x^2): tensor([16.], grad_fn=<MulBackward0>)\n",
      "dout1/dx: tensor([32.])\n",
      "dout2_1/dx: tensor([32.])\n",
      "dout2_2/dx: tensor([1344.])\n"
     ]
    }
   ],
   "source": [
    "x = Variable(torch.Tensor([2]), requires_grad=True)\n",
    "out1 = x**4\n",
    "out2_1, out2_2 = recursive_squaring2(x,2)\n",
    "print(\"x:\", x, \"\\nout1 (x^2):\", out1, \"\\nout2 (recursive x^2):\", out2)\n",
    "out1.backward(retain_graph=True)\n",
    "print(\"dout1/dx:\", x.grad)\n",
    "x.grad.zero_()\n",
    "out2_1.backward(retain_graph=True)\n",
    "print(\"dout2_1/dx:\", x.grad)\n",
    "x.grad.zero_()\n",
    "out2_2.backward(retain_graph=True)\n",
    "print(\"dout2_2/dx:\", x.grad) #32*42 = 1344"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "irl_python3",
   "language": "python",
   "name": "irl_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import torch\n",
	"import numpy as np\n",
	"from torch import FloatTensor\n",
	"from torch.autograd import Variable, grad, gradcheck"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# PyTorch Automatic differentiation"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Scalar–Scalar Mapping ($f:\\mathbb R\\to\\mathbb R$)\n",
	"### Derivatives\n",
	"\n",
	"Scalar Sum \n",
	"$$\n",
	"y = x + 2\\\\\n",
	"\\frac{dy}{dx} = 1\n",
	"$$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"dy/dx: [1.]\n"
	]
	}
	],
	"source": [
	"x = Variable(torch.tensor([42.]), requires_grad=True) # Only Tensors of floating point dtype can require gradients\n",
	"y = x + 42\n",
	"y.backward()\n",
	"print(\"dy/dx:\", x.grad.data.numpy())"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Scalar Multiplication \n",
	"$$\n",
	"y = 42 * x\\\\\n",
	"\\frac{dy}{dx} = 42\n",
	"$$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"dy/dx: [42.]\n"
	]
	}
	],
	"source": [
	"x = Variable(torch.tensor([42.]), requires_grad=True) # Only Tensors of floating point dtype can require gradients\n",
	"y = x * 42\n",
	"y.backward()\n",
	"print(\"dy/dx:\", x.grad.data.numpy())"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Scalar Exponentiation \n",
	"$$\n",
	"y = x^{0.42}\\\\\n",
	"\\frac{dy}{dx} = 0.42x^{-0.58}\n",
	"$$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"dy/dx: [0.04805789]\n"
	]
	}
	],
	"source": [
	"x = Variable(torch.tensor([42.]), requires_grad=True) # Only Tensors of floating point dtype can require gradients\n",
	"y = x**0.42\n",
	"y.backward()\n",
	"print(\"dy/dx:\", x.grad.data.numpy())"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Vector-Scalar Mapping ($f:\\mathbb R^d\\to\\mathbb R$)\n",
	"### Gradients\n",
	"Sum of vector elements\n",
	"$$y = \\sum_{x_i} (x_i + 42)\\\\\n",
	"\\frac{\\partial y}{\\partial x_i} = 1\n",
	"$$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"x: tensor([[0., 0.],\n",
	" [0., 0.]], requires_grad=True) \n",
	"y: tensor([[42., 42.],\n",
	" [42., 42.]], grad_fn=<AddBackward0>) \n",
	"(partial y)/(partial x):\n",
	" [[1. 1.]\n",
	" [1. 1.]]\n"
	]
	}
	],
	"source": [
	"x = torch.zeros(2, 2, requires_grad=True)\n",
	"y = x + 42\n",
	"z = y.sum()\n",
	"z.backward()\n",
	"print(\"x:\", x, \"\\ny:\", y, \"\\n(partial y)/(partial x):\\n\", x.grad.data.numpy())"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"$$y = \\sum_{x_i} (x_i * 42) = 42 * \\sum_{x_i}x_i\\\\\n",
	"\\frac{\\partial y}{\\partial x_i} = 42\n",
	"$$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(partial y)/(partial x):\n",
	" [[42. 42.]\n",
	" [42. 42.]]\n"
	]
	}
	],
	"source": [
	"x = torch.zeros(2, 2, requires_grad=True)\n",
	"y = x * 42\n",
	"z = y.sum()\n",
	"z.backward()\n",
	"print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Mean of vector elements\n",
	"$$y = \\frac{1}{\|\\textbf{x}\|}\\sum_{x_i} (x_i + 42)\\\\\n",
	"\\frac{\\partial y}{\\partial x_i} = \\frac{1}{\|\\textbf{x}\|}\n",
	"$$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(partial y)/(partial x):\n",
	" [[0.25 0.25]\n",
	" [0.25 0.25]]\n"
	]
	}
	],
	"source": [
	"x = torch.zeros(2, 2, requires_grad=True)\n",
	"y = x + 42\n",
	"z = y.mean()\n",
	"z.backward()\n",
	"print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Product of vector elements\n",
	"$$y = \\sum_{x_i} (x_i * 42) = 42 * \\prod_{x_i}x_i\\\\\n",
	"\\frac{\\partial y}{\\partial x_i} = 0\n",
	"$$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(partial y)/(partial x):\n",
	" [[0. 0.]\n",
	" [0. 0.]]\n"
	]
	}
	],
	"source": [
	"x = torch.zeros(2, 2, requires_grad=True)\n",
	"y = x * 42\n",
	"z = y.prod()\n",
	"z.backward()\n",
	"print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Dot Product of two vectors\n",
	"$$z = \\textbf{x} \\cdot \\textbf{y}\\\\\n",
	"\\frac{\\partial z}{\\partial x_i} = y_i\n",
	"$$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(partial y)/(partial x):\n",
	" [4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2. 4. 2.]\n"
	]
	}
	],
	"source": [
	"x = torch.arange(0, 20, 1, dtype=torch.float, requires_grad=True)\n",
	"y = torch.tensor([4,2], dtype=torch.float).repeat(10)\n",
	"z = x.dot(y)\n",
	"z.backward()\n",
	"print(\"(partial y)/(partial x):\\n\", x.grad.data.numpy())"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Vector-Vector Mapping ($f:\\mathbb R^d\\to\\mathbb R^k$)\n",
	"### Jacobian\n",
	"\n",
	"Vector Sum\n",
	"$$\\textbf{y} = \\textbf{x} + 42$$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Jacobian J(X,Y):\n",
	" [[[1. 0. 0. 0.]\n",
	" [0. 1. 0. 0.]\n",
	" [0. 0. 1. 0.]\n",
	" [0. 0. 0. 1.]]\n",
	"\n",
	" [[1. 0. 0. 0.]\n",
	" [0. 1. 0. 0.]\n",
	" [0. 0. 1. 0.]\n",
	" [0. 0. 0. 1.]]\n",
	"\n",
	" [[1. 0. 0. 0.]\n",
	" [0. 1. 0. 0.]\n",
	" [0. 0. 1. 0.]\n",
	" [0. 0. 0. 1.]]\n",
	"\n",
	" [[1. 0. 0. 0.]\n",
	" [0. 1. 0. 0.]\n",
	" [0. 0. 1. 0.]\n",
	" [0. 0. 0. 1.]]]\n"
	]
	}
	],
	"source": [
	"def jacobian(inputs, outputs):\n",
	" \"\"\"\n",
	" Ref: https://discuss.pytorch.org/t/calculating-jacobian-in-a-differentiable-way/13275/2\n",
	" \"\"\"\n",
	" return torch.stack([grad([outputs[:, i].sum()], [inputs], retain_graph=True, create_graph=True)[0]\n",
	" for i in range(outputs.size(1))], dim=-1)\n",
	"\n",
	"x = torch.ones(4, 4, requires_grad=True)\n",
	"y = x + 42\n",
	"print(\"Jacobian J(X,Y):\\n\", jacobian(x,y).data.numpy())"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Accessing specific gradient"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"tensor([[1., 0., 0., 0.],\n",
	" [0., 0., 0., 0.],\n",
	" [0., 0., 0., 0.],\n",
	" [0., 0., 0., 0.]])\n"
	]
	}
	],
	"source": [
	"x = torch.ones(4, 4, requires_grad=True)\n",
	"y = x + 42\n",
	"y[0,0].backward()\n",
	"print(x.grad)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Misc example"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"x: tensor([[1., 1.],\n",
	" [1., 1.]], requires_grad=True) \n",
	"y: tensor([[3., 3.],\n",
	" [3., 3.]], grad_fn=<AddBackward0>) \n",
	"z: tensor([[9., 9.],\n",
	" [9., 9.]], grad_fn=<MulBackward0>) \n",
	"out: tensor(9., grad_fn=<MeanBackward1>)\n",
	"dout/dx:\n",
	" tensor([[1.5000, 1.5000],\n",
	" [1.5000, 1.5000]])\n"
	]
	}
	],
	"source": [
	"x = torch.ones(2, 2, requires_grad=True)\n",
	"y = x + 2\n",
	"z = y * y\n",
	"out = z.mean() # out = (z1 + z2 + z3 + z4)/4\n",
	"print(\"x:\",x,\"\\ny:\",y, \"\\nz:\", z, \"\\nout:\", out)\n",
	"out.backward()\n",
	"print(\"dout/dx:\\n\", x.grad) # dout/dx1 = dout/dz1 * dz1/dy1 * dy1/dx1 = 1/4 * 2(y) * 1; y = 3 = 1.5"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Recursive function ($f:\\mathbb R\\to\\mathbb R; f(x_{n+1})=f(x_n)$)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [],
	"source": [
	"def recursive_squaring(x, times=1):\n",
	" if times == 0:\n",
	" return x\n",
	" return recursive_squaring(x*x, times-1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"x: tensor([2.], requires_grad=True) \n",
	"out1 (x^2): tensor([4.], grad_fn=<PowBackward0>) \n",
	"out2 (recursive x^2): tensor([4.], grad_fn=<MulBackward0>)\n",
	"dout1/dx: tensor([4.])\n",
	"dout2/dx: tensor([4.])\n"
	]
	}
	],
	"source": [
	"x = Variable(torch.Tensor([2]), requires_grad=True)\n",
	"out1 = x**2\n",
	"out2 = recursive_squaring(x,1)\n",
	"print(\"x:\", x, \"\\nout1 (x^2):\", out1, \"\\nout2 (recursive x^2):\", out2)\n",
	"out1.backward(retain_graph=True)\n",
	"print(\"dout1/dx:\", x.grad)\n",
	"x.grad.zero_()\n",
	"out2.backward(retain_graph=True)\n",
	"print(\"dout2/dx:\", x.grad)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"x: tensor([2.], requires_grad=True) \n",
	"out1 (x^2): tensor([16.], grad_fn=<PowBackward0>) \n",
	"out2 (recursive x^2): tensor([16.], grad_fn=<MulBackward0>)\n",
	"dout1/dx: tensor([32.])\n",
	"dout2/dx: tensor([32.])\n"
	]
	}
	],
	"source": [
	"x = Variable(torch.Tensor([2]), requires_grad=True)\n",
	"out1 = x**4\n",
	"out2 = recursive_squaring(x,2)\n",
	"print(\"x:\", x, \"\\nout1 (x^2):\", out1, \"\\nout2 (recursive x^2):\", out2)\n",
	"out1.backward(retain_graph=True)\n",
	"print(\"dout1/dx:\", x.grad)\n",
	"x.grad.zero_()\n",
	"out2.backward(retain_graph=True)\n",
	"print(\"dout2/dx:\", x.grad)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Recursion function returning multiple/tuple results ($f:\\mathbb R\\to\\mathbb R^k; f(x_{n+1})=f(x_n)$)\n",
	"(I had noticed earlier that Tensorflow eager execution didn't support this very well.)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [],
	"source": [
	"def recursive_squaring2(x, times=1):\n",
	" res = x\n",
	" for _ in range(times):\n",
	" res = res * res\n",
	" return res, 42*res"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"x: tensor([2.], requires_grad=True) \n",
	"out1 (x^2): tensor([16.], grad_fn=<PowBackward0>) \n",
	"out2 (recursive x^2): tensor([16.], grad_fn=<MulBackward0>)\n",
	"dout1/dx: tensor([32.])\n",
	"dout2_1/dx: tensor([32.])\n",
	"dout2_2/dx: tensor([1344.])\n"
	]
	}
	],
	"source": [
	"x = Variable(torch.Tensor([2]), requires_grad=True)\n",
	"out1 = x**4\n",
	"out2_1, out2_2 = recursive_squaring2(x,2)\n",
	"print(\"x:\", x, \"\\nout1 (x^2):\", out1, \"\\nout2 (recursive x^2):\", out2)\n",
	"out1.backward(retain_graph=True)\n",
	"print(\"dout1/dx:\", x.grad)\n",
	"x.grad.zero_()\n",
	"out2_1.backward(retain_graph=True)\n",
	"print(\"dout2_1/dx:\", x.grad)\n",
	"x.grad.zero_()\n",
	"out2_2.backward(retain_graph=True)\n",
	"print(\"dout2_2/dx:\", x.grad) #32*42 = 1344"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "irl_python3",
	"language": "python",
	"name": "irl_python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.1"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}