yang-zhang · November 14, 2018 14:24
diff --git a/pytorch-losses-in-plain-python-04.ipynb b/pytorch-losses-in-plain-python-04.ipynb
 {
  "cells": [
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import numpy as np\nimport torch\nimport torch.nn as nn\n\ntorch.__version__",
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 1,
          "data": {
            "text/plain": "'0.4.0'"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## L1Loss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(2, 3)\ny = torch.randn(2, 3)",
      "execution_count": 2,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x",
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 3,
          "data": {
            "text/plain": "tensor([[ 0.2195,  2.2807, -0.1209],\n        [ 0.9766,  0.3239, -0.7328]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/plain": "tensor([[ 0.4257, -0.5855, -1.7771],\n        [ 0.1043,  0.9405,  1.4275]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.L1Loss()(x, y)",
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 5,
          "data": {
            "text/plain": "tensor(1.3963)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": false,
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.L1Loss(reduce=False)(x, y)",
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 6,
          "data": {
            "text/plain": "tensor([[ 0.2062,  2.8662,  1.6561],\n        [ 0.8723,  0.6167,  2.1603]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "abs(x.numpy() - y.numpy())",
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 7,
          "data": {
            "text/plain": "array([[0.20620236, 2.866216  , 1.6561494 ],\n       [0.8723362 , 0.61668336, 2.1603217 ]], dtype=float32)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "abs(x.numpy() - y.numpy()).mean()",
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 8,
          "data": {
            "text/plain": "1.3963181"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## MSELoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(2, 3)\ny = torch.randn(2, 3)\n\nx",
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 9,
          "data": {
            "text/plain": "tensor([[ 1.2735, -1.3790,  0.7724],\n        [-0.8411,  0.9650,  1.8713]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 10,
          "data": {
            "text/plain": "tensor([[-0.4678, -0.5274,  1.4710],\n        [ 1.2896, -1.0243, -2.1329]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.MSELoss(reduce=False)(x, y)",
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 11,
          "data": {
            "text/plain": "tensor([[  3.0320,   0.7252,   0.4880],\n        [  4.5399,   3.9571,  16.0338]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.MSELoss()(x, y)",
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 12,
          "data": {
            "text/plain": "tensor(4.7960)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "(x.numpy() - y.numpy())**2",
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 13,
          "data": {
            "text/plain": "array([[ 3.0320182 ,  0.7252073 ,  0.48800114],\n       [ 4.5399084 ,  3.957067  , 16.033762  ]], dtype=float32)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "((x.numpy() - y.numpy())**2).mean()",
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 14,
          "data": {
            "text/plain": "4.7959943"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## CrossEntropyLoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(2, 4)\ny = torch.LongTensor(2).random_(4)\nx",
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 15,
          "data": {
            "text/plain": "tensor([[-1.3165,  0.4866, -0.0692, -0.6763],\n        [ 1.6869,  0.8914,  0.9729,  0.2508]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 16,
          "data": {
            "text/plain": "tensor([ 0,  1])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.CrossEntropyLoss(reduce=False)(x, y), nn.CrossEntropyLoss()(x, y)",
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 17,
          "data": {
            "text/plain": "(tensor([ 2.5214,  1.5743]), tensor(2.0479))"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Convert under new names so x and y stay tensors and this cell can be re-run.\nx_np = x.numpy()\ny_np = y.numpy()\n\nlst = []\nfor k in range(len(x_np)):\n    # cross entropy for sample k: -log(softmax(x_k)[y_k])\n    lst.append(-np.log(np.exp(x_np[k][y_np[k]]) / np.exp(x_np[k]).sum()))\nlst, np.mean(lst)",
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 18,
          "data": {
            "text/plain": "([2.5214107, 1.5743357], 2.0478733)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## NLLLoss"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "LogSoftmax"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(2, 4)\nx",
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 19,
          "data": {
            "text/plain": "tensor([[-0.2104,  0.4826,  1.4339,  0.3647],\n        [ 1.4450,  0.7018,  0.8739,  0.0702]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y = nn.LogSoftmax(dim=1)(x)\ny",
      "execution_count": 20,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 20,
          "data": {
            "text/plain": "tensor([[-2.2980, -1.6050, -0.6537, -1.7229],\n        [-0.8300, -1.5732, -1.4011, -2.2048]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Convert under a new name so x stays a tensor and this cell can be re-run.\nx_np = x.numpy()\nlst = []\nfor k in range(len(x_np)):\n    # log-softmax of row k: log(exp(x_k) / sum(exp(x_k)))\n    lst.append(np.log( np.exp(x_np[k]) / np.exp(x_np[k]).sum()))\nlst",
      "execution_count": 21,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 21,
          "data": {
            "text/plain": "[array([-2.2980032, -1.6049862, -0.6537388, -1.7228818], dtype=float32),\n array([-0.83002716, -1.573237  , -1.4011275 , -2.2048173 ], dtype=float32)]"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "NLLLoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x0 = torch.randn(3, 4)\nx = nn.LogSoftmax(dim=1)(x0)\nx0, x",
      "execution_count": 22,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 22,
          "data": {
            "text/plain": "(tensor([[-1.2759,  0.2851,  1.2872,  0.1168],\n         [-0.0027,  1.1844, -0.0642,  2.0069],\n         [ 0.6614, -0.1659,  2.3706,  1.2764]]),\n tensor([[-3.1252, -1.5642, -0.5622, -1.7325],\n         [-2.5399, -1.3528, -2.6013, -0.5303],\n         [-2.1761, -3.0034, -0.4669, -1.5610]]))"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y = torch.LongTensor(3).random_(4)\ny",
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 23,
          "data": {
            "text/plain": "tensor([ 2,  3,  2])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.NLLLoss()(x, y)",
      "execution_count": 24,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 24,
          "data": {
            "text/plain": "tensor(0.5198)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.NLLLoss(reduce=False)(x, y)",
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 25,
          "data": {
            "text/plain": "tensor([ 0.5622,  0.5303,  0.4669])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy()\ny = y.numpy()",
      "execution_count": 26,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Per-sample NLL: negate the log-probability stored at each row's target index.\nlst = [-row[label] for row, label in zip(x, y)]\n\nlst, np.mean(lst)",
      "execution_count": 27,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 27,
          "data": {
            "text/plain": "([0.5621566, 0.5302982, 0.46686718], 0.519774)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## PoissonNLLLoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(2, 4)\nx",
      "execution_count": 28,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 28,
          "data": {
            "text/plain": "tensor([[ 0.4900, -0.5364, -2.0116,  0.1387],\n        [ 1.2209, -1.0969,  0.4744, -1.8291]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y = torch.randn(2, 4)\ny",
      "execution_count": 29,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 29,
          "data": {
            "text/plain": "tensor([[ 0.4241, -1.2123, -1.2027,  0.9005],\n        [-1.2251,  1.0288, -1.1340,  0.9558]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.PoissonNLLLoss()(x, y)",
      "execution_count": 30,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 30,
          "data": {
            "text/plain": "tensor(1.3125)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.PoissonNLLLoss(reduce=False)(x, y)",
      "execution_count": 31,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 31,
          "data": {
            "text/plain": "tensor([[ 1.4245, -0.0655, -2.2855,  1.0239],\n        [ 4.8860,  1.4624,  2.1451,  1.9088]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy()\ny = y.numpy()",
      "execution_count": 32,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Stirling approximation: target*log(target) - target + 0.5*log(2*pi*target)\ndef sterling_approx(y):\n    \"\"\"Return the Stirling approximation of log(y!) for a positive y.\n\n    Works element-wise on NumPy scalars and arrays. The last term is\n    0.5*log(2*pi*y); the factor 2 inside the log is part of the formula.\n    \"\"\"\n    return y*np.log(y) - y + 0.5*np.log(2*np.pi*y)",
      "execution_count": 33,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Mirror nn.PoissonNLLLoss() as called above: log_input=True and full=False.\n# PyTorch only adds the Stirling term when full=True, so it is off by default here;\n# set full = True to compare against nn.PoissonNLLLoss(full=True).\nfull = False\n\nlst = []\nfor k in range(len(x)):\n    lsti = []\n    for i in range(len(x[k])):\n        # log_input=True: loss = exp(input) - target * input\n        lss = np.exp(x[k,i]) - y[k,i]*x[k,i]\n        # the Stirling term is only applied where target > 1\n        if full and y[k,i] > 1:\n            lss += sterling_approx(y[k,i])\n        lsti.append(lss)\n    lst.append(lsti)",
      "execution_count": 34,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "np.array(lst)",
      "execution_count": 35,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 35,
          "data": {
            "text/plain": "array([[ 1.42447615, -0.06545097, -2.28554368,  1.0238694 ],\n       [ 4.88598013,  1.04937647,  2.14506435,  1.90882862]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "np.mean(lst)",
      "execution_count": 36,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 36,
          "data": {
            "text/plain": "1.2608250585891174"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## KLDivLoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.rand(2, 3)\ny = torch.rand(2, 3)\nx",
      "execution_count": 37,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 37,
          "data": {
            "text/plain": "tensor([[ 0.7100,  0.8026,  0.8067],\n        [ 0.5638,  0.7029,  0.9036]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 38,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 38,
          "data": {
            "text/plain": "tensor([[ 0.8756,  0.8250,  0.7681],\n        [ 0.7691,  0.6991,  0.2621]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.KLDivLoss()(x, y)",
      "execution_count": 39,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 39,
          "data": {
            "text/plain": "tensor(-0.7244)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.KLDivLoss(reduce=False)(x, y)",
      "execution_count": 40,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 40,
          "data": {
            "text/plain": "tensor([[-0.7380, -0.8208, -0.8223],\n        [-0.6355, -0.7417, -0.5878]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy()\ny = y.numpy()",
      "execution_count": 41,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Element-wise KL term: target * (log(target) - input).\n# The input x is used as-is, i.e. it is treated as already being a log value.\nlst = [\n    [t * (np.log(t) - p) for p, t in zip(row_x, row_y)]\n    for row_x, row_y in zip(x, y)\n]\nnp.array(lst)",
      "execution_count": 42,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 42,
          "data": {
            "text/plain": "array([[-0.7380195 , -0.8208448 , -0.82230794],\n       [-0.63552344, -0.7416528 , -0.5877647 ]], dtype=float32)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "np.mean(lst)",
      "execution_count": 43,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 43,
          "data": {
            "text/plain": "-0.7243521"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## BCELoss"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Sigmoid"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(2, 4)\ny = nn.Sigmoid()(x)\nx",
      "execution_count": 44,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 44,
          "data": {
            "text/plain": "tensor([[-0.8194, -0.6531,  0.2668,  1.0594],\n        [-0.0148, -0.1114, -1.0826,  0.7729]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 45,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 45,
          "data": {
            "text/plain": "tensor([[ 0.3059,  0.3423,  0.5663,  0.7426],\n        [ 0.4963,  0.4722,  0.2530,  0.6842]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy()",
      "execution_count": 46,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "1 / (1 + np.exp(-x))",
      "execution_count": 47,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 47,
          "data": {
            "text/plain": "array([[0.30588058, 0.34230143, 0.5663003 , 0.7425844 ],\n       [0.4963038 , 0.47218153, 0.25301307, 0.6841569 ]], dtype=float32)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### single label"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x0 = torch.randn(3)\nx = nn.Sigmoid()(x0)\nx",
      "execution_count": 48,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 48,
          "data": {
            "text/plain": "tensor([ 0.7399,  0.1722,  0.7459])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y = torch.FloatTensor(3).random_(2)\ny",
      "execution_count": 49,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 49,
          "data": {
            "text/plain": "tensor([ 1.,  1.,  1.])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.BCELoss()(x, y)",
      "execution_count": 50,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 50,
          "data": {
            "text/plain": "tensor(0.7845)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.BCELoss(reduce=False)(x, y)",
      "execution_count": 51,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 51,
          "data": {
            "text/plain": "tensor([ 0.3012,  1.7590,  0.2932])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "loss = nn.BCELoss(size_average=False)\nlss = loss(x, y)\nlss",
      "execution_count": 52,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 52,
          "data": {
            "text/plain": "tensor(2.3534)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy()\ny = y.numpy()",
      "execution_count": 53,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Per-sample BCE: -log(p) when the label is 1, otherwise -log(1 - p).\nlst = [-np.log(p) if t == 1 else -np.log(1 - p) for p, t in zip(x, y)]\nlst, np.mean(lst)",
      "execution_count": 54,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 54,
          "data": {
            "text/plain": "([0.30117458, 1.7590082, 0.29319733], 0.78446007)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Equivalently"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Same loss as a single expression: the label t selects which log term survives.\nlst = [-np.log(p)*t + -np.log(1-p)*(1-t) for p, t in zip(x, y)]\nlst, np.mean(lst)",
      "execution_count": 55,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 55,
          "data": {
            "text/plain": "([0.3011745810508728, 1.7590081691741943, 0.2931973338127136],\n 0.7844600280125936)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### multilabel"
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "x0 = torch.randn(3, 2)\nx = nn.Sigmoid()(x0)\nx",
      "execution_count": 56,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 56,
          "data": {
            "text/plain": "tensor([[ 0.6020,  0.7536],\n        [ 0.1221,  0.1418],\n        [ 0.6682,  0.5789]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y = torch.FloatTensor(3, 2).random_(2)\ny",
      "execution_count": 57,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 57,
          "data": {
            "text/plain": "tensor([[ 0.,  0.],\n        [ 0.,  1.],\n        [ 1.,  0.]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.BCELoss()(x, y)",
      "execution_count": 58,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 58,
          "data": {
            "text/plain": "tensor(0.9456)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.BCELoss(reduce=False)(x, y)",
      "execution_count": 59,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 59,
          "data": {
            "text/plain": "tensor([[ 0.9213,  1.4006],\n        [ 0.1302,  1.9536],\n        [ 0.4031,  0.8649]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy()\ny = y.numpy()",
      "execution_count": 60,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Element-wise BCE over every (sample, label) pair: -log(p) if the label is 1, else -log(1 - p).\nlst = [\n    [-np.log(p) if t == 1 else -np.log(1 - p) for p, t in zip(row_x, row_y)]\n    for row_x, row_y in zip(x, y)\n]\nnp.array(lst), np.mean(lst)",
      "execution_count": 61,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 61,
          "data": {
            "text/plain": "(array([[0.92125939, 1.40064401],\n        [0.13017042, 1.95358479],\n        [0.40310806, 0.8648764 ]]), 0.9456071787240529)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Equivalently"
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Closed-form BCE applied one row at a time; each row is handled as a whole vector.\nlst = [\n    -np.log(row_x)*row_y + -np.log(1-row_x)*(1-row_y)\n    for row_x, row_y in zip(x, y)\n]\nnp.array(lst), np.mean(lst)",
      "execution_count": 62,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 62,
          "data": {
            "text/plain": "(array([[0.9212594 , 1.4006441 ],\n        [0.13017043, 1.9535848 ],\n        [0.40310806, 0.8648764 ]], dtype=float32), 0.9456072)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## BCEWithLogitsLoss"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "This simply applies a sigmoid to the input before the BCELoss shown above."
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### single label"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(3)\nx",
      "execution_count": 63,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 63,
          "data": {
            "text/plain": "tensor([ 0.0757, -0.4018, -0.2471])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "xs = nn.Sigmoid()(x)\nxs",
      "execution_count": 64,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 64,
          "data": {
            "text/plain": "tensor([ 0.5189,  0.4009,  0.4385])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y = torch.FloatTensor(3).random_(2)\ny",
      "execution_count": 65,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 65,
          "data": {
            "text/plain": "tensor([ 0.,  0.,  0.])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.BCELoss()(xs, y)",
      "execution_count": 66,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 66,
          "data": {
            "text/plain": "tensor(0.6071)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.BCEWithLogitsLoss()(x, y)",
      "execution_count": 67,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 67,
          "data": {
            "text/plain": "tensor(0.6071)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### multilabel"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(3, 2)\nx",
      "execution_count": 68,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 68,
          "data": {
            "text/plain": "tensor([[-0.5793, -2.1602],\n        [-0.9967,  0.2993],\n        [ 0.7039, -1.8369]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "xs = nn.Sigmoid()(x)\nxs",
      "execution_count": 69,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 69,
          "data": {
            "text/plain": "tensor([[ 0.3591,  0.1034],\n        [ 0.2696,  0.5743],\n        [ 0.6690,  0.1374]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y = torch.FloatTensor(3, 2).random_(2)\ny",
      "execution_count": 70,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 70,
          "data": {
            "text/plain": "tensor([[ 0.,  0.],\n        [ 0.,  1.],\n        [ 1.,  1.]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.BCELoss()(xs, y)",
      "execution_count": 71,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 71,
          "data": {
            "text/plain": "tensor(0.6349)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.BCEWithLogitsLoss()(x, y)",
      "execution_count": 72,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 72,
          "data": {
            "text/plain": "tensor(0.6349)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## MarginRankingLoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x1 = torch.randn(3)\nx2 = torch.randn(3)\ny = torch.FloatTensor(np.random.choice([1, -1], 3))\n\nx1, x2, y",
      "execution_count": 73,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 73,
          "data": {
            "text/plain": "(tensor([ 1.1512,  0.2352,  0.8746]),\n tensor([ 0.6363,  0.1684,  1.2253]),\n tensor([ 1.,  1., -1.]))"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.MarginRankingLoss(margin=0.1)(x1, x2, y)",
      "execution_count": 74,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 74,
          "data": {
            "text/plain": "tensor(1.00000e-02 *\n       1.1065)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x1 = x1.numpy()\nx2 = x2.numpy()\ny = y.numpy()\nmargin=0.1",
      "execution_count": 75,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Each pair contributes max(0, -y*(x1 - x2) + margin); the loss is the mean over pairs.\nlst = []\nfor a, b, sign in zip(x1, x2, y):\n    penalty = -sign*(a-b) + margin\n    lst.append(max(0, penalty))\n\nlst, np.mean(lst)",
      "execution_count": 76,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 76,
          "data": {
            "text/plain": "([0, 0.033195546269416815, 0], 0.011065182089805605)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## HingeEmbeddingLoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(2, 3)\ny = torch.FloatTensor(np.random.choice([-1, 1], (2, 3)))\n\nx",
      "execution_count": 77,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 77,
          "data": {
            "text/plain": "tensor([[ 0.5564, -0.1442,  0.3589],\n        [-1.4944,  0.0970,  0.3326]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 78,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 78,
          "data": {
            "text/plain": "tensor([[ 1., -1.,  1.],\n        [ 1.,  1., -1.]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": false,
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.HingeEmbeddingLoss(margin=1)(x, y)",
      "execution_count": 79,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 79,
          "data": {
            "text/plain": "tensor(0.2216)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy()\ny = y.numpy()\nmargin=1",
      "execution_count": 80,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Element-wise rule: x where y == 1, otherwise max(0, margin - x).\nlst = [\n    [value if label == 1 else max(0, margin-value) for value, label in zip(x_row, y_row)]\n    for x_row, y_row in zip(x, y)\n]\nnp.array(lst)",
      "execution_count": 81,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 81,
          "data": {
            "text/plain": "array([[ 0.55637378,  1.1442309 ,  0.35891026],\n       [-1.49435401,  0.09703328,  0.66744807]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "np.mean(lst)",
      "execution_count": 82,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 82,
          "data": {
            "text/plain": "0.2216070480644703"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## MultiLabelMarginLoss"
    },
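    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "This loss is easy to misread: each row of the target `y` holds class *indices* (not 0/1 indicators), and only the entries before the first negative value count as labels. A helpful reference: https://blog.csdn.net/zhangxb35/article/details/72464152"
    },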
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### one-sample example"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(1, 4)\ny = torch.LongTensor(1, 4).random_(-1, 4)\nx",
      "execution_count": 83,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 83,
          "data": {
            "text/plain": "tensor([[-0.1421,  2.3013, -0.0354,  0.2949]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 84,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 84,
          "data": {
            "text/plain": "tensor([[ 1,  1,  2,  1]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.MultiLabelMarginLoss()(x, y)",
      "execution_count": 85,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 85,
          "data": {
            "text/plain": "tensor(0.5559)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy()\ny = y.numpy()",
      "execution_count": 86,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "lst = []\nfor k in range(len(x)):\n    # Labels of sample k: the class indices listed before the first negative entry.\n    js = []\n    for t in y[k]:\n        if t < 0: break\n        js.append(t)\n    sm = 0\n    for i in range(len(x[k])):\n        if i in js: continue  # a label class is never compared against the labels\n        for j in js:\n            print(i, j)\n            sm += max(0, 1-(x[k][j] - x[k][i]))\n    lst.append(sm/len(x[k]))",
      "execution_count": 87,
      "outputs": [
        {
          "output_type": "stream",
          "text": "0 1\n0 1\n0 2\n0 1\n3 1\n3 1\n3 2\n3 1\n",
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "lst, np.mean(lst)",
      "execution_count": 88,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 88,
          "data": {
            "text/plain": "([0.5558745376765728], 0.5558745376765728)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### multi-sample example"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(3, 4)\ny = torch.LongTensor(3, 4).random_(-1, 4)\nx",
      "execution_count": 89,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 89,
          "data": {
            "text/plain": "tensor([[-1.9302, -0.4035, -0.3108, -1.2367],\n        [ 0.9111,  0.6308, -0.2221, -0.4482],\n        [ 0.6294,  0.0362, -0.6057,  0.1499]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 90,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 90,
          "data": {
            "text/plain": "tensor([[ 2,  2, -1,  2],\n        [-1,  0,  2,  1],\n        [ 0,  0,  2,  3]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.MultiLabelMarginLoss()(x, y)",
      "execution_count": 91,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 91,
          "data": {
            "text/plain": "tensor(0.4420)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Convert into new names so the tensors x and y stay intact and this cell can be re-run.\nx_np = x.numpy()\ny_np = y.numpy()\n\nlst = []\nfor k in range(len(x_np)):\n    # Labels of sample k: the class indices listed before the first negative entry.\n    js = []\n    for t in y_np[k]:\n        if t < 0: break\n        js.append(t)\n    sm = 0\n    for i in range(len(x_np[k])):\n        if i in js: continue  # only non-label classes are compared against the labels\n        for j in js:\n            sm += max(0, 1-(x_np[k][j] - x_np[k][i]))\n    lst.append(sm/len(x_np[k]))\n\nlst, np.mean(lst)",
      "execution_count": 92,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 92,
          "data": {
            "text/plain": "([0.49064967036247253, 0.0, 0.8354874812066555], 0.44204571718970936)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## SmoothL1Loss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(2, 3)\ny = torch.randn(2, 3)",
      "execution_count": 93,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.SmoothL1Loss()(x, y)",
      "execution_count": 94,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 94,
          "data": {
            "text/plain": "tensor(0.8434)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.SmoothL1Loss(reduce=False)(x, y)",
      "execution_count": 95,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 95,
          "data": {
            "text/plain": "tensor([[ 1.5014,  0.1854,  0.0780],\n        [ 0.0267,  1.6409,  1.6281]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy() \ny = y.numpy()",
      "execution_count": 96,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "def smoothl1loss(x, y):\n    \"\"\"Smooth L1 loss for a single prediction/target pair.\n\n    Returns 0.5*diff**2 when diff = |x - y| is below 1, and diff - 0.5 otherwise.\n    \"\"\"\n    diff = abs(x-y)\n    if diff < 1:\n        return 1/2*diff**2\n    return diff-1/2",
      "execution_count": 97,
      "outputs": []
    },
    {
      "metadata": {
        "scrolled": true,
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Apply the scalar rule to every (prediction, target) pair, keeping the 2-D layout.\nlst = [\n    [smoothl1loss(pred, target) for pred, target in zip(x_row, y_row)]\n    for x_row, y_row in zip(x, y)\n]\nnp.array(lst), np.mean(lst)",
      "execution_count": 98,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 98,
          "data": {
            "text/plain": "(array([[1.50144911, 0.18539482, 0.07797238],\n        [0.02665457, 1.64091063, 1.62811232]]), 0.8434156358063841)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## SoftMarginLoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(2, 4)\ny = torch.FloatTensor(np.random.choice([-1, 1], (2, 4)))\nx",
      "execution_count": 99,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 99,
          "data": {
            "text/plain": "tensor([[-0.4271,  1.6315, -0.0101, -0.3555],\n        [-0.0656,  1.0519,  1.1797, -0.1960]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 100,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 100,
          "data": {
            "text/plain": "tensor([[-1.,  1.,  1., -1.],\n        [-1., -1., -1., -1.]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.SoftMarginLoss()(x, y)",
      "execution_count": 101,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 101,
          "data": {
            "text/plain": "tensor(0.7463)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy()\ny = y.numpy()",
      "execution_count": 102,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Per element: log(1 + exp(-y*x)); averaged within each row, then across rows.\nlst = []\nfor x_row, y_row in zip(x, y):\n    total = 0\n    for value, label in zip(x_row, y_row):\n        total += np.log(1 + np.exp(-label*value))\n    lst.append(total/len(x_row))\n\nlst, np.mean(lst)",
      "execution_count": 103,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 103,
          "data": {
            "text/plain": "([0.47755350419146214, 1.0150123429598974], 0.7462829235756798)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## MultiLabelSoftMarginLoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(2, 4)\ny = torch.FloatTensor(2, 4).random_(2)\nx",
      "execution_count": 104,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 104,
          "data": {
            "text/plain": "tensor([[ 0.2240,  0.6822,  0.2218, -0.2678],\n        [ 0.3166, -0.6609,  0.4050,  0.8907]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 105,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 105,
          "data": {
            "text/plain": "tensor([[ 1.,  1.,  1.,  1.],\n        [ 1.,  0.,  0.,  0.]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.MultiLabelSoftMarginLoss()(x, y)",
      "execution_count": 106,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 106,
          "data": {
            "text/plain": "tensor(0.6919)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy()\ny = y.numpy()",
      "execution_count": 107,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Binary cross-entropy on sigmoid(x) per class, averaged over classes, then over samples.\nlst = []\nfor x_row, y_row in zip(x, y):\n    total = 0\n    for logit, label in zip(x_row, y_row):\n        e = np.exp(logit)\n        pos_term = label*np.log(e/(1+e))\n        neg_term = (1-label)*np.log(1/(1+e))\n        total -= pos_term + neg_term\n    lst.append(total/len(x_row))\n\nlst, np.mean(lst)",
      "execution_count": 108,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 108,
          "data": {
            "text/plain": "([0.6052165105968161, 0.7785574945160404], 0.6918870025564283)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## CosineEmbeddingLoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x1 = torch.randn(2, 3)\nx2 = torch.randn(2, 3)\ny = torch.FloatTensor(np.random.choice([1, -1], 2))\n\nx1",
      "execution_count": 109,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 109,
          "data": {
            "text/plain": "tensor([[-2.0857,  0.6833,  1.4676],\n        [-0.5992,  0.6303,  0.2105]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x2",
      "execution_count": 110,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 110,
          "data": {
            "text/plain": "tensor([[-0.0792, -0.9324, -1.6806],\n        [-0.5527, -0.0359, -0.1568]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 111,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 111,
          "data": {
            "text/plain": "tensor([-1.,  1.])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "scrolled": false,
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.CosineEmbeddingLoss(margin=0.1)(x1, x2, y)",
      "execution_count": 112,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 112,
          "data": {
            "text/plain": "tensor(0.2325)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x1 = x1.numpy()\nx2 = x2.numpy()\ny = y.numpy()\nmargin=0.1",
      "execution_count": 113,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "from scipy.spatial.distance import cosine\n\ndef cos(x, y):\n    \"\"\"Cosine similarity of two 1-D vectors (scipy's `cosine` is the cosine distance).\"\"\"\n    distance = cosine(x, y)\n    return 1-distance",
      "execution_count": 114,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# y == 1: loss is 1 - cos(x1, x2); y == -1: loss is max(0, cos(x1, x2) - margin).\nlst = []\nfor a, b, label in zip(x1, x2, y):\n    if label == 1:\n        lst.append(1-cos(a, b))\n    elif label == -1:\n        lst.append(max(0, cos(a, b)-margin))\nlst, np.mean(lst)",
      "execution_count": 115,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 115,
          "data": {
            "text/plain": "([0, 0.46503204107284546], 0.23251602053642273)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## MultiMarginLoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = torch.randn(2, 4)\ny = torch.LongTensor(2).random_(4)\nx",
      "execution_count": 116,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 116,
          "data": {
            "text/plain": "tensor([[ 0.3508,  0.0864,  0.5680, -0.8116],\n        [ 1.6380, -0.7116,  0.1172, -0.3916]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y",
      "execution_count": 117,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 117,
          "data": {
            "text/plain": "tensor([ 1,  2])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.MultiMarginLoss(margin=0.9, p=2)(x, y)",
      "execution_count": 118,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 118,
          "data": {
            "text/plain": "tensor(1.1605)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x = x.numpy()\ny = y.numpy()\np=2\nmargin=0.9",
      "execution_count": 119,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "lst = []\nfor k in range(len(x)):\n    sm = 0\n    for i in range(len(x[k])):\n        if i != y[k]:\n            # Clamp the margin violation at 0 *before* raising it to the power p.\n            # Clamping after the power never triggers for p=2, so a satisfied margin\n            # (negative value) would be squared and wrongly penalized.\n            sm += max(0, margin - x[k, y[k]] + x[k, i])**p\n    lst.append(sm/len(x[k]))\n\nlst, np.mean(lst)",
      "execution_count": 120,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 120,
          "data": {
            "text/plain": "([0.8162733480319643, 1.5046415572871512], 1.1604574526595577)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## TripletMarginLoss"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x1 = torch.randn(2, 3)\nx2 = torch.randn(2, 3)\nx3 = torch.randn(2, 3)\nmargin = 0.9\np = 2\n\nx1",
      "execution_count": 121,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 121,
          "data": {
            "text/plain": "tensor([[-1.0533, -0.2646, -1.1010],\n        [-0.2642, -0.9874, -1.2543]])"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "nn.TripletMarginLoss(margin=margin, p=p)(x1, x2, x3)",
      "execution_count": 122,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 122,
          "data": {
            "text/plain": "tensor(0.9210)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x1 = x1.numpy()\nx2 = x2.numpy()\nx3 = x3.numpy()",
      "execution_count": 123,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "def d(x1, x2, p):\n    \"\"\"p-norm distance between two 1-D arrays: (sum_i |x1_i - x2_i|**p)**(1/p).\n\n    Taking abs() first keeps odd p correct; signed differences would cancel for p=1\n    and can make the sum negative for p=3. nn.TripletMarginLoss also has a small eps\n    argument for numerical stability, which is not modelled here.\n    \"\"\"\n    return sum(abs(x1-x2)**p)**(1/p)",
      "execution_count": 124,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# One loss value per triplet (row): max(d(anchor, positive) - d(anchor, negative) + margin, 0).\n# d() already reduces over the feature axis, so no inner loop over features is needed.\nlst = []\nfor anchor, positive, negative in zip(x1, x2, x3):\n    lst.append(max(d(anchor, positive, p)-d(anchor, negative, p)+margin, 0))\n\nlst, np.mean(lst)",
      "execution_count": 125,
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 125,
          "data": {
            "text/plain": "([0.35117300979922, 1.4907858411409822], 0.9209794254701011)"
          },
          "metadata": {}
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## References"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "- https://pytorch.org/docs/0.4.0/nn.html#loss-functions\n- https://blog.csdn.net/zhangxb35/article/details/72464152"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "conda-env-pytorch04-py",
      "display_name": "Python [conda env:pytorch04]",
      "language": "python"
    },
    "toc": {
      "nav_menu": {},
      "number_sections": true,
      "sideBar": true,
      "skip_h1_title": false,
      "base_numbering": 1,
      "title_cell": "Table of Contents",
      "title_sidebar": "Contents",
      "toc_cell": false,
      "toc_position": {},
      "toc_section_display": true,
      "toc_window_display": false
    },
    "language_info": {
      "name": "python",
      "version": "3.6.4",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "gist": {
      "id": "",
      "data": {
        "description": "pytorch-losses-in-plain-python-04",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
 }