Last active
December 21, 2022 07:14
-
-
Save yang-zhang/c94304dbc7f1f9be3333742b7e8249a7 to your computer and use it in GitHub Desktop.
git/yang-zhang.github.io/ds_code/pytorch-losses-in-plain-python.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'0.4.1'" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"import torch\n", | |
"import torch.nn as nn\n", | |
"\n", | |
"torch.__version__" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## L1Loss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = torch.randn(2, 3)\n", | |
"y = torch.randn(2, 3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-1.4763, -0.0492, 0.7067],\n", | |
" [-0.3756, -1.8713, 1.5535]])" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-0.3171, -1.6037, -0.3038],\n", | |
" [ 0.3671, 0.6510, -2.2076]])" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(1.7917)" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.L1Loss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 1.1592, 1.5544, 1.0105],\n", | |
" [ 0.7427, 2.5223, 3.7611]])" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.L1Loss(reduce=False)(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[1.1591892, 1.5544204, 1.0105054],\n", | |
" [0.7426802, 2.522295 , 3.7610588]], dtype=float32)" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"abs(x.numpy() - y.numpy())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1.7916914" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"abs(x.numpy() - y.numpy()).mean()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## MSELoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 2.6290, 1.8988, -1.6431],\n", | |
" [ 0.3855, 0.4029, -0.4366]])" | |
] | |
}, | |
"execution_count": 64, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(2, 3)\n", | |
"y = torch.randn(2, 3)\n", | |
"\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.5482, 0.2927, -0.4202],\n", | |
" [ 0.6128, -0.4606, -0.8558]])" | |
] | |
}, | |
"execution_count": 65, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 4.3301, 2.5797, 1.4955],\n", | |
" [ 0.0517, 0.7457, 0.1757]])" | |
] | |
}, | |
"execution_count": 66, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.MSELoss(reduce=False)(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 67, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(1.5630)" | |
] | |
}, | |
"execution_count": 67, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.MSELoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 68, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[4.330082 , 2.5796666 , 1.4955105 ],\n", | |
" [0.05166636, 0.7456514 , 0.17567822]], dtype=float32)" | |
] | |
}, | |
"execution_count": 68, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(x.numpy() - y.numpy())**2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1.5630425" | |
] | |
}, | |
"execution_count": 69, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"((x.numpy() - y.numpy())**2).mean()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## CrossEntropyLoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 75, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.6293, 0.7490, 0.3947, -0.5312],\n", | |
" [-0.7038, -1.1594, 0.6817, -0.8557]])" | |
] | |
}, | |
"execution_count": 75, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(2, 4)\n", | |
"y = torch.LongTensor(2).random_(4)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 76, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([ 1, 0])" | |
] | |
}, | |
"execution_count": 76, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 77, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(tensor([ 1.0532, 1.8702]), tensor(1.4617))" | |
] | |
}, | |
"execution_count": 77, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.CrossEntropyLoss(reduce=False)(x, y), nn.CrossEntropyLoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 78, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([1.0531884, 1.8702432], 1.4617158)" | |
] | |
}, | |
"execution_count": 78, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Plain-NumPy cross entropy: -log(softmax(row)[target]) per row, then the mean.\n", | |
"x = x.numpy()\n", | |
"y = y.numpy()\n", | |
"\n", | |
"lst = [\n", | |
"    -np.log(np.exp(row[target]) / np.exp(row).sum())\n", | |
"    for row, target in zip(x, y)\n", | |
"]\n", | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## NLLLoss" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"LogSoftmax" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 102, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-1.4217, 0.4832, -0.0611, 0.6160],\n", | |
" [ 0.4007, -0.3147, 1.9577, -1.0041]])" | |
] | |
}, | |
"execution_count": 102, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(2, 4)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 103, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-2.9596, -1.0547, -1.5990, -0.9219],\n", | |
" [-1.8686, -2.5839, -0.3116, -3.2733]])" | |
] | |
}, | |
"execution_count": 103, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y = nn.LogSoftmax(dim=1)(x)\n", | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 104, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[array([-2.9596107 , -1.0546701 , -1.5989888 , -0.92192453], dtype=float32),\n", | |
" array([-1.8685771 , -2.583946 , -0.31156364, -3.2733235 ], dtype=float32)]" | |
] | |
}, | |
"execution_count": 104, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Plain-NumPy log-softmax, row by row: log(exp(row) / sum(exp(row))).\n", | |
"x = x.numpy()\n", | |
"lst = [np.log(np.exp(row) / np.exp(row).sum()) for row in x]\n", | |
"lst" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"NLLLoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 105, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(tensor([[ 1.2387, 0.4941, 0.3404, 0.4291],\n", | |
" [ 1.1745, -0.1141, 0.8808, 1.8481],\n", | |
" [-0.6773, -0.9456, 0.4741, 0.8378]]),\n", | |
" tensor([[-0.8447, -1.5893, -1.7430, -1.6543],\n", | |
" [-1.3819, -2.6705, -1.6756, -0.7083],\n", | |
" [-2.2489, -2.5172, -1.0975, -0.7338]]))" | |
] | |
}, | |
"execution_count": 105, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x0 = torch.randn(3, 4)\n", | |
"x = nn.LogSoftmax(dim=1)(x0)\n", | |
"x0, x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 107, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([ 2, 1, 3])" | |
] | |
}, | |
"execution_count": 107, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y = torch.LongTensor(3).random_(4)\n", | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 108, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(1.7157)" | |
] | |
}, | |
"execution_count": 108, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.NLLLoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 109, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([ 1.7430, 2.6705, 0.7338])" | |
] | |
}, | |
"execution_count": 109, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.NLLLoss(reduce=False)(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy()\n", | |
"y = y.numpy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 113, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([tensor(1.7430), tensor(2.6705), tensor(0.7338)], 1.7157394)" | |
] | |
}, | |
"execution_count": 113, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# NLLLoss negates the input entry found at each row's target index.\n", | |
"lst = [-x[k][y[k]] for k in range(len(x))]\n", | |
"\n", | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## PoissonNLLLoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 130, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.6711, 1.5167, 1.4041, 0.2249],\n", | |
" [-0.5825, -1.2273, -1.5340, 0.8917]])" | |
] | |
}, | |
"execution_count": 130, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(2, 4)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 131, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-0.4753, 0.1411, 0.3186, 0.1708],\n", | |
" [ 0.2095, -0.6558, -0.7816, 0.5834]])" | |
] | |
}, | |
"execution_count": 131, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y = torch.randn(2, 4)\n", | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 132, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(1.5702)" | |
] | |
}, | |
"execution_count": 132, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.PoissonNLLLoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 133, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 2.2753, 4.3434, 3.6244, 1.2137],\n", | |
" [ 0.6806, -0.5119, -0.9833, 1.9191]])" | |
] | |
}, | |
"execution_count": 133, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.PoissonNLLLoss(reduce=False)(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 134, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy()\n", | |
"y = y.numpy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 142, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Stirling term as given in the PoissonNLLLoss docs:\n", | |
"#   target*log(target) - target + 0.5*log(2*pi*target)\n", | |
"def sterling_approx(y):\n", | |
"    \"\"\"Return the Stirling approximation of log(y!) for a target value y.\n", | |
"\n", | |
"    Works element-wise on NumPy scalars or arrays; only meaningful for y > 0.\n", | |
"    \"\"\"\n", | |
"    # The factor 2 inside the last log was missing, which disagreed with the formula above.\n", | |
"    return y*np.log(y) - y + 0.5*np.log(2*np.pi*y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 143, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# nn.PoissonNLLLoss defaults to log_input=True and full=False.\n", | |
"# The Stirling term belongs to the loss only when full=True (and then only for\n", | |
"# targets > 1), so it stays off here to reproduce nn.PoissonNLLLoss()(x, y).\n", | |
"full = False\n", | |
"\n", | |
"lst = []\n", | |
"for k in range(len(x)):\n", | |
"    lsti = []\n", | |
"    for i in range(len(x[k])):\n", | |
"        stirling = sterling_approx(y[k, i]) if full and y[k, i] > 1 else 0\n", | |
"        # log_input=True form of the loss: exp(input) - target * input\n", | |
"        lss = np.exp(x[k, i]) - y[k, i] * x[k, i] + stirling\n", | |
"        lsti.append(lss)\n", | |
"    lst.append(lsti)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 146, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 2.27534461, 4.34339952, 3.62439346, 1.21373343],\n", | |
" [ 0.68055761, -0.51185942, -0.9832679 , 1.91914582]])" | |
] | |
}, | |
"execution_count": 146, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.array(lst)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 147, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1.570180892944336" | |
] | |
}, | |
"execution_count": 147, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## KLDivLoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[0.2269, 0.9956, 0.9354],\n", | |
" [0.1313, 0.7004, 0.9105]])" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.rand(2, 3)\n", | |
"y = torch.rand(2, 3)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-1.4832, -0.0044, -0.0668],\n", | |
" [-2.0302, -0.3561, -0.0938]])" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"xlog = torch.log(x)\n", | |
"xlog" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[0.5502, 0.6448, 0.7401],\n", | |
" [0.7998, 0.2121, 0.2097]])" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.1529)" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.KLDivLoss()(xlog, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.4873, -0.2801, -0.1733],\n", | |
" [ 1.4451, -0.2534, -0.3079]])" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.KLDivLoss(reduce=False)(xlog, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy()\n", | |
"xlog = np.log(x)\n", | |
"y = y.numpy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 0.48733026, -0.2801091 , -0.17330758],\n", | |
" [ 1.4450648 , -0.2533762 , -0.3079228 ]], dtype=float32)" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# xlog already holds log(x), so it is used directly as the input term.\n", | |
"# Pointwise value: target * (log(target) - input).\n", | |
"lst = [\n", | |
"    [t * (np.log(t) - log_p) for t, log_p in zip(y_row, xlog_row)]\n", | |
"    for y_row, xlog_row in zip(y, xlog)\n", | |
"]\n", | |
"np.array(lst)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.15294655" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## BCELoss" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Sigmoid" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 162, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.3220, -0.8002, -0.5503, -2.0414],\n", | |
" [ 1.0465, -0.2785, -1.6423, 1.1158]])" | |
] | |
}, | |
"execution_count": 162, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(2, 4)\n", | |
"y = nn.Sigmoid()(x)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 163, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.5798, 0.3100, 0.3658, 0.1149],\n", | |
" [ 0.7401, 0.4308, 0.1621, 0.7532]])" | |
] | |
}, | |
"execution_count": 163, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 164, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 165, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[0.5798062 , 0.30998793, 0.36578804, 0.11492275],\n", | |
" [0.7401055 , 0.43081176, 0.1621461 , 0.7531997 ]], dtype=float32)" | |
] | |
}, | |
"execution_count": 165, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"1 / (1 + np.exp(-x))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### single label" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 174, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([ 0.5213, 0.5932, 0.5333])" | |
] | |
}, | |
"execution_count": 174, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x0 = torch.randn(3)\n", | |
"x = nn.Sigmoid()(x0)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 175, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([ 1., 1., 1.])" | |
] | |
}, | |
"execution_count": 175, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y = torch.FloatTensor(3).random_(2)\n", | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 176, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.6008)" | |
] | |
}, | |
"execution_count": 176, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.BCELoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 177, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([ 0.6514, 0.5222, 0.6287])" | |
] | |
}, | |
"execution_count": 177, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.BCELoss(reduce=False)(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 178, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(1.8024)" | |
] | |
}, | |
"execution_count": 178, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"loss = nn.BCELoss(size_average=False)\n", | |
"lss = loss(x, y)\n", | |
"lss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 179, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy()\n", | |
"y = y.numpy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 180, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([0.65144944, 0.52221346, 0.6287041], 0.600789)" | |
] | |
}, | |
"execution_count": 180, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# BCE per sample: -log(p) when the label is 1, otherwise -log(1 - p).\n", | |
"lst = [\n", | |
"    -np.log(p) if label == 1 else -np.log(1 - p)\n", | |
"    for p, label in zip(x, y)\n", | |
"]\n", | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Equivalently" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 184, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([0.65144944190979, 0.5222134590148926, 0.6287040710449219],\n", | |
" 0.6007889906565348)" | |
] | |
}, | |
"execution_count": 184, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Same BCE as one expression: -log(p)*y + -log(1-p)*(1-y).\n", | |
"lst = [\n", | |
"    -np.log(p) * label + -np.log(1 - p) * (1 - label)\n", | |
"    for p, label in zip(x, y)\n", | |
"]\n", | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### multilabel" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 189, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.2400, 0.4380],\n", | |
" [ 0.2651, 0.4915],\n", | |
" [ 0.3721, 0.5370]])" | |
] | |
}, | |
"execution_count": 189, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x0 = torch.randn(3, 2)\n", | |
"x = nn.Sigmoid()(x0)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 190, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 1., 1.],\n", | |
" [ 1., 1.],\n", | |
" [ 1., 0.]])" | |
] | |
}, | |
"execution_count": 190, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y = torch.FloatTensor(3, 2).random_(2)\n", | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 191, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(1.0082)" | |
] | |
}, | |
"execution_count": 191, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.BCELoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 192, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 1.4272, 0.8255],\n", | |
" [ 1.3278, 0.7102],\n", | |
" [ 0.9886, 0.7700]])" | |
] | |
}, | |
"execution_count": 192, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.BCELoss(reduce=False)(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 193, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy()\n", | |
"y = y.numpy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 196, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(array([[1.42715609, 0.82551563],\n", | |
" [1.32778549, 0.71021408],\n", | |
" [0.9886421 , 0.76996785]]), 1.0082135393626286)" | |
] | |
}, | |
"execution_count": 196, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Element-wise BCE over every (sample, label) entry:\n", | |
"# -log(p) when the label is 1, otherwise -log(1 - p).\n", | |
"lst = [\n", | |
"    [\n", | |
"        -np.log(p) if label == 1 else -np.log(1 - p)\n", | |
"        for p, label in zip(x_row, y_row)\n", | |
"    ]\n", | |
"    for x_row, y_row in zip(x, y)\n", | |
"]\n", | |
"np.array(lst), np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Equivalently" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 198, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(array([[1.4271561 , 0.8255156 ],\n", | |
" [1.3277855 , 0.7102141 ],\n", | |
" [0.9886421 , 0.76996785]], dtype=float32), 1.0082136)" | |
] | |
}, | |
"execution_count": 198, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Closed-form BCE applied to whole rows at once (NumPy broadcasts over the labels).\n", | |
"lst = [\n", | |
"    -np.log(x_row) * y_row + -np.log(1 - x_row) * (1 - y_row)\n", | |
"    for x_row, y_row in zip(x, y)\n", | |
"]\n", | |
"np.array(lst), np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## BCEWithLogitsLoss" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`BCEWithLogitsLoss` is equivalent to applying a sigmoid to the input and then `BCELoss` (above), combined into a single loss." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### single label" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 206, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([-0.1104, 0.2577, -0.5487])" | |
] | |
}, | |
"execution_count": 206, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(3)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 207, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([ 0.4724, 0.5641, 0.3662])" | |
] | |
}, | |
"execution_count": 207, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"xs = nn.Sigmoid()(x)\n", | |
"xs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 208, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([ 0., 0., 0.])" | |
] | |
}, | |
"execution_count": 208, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y = torch.FloatTensor(3).random_(2)\n", | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 209, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.6419)" | |
] | |
}, | |
"execution_count": 209, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.BCELoss()(xs, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 210, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.6419)" | |
] | |
}, | |
"execution_count": 210, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.BCEWithLogitsLoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### multilabel" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 211, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-1.4298, 0.1712],\n", | |
" [ 0.7382, -1.8834],\n", | |
" [-1.7065, 1.1530]])" | |
] | |
}, | |
"execution_count": 211, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(3, 2)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 212, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.1931, 0.5427],\n", | |
" [ 0.6766, 0.1320],\n", | |
" [ 0.1536, 0.7601]])" | |
] | |
}, | |
"execution_count": 212, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"xs = nn.Sigmoid()(x)\n", | |
"xs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 213, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 1., 1.],\n", | |
" [ 1., 0.],\n", | |
" [ 1., 1.]])" | |
] | |
}, | |
"execution_count": 213, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y = torch.FloatTensor(3, 2).random_(2)\n", | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 214, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.8226)" | |
] | |
}, | |
"execution_count": 214, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.BCELoss()(xs, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 216, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.8226)" | |
] | |
}, | |
"execution_count": 216, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.BCEWithLogitsLoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## MarginRankingLoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 221, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(tensor([-0.5451, 0.1447, -0.3011]),\n", | |
" tensor([ 0.1900, 0.6117, 1.5479]),\n", | |
" tensor([ 1., 1., -1.]))" | |
] | |
}, | |
"execution_count": 221, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x1 = torch.randn(3)\n", | |
"x2 = torch.randn(3)\n", | |
"y = torch.FloatTensor(np.random.choice([1, -1], 3))\n", | |
"\n", | |
"x1, x2, y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 222, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.4674)" | |
] | |
}, | |
"execution_count": 222, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.MarginRankingLoss(margin=0.1)(x1, x2, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 223, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x1 = x1.numpy()\n", | |
"x2 = x2.numpy()\n", | |
"y = y.numpy()\n", | |
"margin=0.1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 224, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([0.835101580619812, 0.5670205116271972, 0], 0.4673740307490031)" | |
] | |
}, | |
"execution_count": 224, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# MarginRankingLoss per pair: max(0, -y * (x1 - x2) + margin).\n", | |
"lst = [\n", | |
"    max(0, -label * (a - b) + margin)\n", | |
"    for a, b, label in zip(x1, x2, y)\n", | |
"]\n", | |
"\n", | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## HingeEmbeddingLoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 235, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.4457, -1.5535, -0.1648],\n", | |
" [ 0.7037, 0.2432, 0.3004]])" | |
] | |
}, | |
"execution_count": 235, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(2, 3)\n", | |
"y = torch.FloatTensor(np.random.choice([-1, 1], (2, 3)))\n", | |
"\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 236, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-1., -1., 1.],\n", | |
" [ 1., 1., 1.]])" | |
] | |
}, | |
"execution_count": 236, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 237, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.6984)" | |
] | |
}, | |
"execution_count": 237, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.HingeEmbeddingLoss(margin=1)(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 238, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy()\n", | |
"y = y.numpy()\n", | |
"margin=1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 239, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 0.55430901, 2.55346417, -0.16479899],\n", | |
" [ 0.70371646, 0.24319194, 0.30040452]])" | |
] | |
}, | |
"execution_count": 239, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Element-wise hinge embedding loss: x where y == 1, otherwise max(0, margin - x).\n", | |
"lst = [\n", | |
"    [\n", | |
"        value if label == 1 else max(0, margin - value)\n", | |
"        for value, label in zip(x_row, y_row)\n", | |
"    ]\n", | |
"    for x_row, y_row in zip(x, y)\n", | |
"]\n", | |
"np.array(lst)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 240, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.6983811855316162" | |
] | |
}, | |
"execution_count": 240, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## MultiLabelMarginLoss" | |
] | |
}, | |
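{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`MultiLabelMarginLoss` is easy to misread: each row of `y` lists the target class indices, and the list ends at the first negative entry. A helpful reference: https://blog.csdn.net/zhangxb35/article/details/72464152" | |
] | |
}, | |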
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### one-sample example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 244, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.3257, -1.2182, 1.4421, 0.2452]])" | |
] | |
}, | |
"execution_count": 244, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(1, 4)\n", | |
"y = torch.LongTensor(1, 4).random_(-1, 4)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 245, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 3, -1, -1, -1]])" | |
] | |
}, | |
"execution_count": 245, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 246, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.8194)" | |
] | |
}, | |
"execution_count": 246, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.MultiLabelMarginLoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 248, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy()\n", | |
"y = y.numpy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 249, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0 3\n", | |
"1 3\n", | |
"2 3\n" | |
] | |
} | |
], | |
"source": [ | |
"lst = []\n", | |
"for k in range(len(x)):\n", | |
"    # Target class indices are the leading entries of y[k], up to the first negative value.\n", | |
"    targets = []\n", | |
"    for label in y[k]:\n", | |
"        if label < 0:\n", | |
"            break\n", | |
"        targets.append(label)\n", | |
"\n", | |
"    # Sum the hinge term over every (non-target i, target j) pair.\n", | |
"    sm = 0\n", | |
"    for i in range(len(x[k])):\n", | |
"        if i in targets:\n", | |
"            continue\n", | |
"        for j in targets:\n", | |
"            print(i, j)  # trace of the contributing (non-target, target) pairs\n", | |
"            sm += max(0, 1 - (x[k][j] - x[k][i]))\n", | |
"    lst.append(sm / len(x[k]))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 250, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([0.8193658106029034], 0.8193658106029034)" | |
] | |
}, | |
"execution_count": 250, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### multi-sample example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 252, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-1.3972, -0.5922, -0.7198, 0.8985],\n", | |
" [-0.6777, 0.3352, -0.1973, -0.0305],\n", | |
" [ 0.4067, 0.2513, -1.0973, -0.1837]])" | |
] | |
}, | |
"execution_count": 252, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(3, 4)\n", | |
"y = torch.LongTensor(3, 4).random_(-1, 4)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 253, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 3, 1, 0, 1],\n", | |
" [ 2, 0, 0, -1],\n", | |
" [ 1, -1, 2, 1]])" | |
] | |
}, | |
"execution_count": 253, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 254, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(1.2635)" | |
] | |
}, | |
"execution_count": 254, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.MultiLabelMarginLoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 255, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([0.8555163443088531, 2.5048549212515354, 0.4300655126571655],\n", | |
" 1.263478926072518)" | |
] | |
}, | |
"execution_count": 255, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = x.numpy()\n", | |
"y = y.numpy()\n", | |
"\n", | |
"lst = []\n", | |
"for k in range(len(x)):\n", | |
"    # Target class indices are the leading entries of y[k], up to the first negative value.\n", | |
"    targets = []\n", | |
"    for label in y[k]:\n", | |
"        if label < 0:\n", | |
"            break\n", | |
"        targets.append(label)\n", | |
"\n", | |
"    # Sum the hinge term over every (non-target i, target j) pair.\n", | |
"    sm = 0\n", | |
"    for i in range(len(x[k])):\n", | |
"        if i in targets:\n", | |
"            continue\n", | |
"        for j in targets:\n", | |
"            sm += max(0, 1 - (x[k][j] - x[k][i]))\n", | |
"    lst.append(sm / len(x[k]))\n", | |
"\n", | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## SmoothL1Loss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 257, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = torch.randn(2, 3)\n", | |
"y = torch.randn(2, 3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 258, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.5490)" | |
] | |
}, | |
"execution_count": 258, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.SmoothL1Loss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 259, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.6491, 0.0651, 1.2454],\n", | |
" [ 0.3355, 0.9598, 0.0390]])" | |
] | |
}, | |
"execution_count": 259, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.SmoothL1Loss(reduce=False)(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 260, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy() \n", | |
"y = y.numpy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 261, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def smoothl1loss(x, y):\n", | |
"    \"\"\"Smooth L1 loss for one pair: quadratic when |x - y| < 1, linear otherwise.\"\"\"\n", | |
"    diff = abs(x - y)\n", | |
"    if diff < 1:\n", | |
"        return 0.5 * diff ** 2\n", | |
"    return diff - 0.5" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 263, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(array([[0.64909291, 0.06508577, 1.24535966],\n", | |
" [0.33547111, 0.95977783, 0.03898569]]), 0.5489621638637431)" | |
] | |
}, | |
"execution_count": 263, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Apply the scalar loss element-wise, then average over all elements.\n", | |
"lst = [\n", | |
"    [smoothl1loss(pred, target) for pred, target in zip(x_row, y_row)]\n", | |
"    for x_row, y_row in zip(x, y)\n", | |
"]\n", | |
"np.array(lst), np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## SoftMarginLoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 264, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-0.8887, -0.3107, -0.6408, -2.5345],\n", | |
" [ 0.2605, -0.1133, 0.2433, 0.3387]])" | |
] | |
}, | |
"execution_count": 264, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(2, 4)\n", | |
"y = torch.FloatTensor(np.random.choice([-1, 1], (2, 4)))\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 265, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 1., 1., 1., -1.],\n", | |
" [ 1., 1., 1., 1.]])" | |
] | |
}, | |
"execution_count": 265, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 266, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.7092)" | |
] | |
}, | |
"execution_count": 266, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.SoftMarginLoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 267, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy()\n", | |
"y = y.numpy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 268, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([0.8084959688801056, 0.6099205543628277], 0.7092082616214666)" | |
] | |
}, | |
"execution_count": 268, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Per-sample loss: mean over classes of log(1 + exp(-y * x)).\n", | |
"lst = [\n", | |
"    sum(np.log(1 + np.exp(-label * score)) for score, label in zip(x_row, y_row)) / len(x_row)\n", | |
"    for x_row, y_row in zip(x, y)\n", | |
"]\n", | |
"\n", | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## MultiLabelSoftMarginLoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 269, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-1.9676, -1.4290, -0.1576, 0.6041],\n", | |
" [ 0.0584, -1.0734, -0.4568, 0.4787]])" | |
] | |
}, | |
"execution_count": 269, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(2, 4)\n", | |
"y = torch.FloatTensor(2, 4).random_(2)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 270, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0., 1., 1., 1.],\n", | |
" [ 1., 0., 1., 0.]])" | |
] | |
}, | |
"execution_count": 270, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 271, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.7315)" | |
] | |
}, | |
"execution_count": 271, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.MultiLabelSoftMarginLoss()(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 272, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy()\n", | |
"y = y.numpy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 274, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([0.7464252382614533, 0.7166620319227274], 0.7315436350920903)" | |
] | |
}, | |
"execution_count": 274, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"lst = []\n", | |
"for x_row, y_row in zip(x, y):\n", | |
"    sm = 0\n", | |
"    for score, label in zip(x_row, y_row):\n", | |
"        # Binary cross-entropy on the sigmoid of the score: exp(s) / (1 + exp(s)).\n", | |
"        exp_score = np.exp(score)\n", | |
"        positive_term = label * np.log(exp_score / (1 + exp_score))\n", | |
"        negative_term = (1 - label) * np.log(1 / (1 + exp_score))\n", | |
"        sm -= positive_term + negative_term\n", | |
"    lst.append(sm / len(x_row))\n", | |
"\n", | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## CosineEmbeddingLoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 276, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.1394, -0.9875, -0.4814],\n", | |
" [ 0.4167, 0.9489, -0.2292]])" | |
] | |
}, | |
"execution_count": 276, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x1 = torch.randn(2, 3)\n", | |
"x2 = torch.randn(2, 3)\n", | |
"y = torch.FloatTensor(np.random.choice([1, -1], 2))\n", | |
"\n", | |
"x1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 277, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 0.3618, 1.1291, -1.3030],\n", | |
" [ 0.9274, 0.7760, -1.6018]])" | |
] | |
}, | |
"execution_count": 277, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 278, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([-1., 1.])" | |
] | |
}, | |
"execution_count": 278, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 279, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.1503)" | |
] | |
}, | |
"execution_count": 279, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.CosineEmbeddingLoss(margin=0.1)(x1, x2, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 280, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x1 = x1.numpy()\n", | |
"x2 = x2.numpy()\n", | |
"y = y.numpy()\n", | |
"margin=0.1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 281, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from scipy.spatial.distance import cosine\n", | |
"\n", | |
"\n", | |
"def cos(x, y):\n", | |
"    \"\"\"Cosine similarity of two vectors (scipy's `cosine` returns the cosine distance).\"\"\"\n", | |
"    distance = cosine(x, y)\n", | |
"    return 1 - distance" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 282, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([0, 0.3005916476249695], 0.15029582381248474)" | |
] | |
}, | |
"execution_count": 282, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"lst = []\n", | |
"for first, second, label in zip(x1, x2, y):\n", | |
"    similarity = cos(first, second)\n", | |
"    if label == 1:\n", | |
"        # Similar pair: penalise any gap from perfect similarity.\n", | |
"        lst.append(1 - similarity)\n", | |
"    elif label == -1:\n", | |
"        # Dissimilar pair: penalise only similarity above the margin.\n", | |
"        lst.append(max(0, similarity - margin))\n", | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## MultiMarginLoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 283, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[-0.0025, -0.6019, 1.9809, -1.2663],\n", | |
" [ 0.6060, -0.0002, 0.9110, -0.6320]])" | |
] | |
}, | |
"execution_count": 283, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x = torch.randn(2, 4)\n", | |
"y = torch.LongTensor(2).random_(4)\n", | |
"x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 284, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([ 1, 3])" | |
] | |
}, | |
"execution_count": 284, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"y" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 285, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(3.4149)" | |
] | |
}, | |
"execution_count": 285, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.MultiMarginLoss(margin=0.9, p=2)(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 286, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x = x.numpy()\n", | |
"y = y.numpy()\n", | |
"p=2\n", | |
"margin=0.9" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 287, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([3.6083879542856043, 3.2214048583725967], 3.4148964063291007)" | |
] | |
}, | |
"execution_count": 287, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Per-sample loss: sum over non-target classes i of max(0, margin - x[y] + x[i])**p,\n", | |
"# divided by the number of classes.\n", | |
"lst = []\n", | |
"for k in range(len(x)):\n", | |
"    target_score = x[k, y[k]]\n", | |
"    sm = 0\n", | |
"    for i in range(len(x[k])):\n", | |
"        if i != y[k]:\n", | |
"            # Clamp the hinge at zero *before* raising to the power p; otherwise an\n", | |
"            # even p would turn a negative (already satisfied) margin into a penalty.\n", | |
"            sm += max(0, margin - target_score + x[k, i]) ** p\n", | |
"    lst.append(sm / len(x[k]))\n", | |
"\n", | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## TripletMarginLoss" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 288, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor([[ 2.0001, 1.2658, -1.1397],\n", | |
" [ 0.9793, -0.3433, -0.0746]])" | |
] | |
}, | |
"execution_count": 288, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x1 = torch.randn(2, 3)\n", | |
"x2 = torch.randn(2, 3)\n", | |
"x3 = torch.randn(2, 3)\n", | |
"margin = 0.9\n", | |
"p = 2\n", | |
"\n", | |
"x1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 289, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"tensor(0.4055)" | |
] | |
}, | |
"execution_count": 289, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nn.TripletMarginLoss(margin=margin, p=p)(x1, x2, x3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 290, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"x1 = x1.numpy()\n", | |
"x2 = x2.numpy()\n", | |
"x3 = x3.numpy()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 291, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def d(x1, x2, p):\n", | |
"    \"\"\"p-norm distance between two vectors: (sum_i |x1_i - x2_i|**p) ** (1/p).\"\"\"\n", | |
"    # abs() matters for odd p, where signed differences would otherwise cancel out.\n", | |
"    return sum(abs(x1 - x2) ** p) ** (1 / p)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 292, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"([0.0, 0.8110052643651849], 0.40550263218259247)" | |
] | |
}, | |
"execution_count": 292, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# One hinge term per triplet: max(d(anchor, positive) - d(anchor, negative) + margin, 0).\n", | |
"# The distance already reduces over the feature dimension, so there is nothing to\n", | |
"# average per feature; the loss is the mean of these terms over the batch.\n", | |
"lst = [\n", | |
"    max(d(anchor, positive, p) - d(anchor, negative, p) + margin, 0.0)\n", | |
"    for anchor, positive, negative in zip(x1, x2, x3)\n", | |
"]\n", | |
"\n", | |
"lst, np.mean(lst)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## References" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"- https://pytorch.org/docs/0.4.0/nn.html#loss-functions\n", | |
"- https://blog.csdn.net/zhangxb35/article/details/72464152" | |
] | |
} | |
], | |
"metadata": { | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/c94304dbc7f1f9be3333742b7e8249a7" | |
}, | |
"gist": { | |
"data": { | |
"description": "git/yang-zhang.github.io/ds_code/pytorch-losses-in-plain-python.ipynb", | |
"public": true | |
}, | |
"id": "c94304dbc7f1f9be3333742b7e8249a7" | |
}, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.6" | |
}, | |
"toc": { | |
"base_numbering": 1, | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": true, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": { | |
"height": "calc(100% - 180px)", | |
"left": "10px", | |
"top": "150px", | |
"width": "230px" | |
}, | |
"toc_section_display": true, | |
"toc_window_display": true | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment