Created
December 30, 2022 13:07
-
-
Save yoshipon/dc7e14635d48656c767d47132351eaf6 to your computer and use it in GitHub Desktop.
torch.linalg.inv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "cb572805", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from contextlib import contextmanager\n", | |
"\n", | |
"import torch\n", | |
"import cupy as cp" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "ba845d71", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Collecting environment information...\n", | |
"PyTorch version: 1.13.1+cu117\n", | |
"Is debug build: False\n", | |
"CUDA used to build PyTorch: 11.7\n", | |
"ROCM used to build PyTorch: N/A\n", | |
"\n", | |
"OS: Ubuntu 20.04.5 LTS (x86_64)\n", | |
"GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0\n", | |
"Clang version: Could not collect\n", | |
"CMake version: version 3.16.3\n", | |
"Libc version: glibc-2.31\n", | |
"\n", | |
"Python version: 3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0] (64-bit runtime)\n", | |
"Python platform: Linux-3.10.0-862.el7.x86_64-x86_64-with-glibc2.31\n", | |
"Is CUDA available: True\n", | |
"CUDA runtime version: 11.7.99\n", | |
"CUDA_MODULE_LOADING set to: LAZY\n", | |
"GPU models and configuration: \n", | |
"GPU 0: Tesla V100-SXM2-16GB\n", | |
"GPU 1: Tesla V100-SXM2-16GB\n", | |
"GPU 2: Tesla V100-SXM2-16GB\n", | |
"GPU 3: Tesla V100-SXM2-16GB\n", | |
"\n", | |
"Nvidia driver version: 510.47.03\n", | |
"cuDNN version: Probably one of the following:\n", | |
"/usr/lib/x86_64-linux-gnu/libcudnn.so.8.5.0\n", | |
"/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.5.0\n", | |
"/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.5.0\n", | |
"/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.5.0\n", | |
"/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.5.0\n", | |
"/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.5.0\n", | |
"/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.5.0\n", | |
"HIP runtime version: N/A\n", | |
"MIOpen runtime version: N/A\n", | |
"Is XNNPACK available: True\n", | |
"\n", | |
"Versions of relevant libraries:\n", | |
"[pip3] mypy==0.991\n", | |
"[pip3] mypy-extensions==0.4.3\n", | |
"[pip3] numpy==1.21.5\n", | |
"[pip3] numpydoc==1.4.0\n", | |
"[pip3] pytorch-ignite==0.4.10\n", | |
"[pip3] torch==1.13.1+cu117\n", | |
"[pip3] torchaudio==0.13.1+cu117\n", | |
"[pip3] torchvision==0.14.1+cu117\n", | |
"[conda] blas 1.0 mkl \n", | |
"[conda] mkl 2021.4.0 h06a4308_640 \n", | |
"[conda] mkl-service 2.4.0 py39h7f8727e_0 \n", | |
"[conda] mkl_fft 1.3.1 py39hd3c417c_0 \n", | |
"[conda] mkl_random 1.2.2 py39h51133e4_0 \n", | |
"[conda] numpy 1.21.5 py39h6c91a56_3 \n", | |
"[conda] numpy-base 1.21.5 py39ha15fc14_3 \n", | |
"[conda] numpydoc 1.4.0 py39h06a4308_0 \n", | |
"[conda] pytorch-ignite 0.4.10 pypi_0 pypi\n", | |
"[conda] torch 1.13.1+cu117 pypi_0 pypi\n", | |
"[conda] torchaudio 0.13.1+cu117 pypi_0 pypi\n", | |
"[conda] torchvision 0.14.1+cu117 pypi_0 pypi\n" | |
] | |
} | |
], | |
"source": [ | |
"! python collect_env.py" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "abe8a9ac", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"OS : Linux-3.10.0-862.el7.x86_64-x86_64-with-glibc2.31\n", | |
"Python Version : 3.9.13\n", | |
"CuPy Version : 11.4.0\n", | |
"CuPy Platform : NVIDIA CUDA\n", | |
"NumPy Version : 1.21.5\n", | |
"SciPy Version : 1.8.1\n", | |
"Cython Build Version : 0.29.32\n", | |
"Cython Runtime Version : 0.29.32\n", | |
"CUDA Root : /usr/local/cuda\n", | |
"nvcc PATH : /usr/local/cuda/bin/nvcc\n", | |
"CUDA Build Version : 11080\n", | |
"CUDA Driver Version : 11060\n", | |
"CUDA Runtime Version : 11070\n", | |
"cuBLAS Version : (available)\n", | |
"cuFFT Version : 10702\n", | |
"cuRAND Version : 10210\n", | |
"cuSOLVER Version : (11, 4, 0)\n", | |
"cuSPARSE Version : (available)\n", | |
"NVRTC Version : (11, 7)\n", | |
"Thrust Version : 101501\n", | |
"CUB Build Version : 101501\n", | |
"Jitify Build Version : 4a37de0\n", | |
"cuDNN Build Version : 8600\n", | |
"cuDNN Version : 8500\n", | |
"NCCL Build Version : 21505\n", | |
"NCCL Runtime Version : 21304\n", | |
"cuTENSOR Version : None\n", | |
"cuSPARSELt Build Version : None\n", | |
"Device 0 Name : Tesla V100-SXM2-16GB\n", | |
"Device 0 Compute Capability : 70\n", | |
"Device 0 PCI Bus ID : 0000:3D:00.0\n", | |
"Device 1 Name : Tesla V100-SXM2-16GB\n", | |
"Device 1 Compute Capability : 70\n", | |
"Device 1 PCI Bus ID : 0000:3E:00.0\n", | |
"Device 2 Name : Tesla V100-SXM2-16GB\n", | |
"Device 2 Compute Capability : 70\n", | |
"Device 2 PCI Bus ID : 0000:B1:00.0\n", | |
"Device 3 Name : Tesla V100-SXM2-16GB\n", | |
"Device 3 Compute Capability : 70\n", | |
"Device 3 PCI Bus ID : 0000:B2:00.0\n" | |
] | |
} | |
], | |
"source": [ | |
"cp.show_config()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "3cdda1a7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def _torch_alloc(size):\n", | |
" device = cp.cuda.Device().id\n", | |
" tensor = torch.empty(size, dtype=torch.uint8, device=device)\n", | |
" return cp.cuda.MemoryPointer(cp.cuda.UnownedMemory(tensor.data_ptr(), size, tensor), 0)\n", | |
"\n", | |
"cp.cuda.set_allocator(_torch_alloc)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "011cb567", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def cupy_inv(x_):\n", | |
" x = cp.from_dlpack(x_)\n", | |
" return torch.from_dlpack(cp.linalg.inv(x))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "5d3cf714", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dtype = torch.complex64" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "14c9d1e8", | |
"metadata": {}, | |
"source": [ | |
"# inv for 100000 x 4 x 4" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "00d60af4", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"size = (100000, 4, 4)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "1d82ccb7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Warm-up: call cupy_inv once before the timed cells so CuPy's one-time kernel\n", | |
"# initialization is not counted in the measurements; the result is discarded.\n", | |
"_ = cupy_inv(torch.randn(*size, dtype=dtype, device=\"cuda\")) # initialize CuPy's kernel\n", | |
"torch.cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "9144c2cb", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"639 µs ± 270 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = torch.randn(*size, dtype=dtype, device=\"cuda\"); torch.cuda.synchronize()\n", | |
"# Baseline: torch.linalg.inv on the input created by the %%timeit setup statement.\n", | |
"y = torch.linalg.inv(x)\n", | |
"# Block until the GPU work has finished so the timing covers the computation itself.\n", | |
"torch.cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "e3162cfe", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"288 µs ± 1.83 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = torch.randn(*size, dtype=dtype, device=\"cuda\"); torch.cuda.synchronize()\n", | |
"# Candidate: cupy_inv (CuPy's inverse reached through DLPack) on the same kind of input.\n", | |
"y = cupy_inv(x)\n", | |
"# Block until the GPU work has finished so the timing covers the computation itself.\n", | |
"torch.cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "f191dccf", | |
"metadata": {}, | |
"source": [ | |
"# inv for 100000 x 8 x 8" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "68655f98", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"size = (100000, 8, 8)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "d415cd3a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Warm-up: call cupy_inv once before the timed cells so CuPy's one-time kernel\n", | |
"# initialization is not counted in the measurements; the result is discarded.\n", | |
"_ = cupy_inv(torch.randn(*size, dtype=dtype, device=\"cuda\")) # initialize CuPy's kernel\n", | |
"torch.cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "93362ea7", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.47 ms ± 358 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = torch.randn(*size, dtype=dtype, device=\"cuda\"); torch.cuda.synchronize()\n", | |
"# Baseline: torch.linalg.inv on the input created by the %%timeit setup statement.\n", | |
"y = torch.linalg.inv(x)\n", | |
"# Block until the GPU work has finished so the timing covers the computation itself.\n", | |
"torch.cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "7cc0aa71", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"697 µs ± 1.33 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = torch.randn(*size, dtype=dtype, device=\"cuda\"); torch.cuda.synchronize()\n", | |
"# Candidate: cupy_inv (CuPy's inverse reached through DLPack) on the same kind of input.\n", | |
"y = cupy_inv(x)\n", | |
"# Block until the GPU work has finished so the timing covers the computation itself.\n", | |
"torch.cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "a67f4a27", | |
"metadata": {}, | |
"source": [ | |
"# inv for 100000 x 16 x 16" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "082c2aed", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"size = (100000, 16, 16)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "3dd8c8ca", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Warm-up: call cupy_inv once before the timed cells so CuPy's one-time kernel\n", | |
"# initialization is not counted in the measurements; the result is discarded.\n", | |
"_ = cupy_inv(torch.randn(*size, dtype=dtype, device=\"cuda\")) # initialize CuPy's kernel\n", | |
"torch.cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "3272c1af", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"11.9 ms ± 11.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = torch.randn(*size, dtype=dtype, device=\"cuda\"); torch.cuda.synchronize()\n", | |
"# Baseline: torch.linalg.inv on the input created by the %%timeit setup statement.\n", | |
"y = torch.linalg.inv(x)\n", | |
"# Block until the GPU work has finished so the timing covers the computation itself.\n", | |
"torch.cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "4714cd63", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"4.39 ms ± 846 ns per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = torch.randn(*size, dtype=dtype, device=\"cuda\"); torch.cuda.synchronize()\n", | |
"# Candidate: cupy_inv (CuPy's inverse reached through DLPack) on the same kind of input.\n", | |
"y = cupy_inv(x)\n", | |
"# Block until the GPU work has finished so the timing covers the computation itself.\n", | |
"torch.cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "e54fb8ed", | |
"metadata": {}, | |
"source": [ | |
"# inv for 100000 x 32 x 32" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"id": "a75e181f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"size = (100000, 32, 32)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "555e2610", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Warm-up: call cupy_inv once before the timed cells so CuPy's one-time kernel\n", | |
"# initialization is not counted in the measurements; the result is discarded.\n", | |
"_ = cupy_inv(torch.randn(*size, dtype=dtype, device=\"cuda\")) # initialize CuPy's kernel\n", | |
"torch.cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"id": "99fd1017", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"36.6 ms ± 67.5 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = torch.randn(*size, dtype=dtype, device=\"cuda\"); torch.cuda.synchronize()\n", | |
"# Baseline: torch.linalg.inv on the input created by the %%timeit setup statement.\n", | |
"y = torch.linalg.inv(x)\n", | |
"# Block until the GPU work has finished so the timing covers the computation itself.\n", | |
"torch.cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"id": "a57a4192", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"24.5 ms ± 6.64 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit x = torch.randn(*size, dtype=dtype, device=\"cuda\"); torch.cuda.synchronize()\n", | |
"# Candidate: cupy_inv (CuPy's inverse reached through DLPack) on the same kind of input.\n", | |
"y = cupy_inv(x)\n", | |
"# Block until the GPU work has finished so the timing covers the computation itself.\n", | |
"torch.cuda.synchronize()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment