Created
July 25, 2024 00:46
-
-
Save apowers313/4b591132ed284c382553b05cebccaffe to your computer and use it in GitHub Desktop.
CuPy Profiling
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Info" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wed Jul 24 17:41:09 PDT 2024\n", | |
"=========\n", | |
"Linux 39f11d97b672 5.15.0-113-generic #123-Ubuntu SMP Mon Jun 10 08:16:17 UTC 2024 x86_64 x86_64 x86_64 GNU/Linux\n", | |
"=========\n", | |
"nvcc: NVIDIA (R) Cuda compiler driver\n", | |
"Copyright (c) 2005-2022 NVIDIA Corporation\n", | |
"Built on Mon_Oct_24_19:12:58_PDT_2022\n", | |
"Cuda compilation tools, release 12.0, V12.0.76\n", | |
"Build cuda_12.0.r12.0/compiler.31968024_0\n", | |
"=========\n", | |
"Wed Jul 24 17:41:10 2024 \n", | |
"+---------------------------------------------------------------------------------------+\n", | |
"| NVIDIA-SMI 535.183.01 Driver Version: 535.183.01 CUDA Version: 12.2 |\n", | |
"|-----------------------------------------+----------------------+----------------------+\n", | |
"| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", | |
"| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", | |
"| | | MIG M. |\n", | |
"|=========================================+======================+======================|\n", | |
"| 0 Tesla T4 Off | 00000000:3B:00.0 Off | 0 |\n", | |
"| N/A 40C P8 9W / 70W | 2MiB / 15360MiB | 0% Default |\n", | |
"| | | N/A |\n", | |
"+-----------------------------------------+----------------------+----------------------+\n", | |
" \n", | |
"+---------------------------------------------------------------------------------------+\n", | |
"| Processes: |\n", | |
"| GPU GI CI PID Type Process name GPU Memory |\n", | |
"| ID ID Usage |\n", | |
"|=======================================================================================|\n", | |
"| No running processes found |\n", | |
"+---------------------------------------------------------------------------------------+\n", | |
"=========\n", | |
"CuPy version 13.2.0\n", | |
"=========\n", | |
"NumPy version 1.26.4\n" | |
] | |
} | |
], | |
"source": [ | |
"!date\n", | |
"print(\"=========\")\n", | |
"!uname -a\n", | |
"print(\"=========\")\n", | |
"import os\n", | |
"os.environ[\"PATH\"] += os.pathsep + \"/usr/local/cuda/bin\"\n", | |
"!nvcc --version\n", | |
"print(\"=========\")\n", | |
"!nvidia-smi\n", | |
"print(\"=========\")\n", | |
"import cupy as cp\n", | |
"print(\"CuPy version\", cp.__version__)\n", | |
"print(\"=========\")\n", | |
"import numpy as np\n", | |
"print(\"NumPy version\", np.__version__)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Examples" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"f : CPU: 79.255 us +/- 0.927 (min: 78.492 / max: 85.846) us GPU-0: 84.850 us +/- 1.108 (min: 82.592 / max: 92.064) us\n" | |
] | |
} | |
], | |
"source": [ | |
"# https://docs.cupy.dev/en/stable/reference/generated/cupyx.profiler.benchmark.html\n", | |
"import cupy as cp\n", | |
"from cupyx.profiler import benchmark\n", | |
"\n", | |
"def f(a, b):\n", | |
" return 3 * cp.sin(-a) * b\n", | |
"\n", | |
"a = 0.5 - cp.random.random((100,))\n", | |
"b = cp.random.random((100,))\n", | |
"print(benchmark(f, (a, b), n_repeat=1000))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"my_func : CPU: 97.239 us +/- 9.314 (min: 94.165 / max: 187.152) us GPU-0: 263.434 us +/- 23.482 (min: 255.840 / max: 376.352) us\n" | |
] | |
} | |
], | |
"source": [ | |
"# https://docs.cupy.dev/en/stable/user_guide/performance.html\n", | |
"import cupy as cp\n", | |
"from cupyx.profiler import benchmark\n", | |
"\n", | |
"def my_func(a):\n", | |
" return cp.sqrt(cp.sum(a**2, axis=-1))\n", | |
"\n", | |
"a = cp.random.random((256, 1024))\n", | |
"print(benchmark(my_func, (a,), n_repeat=10000)) " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Profiling" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Setup" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import cupy as cp\n", | |
"from cupyx.profiler import benchmark\n", | |
"\n", | |
"# https://github.com/cupy/cupy-benchmark/blob/master/benchmarks/numpy/bench_linalg.py\n", | |
"a = np.arange(60000000.0).reshape(1500, 40000)\n", | |
"b = np.arange(240000000.0).reshape(40000, 6000)\n", | |
"c = np.arange(6000)\n", | |
"d = np.arange(40000)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## NumPy" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"33.8 ms ± 2.03 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"def np_rand_max(a, b):\n", | |
" np.dot(d, np.dot(b, c))\n", | |
"\n", | |
"%timeit np_rand_max(a, b)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## CuPy" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"33.4 ms ± 1.71 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"def cp_rand_max(a, b):\n", | |
" # https://github.com/cupy/cupy-benchmark/tree/master?tab=readme-ov-file#numpy-benchmark\n", | |
" # the following was stolen from the @sync decorator used in CuPy's emulation\n", | |
" # of NumPy benchmarks\n", | |
" # https://github.com/cupy/cupy-benchmark/blob/master/benchmarks/utils/helper.py\n", | |
" event = cp.cuda.stream.Event()\n", | |
" event.record()\n", | |
" event.synchronize()\n", | |
" cp.dot(d, cp.dot(b, c))\n", | |
" event = cp.cuda.stream.Event()\n", | |
" event.record()\n", | |
" event.synchronize()\n", | |
"\n", | |
"%timeit cp_rand_max(a, b)\n", | |
"# print(benchmark(cp_rand_max, (a,b), n_repeat=10)) " | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": ".venv", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment