Skip to content

Instantly share code, notes, and snippets.

@apowers313
Created July 25, 2024 00:46
Show Gist options
  • Save apowers313/4b591132ed284c382553b05cebccaffe to your computer and use it in GitHub Desktop.
Save apowers313/4b591132ed284c382553b05cebccaffe to your computer and use it in GitHub Desktop.
CuPy Profiling
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Info"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wed Jul 24 17:41:09 PDT 2024\n",
"=========\n",
"Linux 39f11d97b672 5.15.0-113-generic #123-Ubuntu SMP Mon Jun 10 08:16:17 UTC 2024 x86_64 x86_64 x86_64 GNU/Linux\n",
"=========\n",
"nvcc: NVIDIA (R) Cuda compiler driver\n",
"Copyright (c) 2005-2022 NVIDIA Corporation\n",
"Built on Mon_Oct_24_19:12:58_PDT_2022\n",
"Cuda compilation tools, release 12.0, V12.0.76\n",
"Build cuda_12.0.r12.0/compiler.31968024_0\n",
"=========\n",
"Wed Jul 24 17:41:10 2024 \n",
"+---------------------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 535.183.01 Driver Version: 535.183.01 CUDA Version: 12.2 |\n",
"|-----------------------------------------+----------------------+----------------------+\n",
"| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|=========================================+======================+======================|\n",
"| 0 Tesla T4 Off | 00000000:3B:00.0 Off | 0 |\n",
"| N/A 40C P8 9W / 70W | 2MiB / 15360MiB | 0% Default |\n",
"| | | N/A |\n",
"+-----------------------------------------+----------------------+----------------------+\n",
" \n",
"+---------------------------------------------------------------------------------------+\n",
"| Processes: |\n",
"| GPU GI CI PID Type Process name GPU Memory |\n",
"| ID ID Usage |\n",
"|=======================================================================================|\n",
"| No running processes found |\n",
"+---------------------------------------------------------------------------------------+\n",
"=========\n",
"CuPy version 13.2.0\n",
"=========\n",
"NumPy version 1.26.4\n"
]
}
],
"source": [
"# Record when and where the notebook ran, plus the toolchain/library versions.\n",
"!date\n",
"print(\"=========\")\n",
"!uname -a\n",
"print(\"=========\")\n",
"import os\n",
"# Make nvcc reachable for the shell command below. Only extend PATH when the\n",
"# directory is not already listed, so re-running this cell stays idempotent\n",
"# instead of appending a duplicate entry each time.\n",
"CUDA_BIN_DIR = \"/usr/local/cuda/bin\"\n",
"if CUDA_BIN_DIR not in os.environ[\"PATH\"].split(os.pathsep):\n",
"    os.environ[\"PATH\"] += os.pathsep + CUDA_BIN_DIR\n",
"!nvcc --version\n",
"print(\"=========\")\n",
"!nvidia-smi\n",
"print(\"=========\")\n",
"import cupy as cp\n",
"print(\"CuPy version\", cp.__version__)\n",
"print(\"=========\")\n",
"import numpy as np\n",
"print(\"NumPy version\", np.__version__)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Examples"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"f : CPU: 79.255 us +/- 0.927 (min: 78.492 / max: 85.846) us GPU-0: 84.850 us +/- 1.108 (min: 82.592 / max: 92.064) us\n"
]
}
],
"source": [
"# Elementwise example; reference:\n",
"# https://docs.cupy.dev/en/stable/reference/generated/cupyx.profiler.benchmark.html\n",
"import cupy as cp\n",
"from cupyx.profiler import benchmark\n",
"\n",
"def f(a, b):\n",
"    \"\"\"Return ``3 * sin(-a) * b`` computed with CuPy.\"\"\"\n",
"    scaled_sine = 3 * cp.sin(-a)\n",
"    return scaled_sine * b\n",
"\n",
"# Two 100-element random vectors created through CuPy's random module.\n",
"a = 0.5 - cp.random.random((100,))\n",
"b = cp.random.random((100,))\n",
"print(benchmark(f, args=(a, b), n_repeat=1000))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"my_func : CPU: 97.239 us +/- 9.314 (min: 94.165 / max: 187.152) us GPU-0: 263.434 us +/- 23.482 (min: 255.840 / max: 376.352) us\n"
]
}
],
"source": [
"# Reduction example; reference:\n",
"# https://docs.cupy.dev/en/stable/user_guide/performance.html\n",
"import cupy as cp\n",
"from cupyx.profiler import benchmark\n",
"\n",
"def my_func(a):\n",
"    \"\"\"Return the square root of the sum of squares of ``a`` along its last axis.\"\"\"\n",
"    sum_of_squares = cp.sum(a**2, axis=-1)\n",
"    return cp.sqrt(sum_of_squares)\n",
"\n",
"# 256 x 1024 random matrix created through CuPy's random module.\n",
"a = cp.random.random((256, 1024))\n",
"print(benchmark(my_func, args=(a,), n_repeat=10000))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Profiling"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import cupy as cp\n",
"from cupyx.profiler import benchmark\n",
"\n",
"# Operands for the dot-product timings below; reference:\n",
"# https://github.com/cupy/cupy-benchmark/blob/master/benchmarks/numpy/bench_linalg.py\n",
"# All four arrays are created with NumPy.\n",
"a = np.arange(1500 * 40000, dtype=np.float64).reshape(1500, 40000)  # 60e6 float64, ~0.48 GB\n",
"b = np.arange(40000 * 6000, dtype=np.float64).reshape(40000, 6000)  # 240e6 float64, ~1.92 GB\n",
"c = np.arange(6000)   # integer vector, length 6000\n",
"d = np.arange(40000)  # integer vector, length 40000"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## NumPy"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"33.8 ms ± 2.03 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"def np_rand_max(a, b):\n",
"    \"\"\"Evaluate ``dot(d, dot(b, c))`` with NumPy and discard the result.\n",
"\n",
"    ``a`` is accepted but not used; ``c`` and ``d`` are read from the\n",
"    notebook's global namespace.\n",
"    \"\"\"\n",
"    matvec = np.dot(b, c)\n",
"    np.dot(d, matvec)\n",
"\n",
"%timeit np_rand_max(a, b)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CuPy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The operands `b`, `c` and `d` are NumPy arrays. cp.dot() hands the call to the\n",
"# first operand's own .dot() method, so with NumPy inputs the product runs on the\n",
"# CPU and the timing merely repeats the NumPy measurement. Copy the operands to\n",
"# the GPU once, outside the timed function, so only device work is measured.\n",
"b_gpu = cp.asarray(b)\n",
"c_gpu = cp.asarray(c)\n",
"d_gpu = cp.asarray(d)\n",
"\n",
"def _sync():\n",
"    \"\"\"Record an event on the current stream and wait for it to complete.\"\"\"\n",
"    event = cp.cuda.stream.Event()\n",
"    event.record()\n",
"    event.synchronize()\n",
"\n",
"def cp_rand_max(a, b):\n",
"    \"\"\"Evaluate ``dot(d_gpu, dot(b, c_gpu))`` with CuPy and discard the result.\n",
"\n",
"    ``a`` is accepted but not used. ``b`` must be a CuPy array (pass ``b_gpu``);\n",
"    ``c_gpu`` and ``d_gpu`` are read from the notebook's global namespace.\n",
"    \"\"\"\n",
"    # https://github.com/cupy/cupy-benchmark/tree/master?tab=readme-ov-file#numpy-benchmark\n",
"    # The synchronization around the computation is borrowed from the @sync\n",
"    # decorator used in CuPy's emulation of NumPy benchmarks:\n",
"    # https://github.com/cupy/cupy-benchmark/blob/master/benchmarks/utils/helper.py\n",
"    # Kernel launches are asynchronous, so wait before and after the work to\n",
"    # make the wall-clock time cover the GPU computation itself.\n",
"    _sync()\n",
"    cp.dot(d_gpu, cp.dot(b, c_gpu))\n",
"    _sync()\n",
"\n",
"%timeit cp_rand_max(a, b_gpu)\n",
"# Alternative that handles synchronization itself:\n",
"# print(benchmark(cp_rand_max, (a, b_gpu), n_repeat=10))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment