Created
November 18, 2021 11:12
-
-
Save gmarkall/b1c61126382dd25ad5deda2ec2469496 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "cf00c6e1-09a9-470b-a5c8-bd4e70bf11e5", | |
"metadata": {}, | |
"source": [ | |
"# Numba: `guvectorize` (CUDA target) vs. a hand-written CUDA kernel" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "bff10d74-4b74-4edf-9587-f66db6c568d1", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"('0.55.0dev0+600.ga25a17e48.dirty', '9.6.0')" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import math\n", | |
"\n", | |
"import cupy\n", | |
"import numba\n", | |
"from numba import njit, guvectorize, float64, cuda\n", | |
"numba.__version__, cupy.__version__" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "c03da363-4505-45a7-a52d-efcdedf98a7b", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Basic function\n", | |
"@cuda.jit(device=True)\n", | |
"def f(vec2):\n", | |
" x = vec2[0]\n", | |
" px = vec2[1] \n", | |
" x = x + px\n", | |
" vec2[0] = x\n", | |
" vec2[1] = px" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "7950d1e7-8ea6-4b96-a88c-72078e67fe38", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@guvectorize(['void(float64[:], float64[:])'], '(n)->(n)', target='cuda')\n", | |
"def vf(a_in, a_out):\n", | |
" v = a_in\n", | |
" f(a_in)\n", | |
" a_out[0] = v[0]\n", | |
" a_out[1] = v[1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "f94117ea-3d7f-4361-9e7e-2ad74b7c3a0d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 323 µs, sys: 0 ns, total: 323 µs\n", | |
"Wall time: 328 µs\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"# Make input and output arrays\n", | |
"gu_vec=cupy.random.rand(100_000_000, 2)\n", | |
"kernel_vec = gu_vec.copy()\n", | |
"gu_scratch = cupy.zeros_like(gu_vec)\n", | |
"\n", | |
"# Doesn't seem to work\n", | |
"#vf(pvec[0:3], gu_scratch[0:3]).copy_to_host()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "47d48527-0b86-4c2a-a4de-575077864bc2", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 7.69 ms, sys: 0 ns, total: 7.69 ms\n", | |
"Wall time: 10.5 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"guvec_out = vf(gu_vec, gu_scratch)\n", | |
"cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "8a0f5f22-5c82-4f36-b1ed-ae36757feffd", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[0.84482646, 0.29033154],\n", | |
" [1.10745473, 0.45235994],\n", | |
" [0.59781448, 0.44374413],\n", | |
" [0.54682673, 0.54149014],\n", | |
" [0.55322065, 0.23752734]])" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"guvec_out[0:5].copy_to_host()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "75a4de2a-c9d8-4a11-a4c5-5f43dc978aee", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Write a simple kernel\n", | |
"@cuda.jit('void(float64[:,::1], float64[:,::1])')\n", | |
"def kf(a_in, a_out):\n", | |
" i = cuda.grid(1)\n", | |
" v = a_in[i, :]\n", | |
" f(v)\n", | |
" for j in range(len(v)):\n", | |
" a_out[i, j] = v[j]\n", | |
" \n", | |
"\n", | |
"# Force it to compile\n", | |
"kernel_out = cupy.zeros_like(kernel_vec)\n", | |
"\n", | |
"#kf[1,1](vec[0:1], kernel_out[0:1]) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "531f144e-b301-47d4-a41e-2e65aec4885b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 45 ms, sys: 51 µs, total: 45 ms\n", | |
"Wall time: 81.1 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"# This is much faster\n", | |
"kf[len(kernel_vec),1](kernel_vec, kernel_out)\n", | |
"cuda.synchronize()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "e6b4b4ea-d8b6-47d7-adb3-38ed021b09e4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[0.84482646, 0.29033154],\n", | |
" [1.10745473, 0.45235994],\n", | |
" [0.59781448, 0.44374413]])" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"kernel_out[0:3]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "25f1fcfd-b1be-4784-90e0-eb612d4466aa", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cupy.testing.assert_array_equal(guvec_out, kernel_out)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "f7dcc610-60e2-4b0f-b7ec-a3980e8f45ae", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment