Skip to content

Instantly share code, notes, and snippets.

@gmarkall
Created November 18, 2021 11:12
Show Gist options
  • Save gmarkall/b1c61126382dd25ad5deda2ec2469496 to your computer and use it in GitHub Desktop.
Save gmarkall/b1c61126382dd25ad5deda2ec2469496 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "cf00c6e1-09a9-470b-a5c8-bd4e70bf11e5",
"metadata": {},
"source": [
"# numba: guvectorize vs kernel with cuda"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "bff10d74-4b74-4edf-9587-f66db6c568d1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('0.55.0dev0+600.ga25a17e48.dirty', '9.6.0')"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from numba import njit, guvectorize, float64, cuda\n",
"import cupy\n",
"import numba\n",
"numba.__version__, cupy.__version__"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c03da363-4505-45a7-a52d-efcdedf98a7b",
"metadata": {},
"outputs": [],
"source": [
"# Basic function\n",
"@cuda.jit(device=True)\n",
"def f(vec2):\n",
" x = vec2[0]\n",
" px = vec2[1] \n",
" x = x + px\n",
" vec2[0] = x\n",
" vec2[1] = px"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "7950d1e7-8ea6-4b96-a88c-72078e67fe38",
"metadata": {},
"outputs": [],
"source": [
"@guvectorize(['void(float64[:], float64[:])'], '(n)->(n)', target='cuda')\n",
"def vf(a_in, a_out):\n",
" v = a_in\n",
" f(a_in)\n",
" a_out[0] = v[0]\n",
" a_out[1] = v[1]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f94117ea-3d7f-4361-9e7e-2ad74b7c3a0d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 323 µs, sys: 0 ns, total: 323 µs\n",
"Wall time: 328 µs\n"
]
}
],
"source": [
"%%time\n",
"# Make input and output arrays\n",
"gu_vec=cupy.random.rand(100_000_000, 2)\n",
"kernel_vec = gu_vec.copy()\n",
"gu_scratch = cupy.zeros_like(gu_vec)\n",
"\n",
"# Doesn't seem to work\n",
"#vf(pvec[0:3], gu_scratch[0:3]).copy_to_host()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "47d48527-0b86-4c2a-a4de-575077864bc2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 7.69 ms, sys: 0 ns, total: 7.69 ms\n",
"Wall time: 10.5 ms\n"
]
}
],
"source": [
"%%time\n",
"guvec_out = vf(gu_vec, gu_scratch)\n",
"cuda.synchronize()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8a0f5f22-5c82-4f36-b1ed-ae36757feffd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.84482646, 0.29033154],\n",
" [1.10745473, 0.45235994],\n",
" [0.59781448, 0.44374413],\n",
" [0.54682673, 0.54149014],\n",
" [0.55322065, 0.23752734]])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"guvec_out[0:5].copy_to_host()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "75a4de2a-c9d8-4a11-a4c5-5f43dc978aee",
"metadata": {},
"outputs": [],
"source": [
"# Write a simple kernel\n",
"@cuda.jit('void(float64[:,::1], float64[:,::1])')\n",
"def kf(a_in, a_out):\n",
" i = cuda.grid(1)\n",
" v = a_in[i, :]\n",
" f(v)\n",
" for j in range(len(v)):\n",
" a_out[i, j] = v[j]\n",
" \n",
"\n",
"# Force it to compile\n",
"kernel_out = cupy.zeros_like(kernel_vec)\n",
"\n",
"#kf[1,1](vec[0:1], kernel_out[0:1]) "
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "531f144e-b301-47d4-a41e-2e65aec4885b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 45 ms, sys: 51 µs, total: 45 ms\n",
"Wall time: 81.1 ms\n"
]
}
],
"source": [
"%%time\n",
"# This is much faster\n",
"kf[len(kernel_vec),1](kernel_vec, kernel_out)\n",
"cuda.synchronize()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "e6b4b4ea-d8b6-47d7-adb3-38ed021b09e4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.84482646, 0.29033154],\n",
" [1.10745473, 0.45235994],\n",
" [0.59781448, 0.44374413]])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"kernel_out[0:3]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "25f1fcfd-b1be-4784-90e0-eb612d4466aa",
"metadata": {},
"outputs": [],
"source": [
"cupy.testing.assert_array_equal(guvec_out, kernel_out)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f7dcc610-60e2-4b0f-b7ec-a3980e8f45ae",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment