Skip to content

Instantly share code, notes, and snippets.

@sklam
Created April 23, 2019 17:58
Show Gist options
  • Save sklam/e7ad0f2151872f47d72c2ea9e674806b to your computer and use it in GitHub Desktop.
Usage example for cuda_device_fn.inspect_ptx()
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from numba import cuda\n",
"from numba import types"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"@cuda.jit(device=True)\n",
"def foo(x, y):\n",
" return x + y\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Function name used in PTX _ZN8__main__7foo$241Eii\n"
]
}
],
"source": [
"args = (types.int32, types.int32)\n",
"cres = foo.compile(args)\n",
"\n",
"print(\"Function name used in PTX\", cres.fndesc.mangled_name)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"//\n",
"// Generated by NVIDIA NVVM Compiler\n",
"//\n",
"// Compiler Build ID: CL-22776078\n",
"// Cuda compilation tools, release 9.0, V9.0.175\n",
"// Based on LLVM 3.4svn\n",
"//\n",
"\n",
".version 6.0\n",
".target sm_30\n",
".address_size 64\n",
"\n",
"\t// .globl\t_ZN8__main__7foo$241Eii\n",
".common .global .align 8 .u64 _ZN08NumbaEnv8__main__7foo$241Eii;\n",
"\n",
".visible .func (.param .b32 func_retval0) _ZN8__main__7foo$241Eii(\n",
"\t.param .b64 _ZN8__main__7foo$241Eii_param_0,\n",
"\t.param .b32 _ZN8__main__7foo$241Eii_param_1,\n",
"\t.param .b32 _ZN8__main__7foo$241Eii_param_2\n",
")\n",
"{\n",
"\t.reg .b32 \t%r<2>;\n",
"\t.reg .b64 \t%rd<5>;\n",
"\n",
"\n",
"\tld.param.u64 \t%rd1, [_ZN8__main__7foo$241Eii_param_0];\n",
"\tld.param.s32 \t%rd2, [_ZN8__main__7foo$241Eii_param_1];\n",
"\tld.param.s32 \t%rd3, [_ZN8__main__7foo$241Eii_param_2];\n",
"\tadd.s64 \t%rd4, %rd3, %rd2;\n",
"\tst.u64 \t[%rd1], %rd4;\n",
"\tmov.u32 \t%r1, 0;\n",
"\tst.param.b32\t[func_retval0+0], %r1;\n",
"\tret;\n",
"}\n",
"\n",
"\n",
"\u0000\n"
]
}
],
"source": [
"ptx_in_bytes = foo.inspect_ptx(args)\n",
"\n",
"print(ptx_in_bytes.decode('ascii'))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment