Created April 23, 2019 17:58
-
-
Save sklam/e7ad0f2151872f47d72c2ea9e674806b to your computer and use it in GitHub Desktop.
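Usage example for cuda_device_fn.inspect_ptx(): compiling a Numba CUDA device function (declared with @cuda.jit(device=True)) for a given argument signature and printing its generated PTX.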
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from numba import cuda\n", | |
"from numba import types" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@cuda.jit(device=True)\n", | |
"def foo(x, y):\n", | |
" return x + y\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Function name used in PTX _ZN8__main__7foo$241Eii\n" | |
] | |
} | |
], | |
"source": [ | |
"args = (types.int32, types.int32)\n", | |
"cres = foo.compile(args)\n", | |
"\n", | |
"print(\"Function name used in PTX\", cres.fndesc.mangled_name)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"//\n", | |
"// Generated by NVIDIA NVVM Compiler\n", | |
"//\n", | |
"// Compiler Build ID: CL-22776078\n", | |
"// Cuda compilation tools, release 9.0, V9.0.175\n", | |
"// Based on LLVM 3.4svn\n", | |
"//\n", | |
"\n", | |
".version 6.0\n", | |
".target sm_30\n", | |
".address_size 64\n", | |
"\n", | |
"\t// .globl\t_ZN8__main__7foo$241Eii\n", | |
".common .global .align 8 .u64 _ZN08NumbaEnv8__main__7foo$241Eii;\n", | |
"\n", | |
".visible .func (.param .b32 func_retval0) _ZN8__main__7foo$241Eii(\n", | |
"\t.param .b64 _ZN8__main__7foo$241Eii_param_0,\n", | |
"\t.param .b32 _ZN8__main__7foo$241Eii_param_1,\n", | |
"\t.param .b32 _ZN8__main__7foo$241Eii_param_2\n", | |
")\n", | |
"{\n", | |
"\t.reg .b32 \t%r<2>;\n", | |
"\t.reg .b64 \t%rd<5>;\n", | |
"\n", | |
"\n", | |
"\tld.param.u64 \t%rd1, [_ZN8__main__7foo$241Eii_param_0];\n", | |
"\tld.param.s32 \t%rd2, [_ZN8__main__7foo$241Eii_param_1];\n", | |
"\tld.param.s32 \t%rd3, [_ZN8__main__7foo$241Eii_param_2];\n", | |
"\tadd.s64 \t%rd4, %rd3, %rd2;\n", | |
"\tst.u64 \t[%rd1], %rd4;\n", | |
"\tmov.u32 \t%r1, 0;\n", | |
"\tst.param.b32\t[func_retval0+0], %r1;\n", | |
"\tret;\n", | |
"}\n", | |
"\n", | |
"\n", | |
"\u0000\n" | |
] | |
} | |
], | |
"source": [ | |
"ptx_in_bytes = foo.inspect_ptx(args)\n", | |
"\n", | |
"print(ptx_in_bytes.decode('ascii'))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment