Skip to content

Instantly share code, notes, and snippets.

@gmarkall
gmarkall / backtrace.py
Created February 29, 2024 10:27
Collecting native backtraces during Python frame execution
import sys
from cffi import FFI
from numba import jit
ffi = FFI()
ffi.cdef("""
int backtrace(void** array, int size);
char** backtrace_symbols(void* const* array, int size);
void backtrace_symbols_fd(void* const* array, int size, int fd);
@gmarkall
gmarkall / usectx.py
Created January 6, 2023 19:40
Demonstration of using a Numba target context to get the LLVM type for a Numba type
from numba import float64
from numba.core import typing, cpu
# Create a Numba type - a 1D C-ordered array of float64
numba_arr_type = float64[::1]
print("Numba type:", numba_arr_type)
# Construct a Numba target context for the CPU and use it to get the LLVM type
# for the given Numba type in that context
typingctx = typing.Context()
@gmarkall
gmarkall / cuda_intrinsic_use.py
Created October 21, 2022 11:38
Example using an intrinsic defined for the CUDA target
# Kernel with an explicit 'void()' signature: it takes no arguments and
# returns nothing. It prints the value returned by cuda_clock64() twice, so
# the two printed values can be compared.
@cuda.jit('void()')
def f():
    print("1. Clock value is", cuda_clock64())
    print("2. Clock value is", cuda_clock64())


# Example output:
# 1. Clock value is 6670192
# 2. Clock value is 6723668

# Launch configuration [1, 1]: a single block containing a single thread, so
# each message is printed exactly once.
f[1, 1]()
@gmarkall
gmarkall / cuda_intrinsic_example.py
Created October 21, 2022 11:37
Example defining an intrinsic in the Numba CUDA target
from numba.cuda.extending import intrinsic
from llvmlite import ir
@intrinsic
def cuda_clock64(typingctx):
sig = types.uint64()
def codegen(context, builder, sig, args):
function_type = ir.FunctionType(ir.IntType(64), [])
instruction = "mov.u64 $0, %clock64;"
@gmarkall
gmarkall / example_using_cuda_overload.py
Created October 21, 2022 11:36
Example using an overload in the CUDA target
from numba import cuda
import numpy as np
# Kernel that prints the sum of the array it is given.
# NOTE(review): arr.sum() inside a CUDA kernel presumably relies on a 'sum'
# overload for arrays being registered for the CUDA target (e.g. with
# overload_method(types.Array, 'sum', target='cuda')) - confirm it is
# imported before this kernel is compiled.
@cuda.jit
def f(arr):
    print("Sum is", arr.sum())


# Prints "Sum is 10"
# Launch configuration [1, 1]: one block of one thread, so the message is
# printed once. np.arange(5) is 0 + 1 + 2 + 3 + 4 = 10.
f[1, 1](np.arange(5))
@gmarkall
gmarkall / overload_method_example.py
Created October 21, 2022 11:35
Example overloading a method in Numba's CUDA target
from numba.extending import overload_method
from numba import types
@overload_method(types.Array, 'sum', target='cuda')
def array_sum(arr):
if arr.ndim != 1:
return None
def sum_impl(arr):
res = 0
# Notes for https://numba.discourse.group/t/kernel-within-a-kernel/1582/4
import cupy as cp
def where(d2_array, col_index, _data_):
    """Return the rows of ``d2_array`` whose ``col_index`` column equals ``_data_``.

    The element-wise comparison yields one boolean per row; indexing with that
    mask selects the matching rows in their original order. This is the same
    selection as indexing with the index tuple produced by ``where(mask)``,
    without building the intermediate index array.

    Parameters:
        d2_array: two-dimensional array to filter (it is indexed as
            ``d2_array[:, col_index]``).
        col_index: index of the column to compare.
        _data_: value the column entries are compared against with ``==``.

    Returns:
        An array holding only the matching rows; it has zero rows when
        nothing matches.
    """
    row_mask = d2_array[:, col_index] == _data_
    return d2_array[row_mask]
def fill_arrays_with_data_based_on_unique_data(unique_data, cp_data,
# Use with https://github.com/gmarkall/numba/tree/cuda-linker-options
from numba import cuda, float32, void
def axpy(r, a, x, y):
start = cuda.grid(1)
step = cuda.gridsize(1)
for i in range(start, len(r), step):
# Implements unicode equality for the CUDA target
from numba import cuda, types
from numba.core.extending import overload
from numba.core.pythonapi import (PY_UNICODE_1BYTE_KIND,
PY_UNICODE_2BYTE_KIND,
PY_UNICODE_4BYTE_KIND)
from numba.cpython.unicode import deref_uint8, deref_uint16, deref_uint32
import numpy as np
import operator
diff --git a/numba/core/extending.py b/numba/core/extending.py
index 9d005fe74..b42442a38 100644
--- a/numba/core/extending.py
+++ b/numba/core/extending.py
@@ -155,8 +155,10 @@ def register_jitable(*args, **kwargs):
def wrap(fn):
# It is just a wrapper for @overload
inline = kwargs.pop('inline', 'never')
+ target = kwargs.pop('target', 'cpu')