August 8, 2022 09:04 · April 7, 2022 20:22 · November 18, 2021 11:12 · October 14, 2021 19:52 · October 1, 2021 16:13 · October 1, 2021 15:37
 diff --git a/numba/core/extending.py b/numba/core/extending.py
 index 9d005fe74..b42442a38 100644
 --- a/numba/core/extending.py
 +++ b/numba/core/extending.py
 @@ -155,8 +155,10 @@ def register_jitable(*args, **kwargs):
     def wrap(fn):
         # It is just a wrapper for @overload
         inline = kwargs.pop('inline', 'never')
 +        target = kwargs.pop('target', 'cpu')
 
 from numba import njit, f8
 from numba.typed import List
 from numba.extending import models, register_model



 class Interval(object):
    """
    A half-open interval on the real number line.
    """
 $ PYTHONMALLOC=malloc valgrind-numba python -m numba.runtests numba.tests.test_ufuncs
 ==7578== Memcheck, a memory error detector
 ==7578== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
 ==7578== Using Valgrind-3.17.0 and LibVEX; rerun with -h for copyright info
 ==7578== Command: python -m numba.runtests numba.tests.test_ufuncs
 ==7578== 
 ==7579== Warning: invalid file descriptor 1024 in syscall close()
 ==7579== Warning: invalid file descriptor 1025 in syscall close()
 ==7579== Warning: invalid file descriptor 1026 in syscall close()
 ==7579== Warning: invalid file descriptor 1027 in syscall close()
 # Works in conjunction with https://github.com/numba/numba/pull/7453

 from numba import cuda
 import asyncio


 async def f():
    s1 = cuda.stream()
    s2 = cuda.stream()
 Iterating
 Stream <CUDA stream 93950825260944 on <CUDA context c_void_p(93950819544272) of device 0>> done
 Stream <CUDA stream 93950825975488 on <CUDA context c_void_p(93950819544272) of device 0>> done
 import math
 from numba import cuda, njit, objmode
 from time import perf_counter
 import numpy as np

 import cupy as cp



 @njit
 import itertools

 import numba as nb
 from numba.experimental import jitclass
 from typing import List, Tuple, Dict
 from heapq import heappush, heappop


 # @jitclass
 class PurePythonPriorityQueue:
 $ python wagg.py 
 Running with 16777216 elements, of which approximately 25.0% are zero

 There are 12584753 nonzeroes in:
 [0.417022   0.72032449 0.         ... 0.20570723 0.36716537 0.0979951 ]

 The kernel found 12584753 elements, resulting in the array:
 [0.14349547 0.43006714 0.48695992 ... 0.         0.         0.        ]

 Traceback (most recent call last):
 import numpy as np
 from numba import njit
 from time import perf_counter


 # From https://en.wikipedia.org/wiki/Fast_Walsh%E2%80%93Hadamard_transform
 def fwht(a) -> None:
    """In-place Fast Walsh–Hadamard Transform of array a."""
    h = 1
    while h < len(a):
	diff --git a/numba/core/extending.py b/numba/core/extending.py
	index 9d005fe74..b42442a38 100644
	--- a/numba/core/extending.py
	+++ b/numba/core/extending.py
	@@ -155,8 +155,10 @@ def register_jitable(*args, **kwargs):
	def wrap(fn):
	# It is just a wrapper for @overload
	inline = kwargs.pop('inline', 'never')
	+ target = kwargs.pop('target', 'cpu')
	from numba import njit, f8
	from numba.typed import List
	from numba.extending import models, register_model



	class Interval(object):
	"""
	A half-open interval on the real number line.
	"""
	$ PYTHONMALLOC=malloc valgrind-numba python -m numba.runtests numba.tests.test_ufuncs
	==7578== Memcheck, a memory error detector
	==7578== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
	==7578== Using Valgrind-3.17.0 and LibVEX; rerun with -h for copyright info
	==7578== Command: python -m numba.runtests numba.tests.test_ufuncs
	==7578==
	==7579== Warning: invalid file descriptor 1024 in syscall close()
	==7579== Warning: invalid file descriptor 1025 in syscall close()
	==7579== Warning: invalid file descriptor 1026 in syscall close()
	==7579== Warning: invalid file descriptor 1027 in syscall close()
	# Works in conjunction with https://github.com/numba/numba/pull/7453

	from numba import cuda
	import asyncio


	async def f():
	s1 = cuda.stream()
	s2 = cuda.stream()
	Iterating
	Stream <CUDA stream 93950825260944 on <CUDA context c_void_p(93950819544272) of device 0>> done
	Stream <CUDA stream 93950825975488 on <CUDA context c_void_p(93950819544272) of device 0>> done
	import math
	from numba import cuda, njit, objmode
	from time import perf_counter
	import numpy as np

	import cupy as cp



	@njit
	import itertools

	import numba as nb
	from numba.experimental import jitclass
	from typing import List, Tuple, Dict
	from heapq import heappush, heappop


	# @jitclass
	class PurePythonPriorityQueue:
	$ python wagg.py
	Running with 16777216 elements, of which approximately 25.0% are zero

	There are 12584753 nonzeroes in:
	[0.417022 0.72032449 0. ... 0.20570723 0.36716537 0.0979951 ]

	The kernel found 12584753 elements, resulting in the array:
	[0.14349547 0.43006714 0.48695992 ... 0. 0. 0. ]

	Traceback (most recent call last):
	import numpy as np
	from numba import njit
	from time import perf_counter


	# From https://en.wikipedia.org/wiki/Fast_Walsh%E2%80%93Hadamard_transform
	def fwht(a) -> None:
	"""In-place Fast Walsh–Hadamard Transform of array a."""
	h = 1
	while h < len(a):