sklam · December 29, 2015 18:49 · sklam · Nov 29, 2013
diff --git a/functionwrapper.py b/functionwrapper.py
 @contextlib.contextmanager
 def benchmark(ident):
    """
    Time the body of a ``with`` block and print the elapsed milliseconds.

    ident: label printed in front of the timing.

    The report is printed from a ``finally`` clause, so a timing line is
    still emitted when the timed body raises; the exception then
    propagates to the caller unchanged.
    """
    ts = timer()
    try:
        yield
    finally:
        te = timer()
        # timer() differences are scaled by 1000 and labelled "ms"
        print('%s %.3fms' % (ident, (te - ts)*1000))

 class FunctionWrapper(object):
    """
    Callable object produced by applying @jit to a function.
    """

    def __init__(self, dispatcher, py_func, abstract=False, opaque=False):
        # The wrapped Python function and the dispatcher it was created with
        self.py_func = py_func
        self.dispatcher = dispatcher

        # Flags supplied by the caller
        self.abstract = abstract
        self.opaque = opaque

        # Per-instance tables; all start out empty
        self.llvm_funcs = {}
        self.ctypes_funcs = {}
        self.envs = {}

        self.implementor = None

    def __call__(self, *args, **kwargs):
        """
        Translate for the given argument types, call the native function
        through ctypes and return the result as a Python object.

        Each stage (setup, translate, precall, call, postcall) runs inside
        a ``benchmark`` block labelled with the wrapped function's name.
        """
        from numba2.representation import byref, stack_allocate
        from numba2.conversion import (
            toctypes, fromctypes, toobject, fromobject, ctype)

        func_name = self.py_func.__name__

        with benchmark('setup %s' % func_name):
            # References held here stay alive until this call returns
            alive = list(args) + list(kwargs.values())

            # Flatten positional/keyword arguments, then type each one
            ordered = flatargs(self.dispatcher.f, args, kwargs)
            arg_types = list(map(typeof, ordered))

        with benchmark('translate %s' % func_name):
            cfunc, restype = self.translate(arg_types)

        with benchmark('precall %s' % func_name):
            # Python objects -> numba values
            values = [fromobject(obj, ty)
                      for obj, ty in zip(ordered, arg_types)]

            # numba values -> ctypes values; arguments whose type is both
            # byref and stack-allocated are handed over as pointers
            c_args = []
            for value, ty in zip(values, arg_types):
                c_value = toctypes(value, ty, alive)
                by_pointer = byref(ty) and stack_allocate(ty)
                c_args.append(ctypes.pointer(c_value) if by_pointer else c_value)

            result_ctype = ctype(restype)
            if byref(restype):
                # The callee writes its result through a trailing pointer
                # argument, so the C-level return type becomes void
                c_result = result_ctype()
                c_args.append(ctypes.pointer(c_result))
                result_ctype = None

            # The ctypes function built from the LLVM IR uses different
            # (structurally equivalent) struct types, hence the cast
            prototype = ctypes.PYFUNCTYPE(result_ctype, *map(type, c_args))
            native = ctypes.cast(cfunc, prototype)

        with benchmark('call %s' % func_name):
            if not byref(restype):
                c_result = native(*c_args)
            else:
                # Result already lives in c_result via the out-pointer
                native(*c_args)

        with benchmark('postcall %s' % func_name):
            # ctypes value -> numba value -> Python object
            result = fromctypes(c_result, restype)
            return toobject(result, restype)
diff --git a/new-numba.ll b/new-numba.ll
 ; testarray.new_sum(%0: pointer to a three-field struct, %1: i32 count) -> i32
 ; Adds up %1 i32 values read through the i32* stored in field 2 of *%0,
 ; advancing the pointer each iteration by an i64 offset that is loaded once
 ; before the loop. Returns 0 without touching %0 when %1 <= 0.
 ; NOTE(review): field 2 is presumably the array's data pointer and field 0
 ; its shape/stride descriptor -- confirm against the array layout.
 define i32 @testarray.new_sum({ { i64*, i64 }*, { i64*, i64 }*, i32* }* nocapture, i32) #0 {
 testarray.new_sum:
  ; Skip the loop entirely for a non-positive count
  %2 = icmp sgt i32 %1, 0
  br i1 %2, label %testarray.new_sum5.lr.ph, label %testarray.new_sum6

 testarray.new_sum5.lr.ph:                         ; preds = %testarray.new_sum
  ; Loop preheader: widen the count, fetch the element pointer (field 2)
  ; and the per-iteration offset (first i64 reached through field 0)
  %3 = sext i32 %1 to i64
  %4 = getelementptr { { i64*, i64 }*, { i64*, i64 }*, i32* }* %0, i64 0, i32 2
  %5 = load i32** %4, align 8
  %6 = getelementptr inbounds { { i64*, i64 }*, { i64*, i64 }*, i32* }* %0, i64 0, i32 0
  %7 = load { i64*, i64 }** %6, align 8
  %8 = getelementptr inbounds { i64*, i64 }* %7, i64 0, i32 0
  %9 = load i64** %8, align 8
  %.0.i1.i.i.i.i2.i.i.i.i.i = load i64* %9, align 8
  br label %testarray.new_sum5

 testarray.new_sum5:                               ; preds = %testarray.new_sum5, %testarray.new_sum5.lr.ph
  ; %lsr.iv26 counts down from the widened count to 0; %lsr.iv is the
  ; current element pointer; %.324 is the running sum
  %lsr.iv26 = phi i64 [ %lsr.iv.next, %testarray.new_sum5 ], [ %3, %testarray.new_sum5.lr.ph ]
  %lsr.iv = phi i32* [ %scevgep27, %testarray.new_sum5 ], [ %5, %testarray.new_sum5.lr.ph ]
  %.324 = phi i32 [ %.0.i, %testarray.new_sum5 ], [ 0, %testarray.new_sum5.lr.ph ]
  %.0.i1.i.i.i.i.i.i = load i32* %lsr.iv, align 4
  %.0.i = add i32 %.0.i1.i.i.i.i.i.i, %.324
  ; Offset is applied to an i32*, i.e. it is counted in i32 elements
  %scevgep27 = getelementptr i32* %lsr.iv, i64 %.0.i1.i.i.i.i2.i.i.i.i.i
  %lsr.iv.next = add i64 %lsr.iv26, -1
  %exitcond = icmp eq i64 %lsr.iv.next, 0
  br i1 %exitcond, label %testarray.new_sum6, label %testarray.new_sum5

 testarray.new_sum6:                               ; preds = %testarray.new_sum5, %testarray.new_sum
  ; Result is 0 when the loop was skipped, otherwise the final sum
  %.3.lcssa = phi i32 [ 0, %testarray.new_sum ], [ %.0.i, %testarray.new_sum5 ]
  ret i32 %.3.lcssa
 }
diff --git a/old-numba.ll b/old-numba.ll
 ; __numba_specialized_2_testarray_2E_raw(%a: pointer to a ten-field struct,
 ; %M: i64 count) -> i32
 ; Adds up %M i32 values starting at the i8* stored in field 2 of *%a,
 ; advancing the pointer each iteration by %5. Calls Py_INCREF on %a at entry
 ; and Py_XDECREF on it before returning. Returns 0 when %M <= 0.
 ; NOTE(review): the struct presumably mirrors a NumPy array object (field 2
 ; = data, field 5 = strides) -- confirm against the runtime headers.
 define i32 @__numba_specialized_2_testarray_2E_raw({ i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a, i64 %M) {
 entry:
  %0 = bitcast { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a to { i64, i8* }*
  tail call void @Py_INCREF({ i64, i8* }* %0)
  ; Start pointer (field 2) and per-iteration offset (first i64 reached
  ; through field 5) are loaded unconditionally, before the %M > 0 test
  %1 = getelementptr { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a, i64 0, i32 2
  %2 = load i8** %1, align 8, !tbaa !2
  %3 = getelementptr { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a, i64 0, i32 5
  %4 = load i64** %3, align 8, !tbaa !5
  %5 = load i64* %4, align 8
  %6 = icmp sgt i64 %M, 0
  br i1 %6, label %"loop_body_30:20", label %"exit_for_29:4"

 "exit_for_29:4":                                  ; preds = %"loop_body_30:20", %entry
  ; Result is 0 when the loop was skipped, otherwise the final sum
  %sumval_2.lcssa = phi i32 [ 0, %entry ], [ %9, %"loop_body_30:20" ]
  %7 = bitcast { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a to { i64, i8* }*
  tail call void @Py_XDECREF({ i64, i8* }* %7)
  ret i32 %sumval_2.lcssa

 "loop_body_30:20":                                ; preds = %entry, %"loop_body_30:20"
  ; %lsr.iv3 counts down from %M to 0; %lsr.iv is the current position as
  ; an i8*; %sumval_23 is the running sum
  %lsr.iv3 = phi i64 [ %lsr.iv.next, %"loop_body_30:20" ], [ %M, %entry ]
  %lsr.iv = phi i8* [ %10, %"loop_body_30:20" ], [ %2, %entry ]
  %sumval_23 = phi i32 [ %9, %"loop_body_30:20" ], [ 0, %entry ]
  %lsr.iv2 = bitcast i8* %lsr.iv to i32*
  %lsr.iv1 = bitcast i8* %lsr.iv to i1*
  %8 = load i32* %lsr.iv2, align 4, !tbaa !2
  %9 = add i32 %8, %sumval_23
  ; Offset %5 is applied through an i1* getelementptr
  ; NOTE(review): presumably this makes %5 a byte stride -- confirm
  %scevgep = getelementptr i1* %lsr.iv1, i64 %5
  %10 = bitcast i1* %scevgep to i8*
  %lsr.iv.next = add i64 %lsr.iv3, -1
  %exitcond = icmp eq i64 %lsr.iv.next, 0
  br i1 %exitcond, label %"exit_for_29:4", label %"loop_body_30:20"
 }

diff --git a/testarray.py b/testarray.py
 from numba import autojit
 from numba2 import jit
 import numpy as np

 # Sum a[0] .. a[M-1] with an explicit index loop, starting from 0.
 # Decorated with `jit` (imported from numba2 in this file).
 # The loop is deliberately left as plain indexing: this function body is
 # what the decorator receives, so rewriting it could change the output.
 @jit
 def new_sum(a, M):
    sumval = 0
    for i in range(M):
          sumval += a[i]
    return sumval

 # Sum a[0] .. a[M-1] with an explicit index loop, starting from 0.
 # Decorated with `autojit` (imported from numba in this file).
 # The loop is deliberately left as plain indexing: this function body is
 # what the decorator receives, so rewriting it could change the output.
 @autojit
 def old_sum(a, M):
    sumval = 0
    for i in range(M):
          sumval += a[i]
    return sumval

 # Shared input array: the int32 values 0 .. 999999
 A = np.arange(10 ** 6, dtype=np.int32)

 def runnew():
    """
    Call new_sum once over the full length of A; the result is discarded.
    """
    length = A.shape[0]
    new_sum(A, length)

 def runold():
    """
    Call old_sum once over the full length of A; the result is discarded.
    """
    length = A.shape[0]
    old_sum(A, length)
	@contextlib.contextmanager
	def benchmark(ident):
	    """
	    Time the body of a ``with`` block and print the elapsed milliseconds.

	    ident: label printed in front of the timing.

	    The report is printed from a ``finally`` clause, so a timing line is
	    still emitted when the timed body raises; the exception then
	    propagates to the caller unchanged.
	    """
	    ts = timer()
	    try:
	        yield
	    finally:
	        te = timer()
	        # timer() differences are scaled by 1000 and labelled "ms"
	        print('%s %.3fms' % (ident, (te - ts)*1000))

	class FunctionWrapper(object):
	    """
	    Result of @jit for functions.
	    """

	    def __init__(self, dispatcher, py_func, abstract=False, opaque=False):
	        """
	        Store the dispatcher, the wrapped Python function and the
	        abstract/opaque flags, and create empty per-instance tables.
	        """
	        self.dispatcher = dispatcher
	        self.py_func = py_func
	        self.abstract = abstract

	        # Per-instance tables; all start out empty
	        self.llvm_funcs = {}
	        self.ctypes_funcs = {}
	        self.envs = {}

	        self.opaque = opaque
	        self.implementor = None

	    def __call__(self, *args, **kwargs):
	        """
	        Translate for the given argument types, call the native function
	        through ctypes and return the result as a Python object.

	        Accepts arbitrary positional and keyword arguments; they are
	        flattened against ``self.dispatcher.f`` before being typed.
	        Each stage (setup, translate, precall, call, postcall) runs
	        inside a ``benchmark`` block labelled with the function's name.

	        NOTE(review): relies on ``self.translate`` returning a
	        ``(cfunc, restype)`` pair; it is not defined in the portion of
	        the class shown here.
	        """
	        from numba2.representation import byref, stack_allocate
	        from numba2.conversion import (
	            toctypes, fromctypes, toobject, fromobject, ctype)

	        with benchmark('setup %s' % self.py_func.__name__):
	            # Keep this alive for the duration of the call
	            keepalive = list(args) + list(kwargs.values())

	            # Order arguments
	            args = flatargs(self.dispatcher.f, args, kwargs)
	            argtypes = [typeof(x) for x in args]

	        with benchmark('translate %s' % self.py_func.__name__):
	            # Translate
	            cfunc, restype = self.translate(argtypes)

	        with benchmark('precall %s' % self.py_func.__name__):
	            # Construct numba values
	            arg_objs = list(starmap(fromobject, zip(args, argtypes)))

	            # Map numba values to a ctypes representation; arguments whose
	            # type is both byref and stack-allocated are passed as pointers
	            args = []
	            for arg, argtype in zip(arg_objs, argtypes):
	                c_arg = toctypes(arg, argtype, keepalive)
	                if byref(argtype) and stack_allocate(argtype):
	                    c_arg = ctypes.pointer(c_arg)
	                args.append(c_arg)

	            # We need this cast since the ctypes function constructed from
	            # LLVM IR has different structs (which are structurally
	            # equivalent)
	            c_restype = ctype(restype)
	            if byref(restype):
	                # Result comes back through a trailing pointer argument
	                c_result = c_restype()  # dummy result value
	                args.append(ctypes.pointer(c_result))
	                c_restype = None  # void

	            c_signature = ctypes.PYFUNCTYPE(
	                c_restype, *[type(arg) for arg in args])
	            cfunc = ctypes.cast(cfunc, c_signature)

	        with benchmark('call %s' % self.py_func.__name__):
	            # Handle calling convention
	            if byref(restype):
	                cfunc(*args)
	            else:
	                c_result = cfunc(*args)

	        with benchmark('postcall %s' % self.py_func.__name__):
	            # Map ctypes result back to a python value
	            result = fromctypes(c_result, restype)
	            result_obj = toobject(result, restype)

	            return result_obj
	; testarray.new_sum(%0: pointer to a three-field struct, %1: i32 count) -> i32
	; Adds up %1 i32 values read through the i32* stored in field 2 of *%0,
	; advancing the pointer each iteration by an i64 offset that is loaded once
	; before the loop. Returns 0 without touching %0 when %1 <= 0.
	; NOTE(review): field 2 is presumably the array's data pointer and field 0
	; its shape/stride descriptor -- confirm against the array layout.
	define i32 @testarray.new_sum({ { i64*, i64 }*, { i64*, i64 }*, i32* }* nocapture, i32) #0 {
	testarray.new_sum:
	  ; Skip the loop entirely for a non-positive count
	  %2 = icmp sgt i32 %1, 0
	  br i1 %2, label %testarray.new_sum5.lr.ph, label %testarray.new_sum6

	testarray.new_sum5.lr.ph:                         ; preds = %testarray.new_sum
	  ; Loop preheader: widen the count, fetch the element pointer (field 2)
	  ; and the per-iteration offset (first i64 reached through field 0)
	  %3 = sext i32 %1 to i64
	  %4 = getelementptr { { i64*, i64 }*, { i64*, i64 }*, i32* }* %0, i64 0, i32 2
	  %5 = load i32** %4, align 8
	  %6 = getelementptr inbounds { { i64*, i64 }*, { i64*, i64 }*, i32* }* %0, i64 0, i32 0
	  %7 = load { i64*, i64 }** %6, align 8
	  %8 = getelementptr inbounds { i64*, i64 }* %7, i64 0, i32 0
	  %9 = load i64** %8, align 8
	  %.0.i1.i.i.i.i2.i.i.i.i.i = load i64* %9, align 8
	  br label %testarray.new_sum5

	testarray.new_sum5:                               ; preds = %testarray.new_sum5, %testarray.new_sum5.lr.ph
	  ; %lsr.iv26 counts down from the widened count to 0; %lsr.iv is the
	  ; current element pointer; %.324 is the running sum
	  %lsr.iv26 = phi i64 [ %lsr.iv.next, %testarray.new_sum5 ], [ %3, %testarray.new_sum5.lr.ph ]
	  %lsr.iv = phi i32* [ %scevgep27, %testarray.new_sum5 ], [ %5, %testarray.new_sum5.lr.ph ]
	  %.324 = phi i32 [ %.0.i, %testarray.new_sum5 ], [ 0, %testarray.new_sum5.lr.ph ]
	  %.0.i1.i.i.i.i.i.i = load i32* %lsr.iv, align 4
	  %.0.i = add i32 %.0.i1.i.i.i.i.i.i, %.324
	  ; Offset is applied to an i32*, i.e. it is counted in i32 elements
	  %scevgep27 = getelementptr i32* %lsr.iv, i64 %.0.i1.i.i.i.i2.i.i.i.i.i
	  %lsr.iv.next = add i64 %lsr.iv26, -1
	  %exitcond = icmp eq i64 %lsr.iv.next, 0
	  br i1 %exitcond, label %testarray.new_sum6, label %testarray.new_sum5

	testarray.new_sum6:                               ; preds = %testarray.new_sum5, %testarray.new_sum
	  ; Result is 0 when the loop was skipped, otherwise the final sum
	  %.3.lcssa = phi i32 [ 0, %testarray.new_sum ], [ %.0.i, %testarray.new_sum5 ]
	  ret i32 %.3.lcssa
	}
	; __numba_specialized_2_testarray_2E_raw(%a: pointer to a ten-field struct,
	; %M: i64 count) -> i32
	; Adds up %M i32 values starting at the i8* stored in field 2 of *%a,
	; advancing the pointer each iteration by %5. Calls Py_INCREF on %a at entry
	; and Py_XDECREF on it before returning. Returns 0 when %M <= 0.
	; NOTE(review): the struct presumably mirrors a NumPy array object (field 2
	; = data, field 5 = strides) -- confirm against the runtime headers.
	define i32 @__numba_specialized_2_testarray_2E_raw({ i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a, i64 %M) {
	entry:
	  %0 = bitcast { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a to { i64, i8* }*
	  tail call void @Py_INCREF({ i64, i8* }* %0)
	  ; Start pointer (field 2) and per-iteration offset (first i64 reached
	  ; through field 5) are loaded unconditionally, before the %M > 0 test
	  %1 = getelementptr { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a, i64 0, i32 2
	  %2 = load i8** %1, align 8, !tbaa !2
	  %3 = getelementptr { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a, i64 0, i32 5
	  %4 = load i64** %3, align 8, !tbaa !5
	  %5 = load i64* %4, align 8
	  %6 = icmp sgt i64 %M, 0
	  br i1 %6, label %"loop_body_30:20", label %"exit_for_29:4"

	"exit_for_29:4":                                  ; preds = %"loop_body_30:20", %entry
	  ; Result is 0 when the loop was skipped, otherwise the final sum
	  %sumval_2.lcssa = phi i32 [ 0, %entry ], [ %9, %"loop_body_30:20" ]
	  %7 = bitcast { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a to { i64, i8* }*
	  tail call void @Py_XDECREF({ i64, i8* }* %7)
	  ret i32 %sumval_2.lcssa

	"loop_body_30:20":                                ; preds = %entry, %"loop_body_30:20"
	  ; %lsr.iv3 counts down from %M to 0; %lsr.iv is the current position as
	  ; an i8*; %sumval_23 is the running sum
	  %lsr.iv3 = phi i64 [ %lsr.iv.next, %"loop_body_30:20" ], [ %M, %entry ]
	  %lsr.iv = phi i8* [ %10, %"loop_body_30:20" ], [ %2, %entry ]
	  %sumval_23 = phi i32 [ %9, %"loop_body_30:20" ], [ 0, %entry ]
	  %lsr.iv2 = bitcast i8* %lsr.iv to i32*
	  %lsr.iv1 = bitcast i8* %lsr.iv to i1*
	  %8 = load i32* %lsr.iv2, align 4, !tbaa !2
	  %9 = add i32 %8, %sumval_23
	  ; Offset %5 is applied through an i1* getelementptr
	  ; NOTE(review): presumably this makes %5 a byte stride -- confirm
	  %scevgep = getelementptr i1* %lsr.iv1, i64 %5
	  %10 = bitcast i1* %scevgep to i8*
	  %lsr.iv.next = add i64 %lsr.iv3, -1
	  %exitcond = icmp eq i64 %lsr.iv.next, 0
	  br i1 %exitcond, label %"exit_for_29:4", label %"loop_body_30:20"
	}
	from numba import autojit
	from numba2 import jit
	import numpy as np

	# Sum a[0] .. a[M-1] with an explicit index loop, starting from 0.
	# Decorated with `jit` (imported from numba2 in this file).
	# The loop is deliberately left as plain indexing: this function body is
	# what the decorator receives, so rewriting it could change the output.
	@jit
	def new_sum(a, M):
	    sumval = 0
	    for i in range(M):
	        sumval += a[i]
	    return sumval

	# Sum a[0] .. a[M-1] with an explicit index loop, starting from 0.
	# Decorated with `autojit` (imported from numba in this file).
	# The loop is deliberately left as plain indexing: this function body is
	# what the decorator receives, so rewriting it could change the output.
	@autojit
	def old_sum(a, M):
	    sumval = 0
	    for i in range(M):
	        sumval += a[i]
	    return sumval

	# Shared input array: the int32 values 0 .. 999999
	A = np.arange(10 ** 6, dtype=np.int32)

	def runnew():
	    """
	    Call new_sum once over the full length of A; the result is discarded.
	    """
	    new_sum(A, A.shape[0])

	def runold():
	    """
	    Call old_sum once over the full length of A; the result is discarded.
	    """
	    old_sum(A, A.shape[0])