Skip to content

Instantly share code, notes, and snippets.

@sklam
Last active December 29, 2015 18:49
Show Gist options
  • Save sklam/7713512 to your computer and use it in GitHub Desktop.
Save sklam/7713512 to your computer and use it in GitHub Desktop.
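Comparing the new numba (numba2 `@jit`) with the old numba (`@autojit`): call-path timing instrumentation, generated LLVM IR, and a benchmark script with timings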
@contextlib.contextmanager
def benchmark(ident):
    """
    Time the body of a ``with`` block and print the elapsed time.

    ident -- label printed in front of the measurement.

    One line of the form ``'<ident> <elapsed>ms'`` (three decimals) is
    printed when the block finishes.  The report is emitted from a
    ``finally`` clause, so it is still printed when the timed body raises;
    the exception then propagates to the caller unchanged.

    ``timer`` is a module-level name that is not visible in this excerpt.
    NOTE(review): the ``* 1000`` and the "ms" suffix assume it returns
    seconds -- confirm against the module's imports.
    """
    ts = timer()
    try:
        yield
    finally:
        # Take the end timestamp first so printing is not part of the
        # measured interval.
        te = timer()
        print('%s %.3fms' % (ident, (te - ts)*1000))
class FunctionWrapper(object):
    """
    Result of @jit for functions.

    Instances are callable: ``__call__`` takes Python arguments, runs the
    compiled function obtained from ``self.translate`` and returns a Python
    object.  In this copy every phase of ``__call__`` runs under the
    ``benchmark`` context manager, labelled with the phase name followed by
    ``py_func.__name__``.
    """

    def __init__(self, dispatcher, py_func, abstract=False, opaque=False):
        """
        Store the constructor arguments and create empty per-wrapper state.

        dispatcher -- object whose ``f`` attribute is read in ``__call__``
                      to order the call arguments.
        py_func    -- the Python function; its ``__name__`` labels the
                      benchmark output.
        abstract   -- stored as ``self.abstract`` (not read in this excerpt).
        opaque     -- stored as ``self.opaque`` (not read in this excerpt).
        """
        self.dispatcher = dispatcher
        self.py_func = py_func
        # self.signature = signature
        self.abstract = abstract
        # The three dicts below start empty and are not touched again in
        # this excerpt; presumably they are filled in by code elsewhere.
        self.llvm_funcs = {}
        self.ctypes_funcs = {}
        self.envs = {}
        self.opaque = opaque
        self.implementor = None

    def __call__(self, *args, **kwargs):
        """
        Run the compiled function on Python arguments; return a Python value.

        Phases, each timed separately by ``benchmark``:

        setup     -- order the arguments with ``flatargs`` and compute one
                     type per argument with ``typeof``.
        translate -- ``self.translate(argtypes)`` yields ``(cfunc, restype)``
                     (``translate`` is defined outside this excerpt).
        precall   -- convert the arguments with ``fromobject`` and
                     ``toctypes``, then cast ``cfunc`` to a
                     ``ctypes.PYFUNCTYPE`` signature built from the converted
                     arguments.
        call      -- invoke ``cfunc``.
        postcall  -- convert the result with ``fromctypes`` and ``toobject``.
        """
        # Imported inside the method rather than at module level.
        from numba2.representation import byref, stack_allocate
        from numba2.conversion import (
            toctypes, fromctypes, toobject, fromobject, ctype)
        #from numba2.support.ctypes_support import CTypesStruct
        #from numba2.types import Function

        with benchmark('setup %s' % self.py_func.__name__):
            # Keep this alive for the duration of the call
            keepalive = list(args) + list(kwargs.values())
            # Order arguments
            args = flatargs(self.dispatcher.f, args, kwargs)
            argtypes = [typeof(x) for x in args]

        with benchmark('translate %s' % self.py_func.__name__):
            # Translate
            cfunc, restype = self.translate(argtypes)

        with benchmark('precall %s' % self.py_func.__name__):
            # Construct numba values
            arg_objs = list(starmap(fromobject, zip(args, argtypes)))
            # Map numba values to a ctypes representation
            # (`args` is rebound here: from now on it holds ctypes values)
            args = []
            for arg, argtype in zip(arg_objs, argtypes):
                c_arg = toctypes(arg, argtype, keepalive)
                # Arguments for which both predicates hold are passed as a
                # pointer to the converted value instead of the value itself.
                if byref(argtype) and stack_allocate(argtype):
                    c_arg = ctypes.pointer(c_arg)
                args.append(c_arg)
            # We need this cast since the ctypes function constructed from LLVM
            # IR has different structs (which are structurally equivalent)
            c_restype = ctype(restype)
            if byref(restype):
                # By-reference result: allocate the result object here, pass
                # a pointer to it as an extra trailing argument, and declare
                # the function itself as returning nothing.
                c_result = c_restype() # dummy result value
                args.append(ctypes.pointer(c_result))
                c_restype = None # void
            # The signature's argument types are the concrete ctypes types of
            # the values about to be passed.
            c_signature = ctypes.PYFUNCTYPE(c_restype, *[type(arg) for arg in args])
            cfunc = ctypes.cast(cfunc, c_signature)

        with benchmark('call %s' % self.py_func.__name__):
            # Handle calling convention
            if byref(restype):
                # Result arrives through the pointer appended above, so
                # `c_result` already names the object that gets filled in.
                cfunc(*args)
            else:
                c_result = cfunc(*args)

        with benchmark('postcall %s' % self.py_func.__name__):
            # Map ctypes result back to a python value
            result = fromctypes(c_result, restype)
            result_obj = toobject(result, restype)

        return result_obj
; Sums i32 values in a counted loop.  The element pointer starts at the i32*
; stored in field 2 of the struct argument %0; each iteration loads one i32,
; adds it to a running total and advances the pointer by a step that is read
; once before the loop.  The loop runs %1 times; for %1 <= 0 the result is 0.
; NOTE(review): judging by the symbol name this is presumably the code
; generated for the numba2 @jit function `new_sum` -- confirm.
define i32 @testarray.new_sum({ { i64*, i64 }*, { i64*, i64 }*, i32* }* nocapture, i32) #0 {
testarray.new_sum:
; Enter the loop only when the count %1 is positive (signed compare).
%2 = icmp sgt i32 %1, 0
br i1 %2, label %testarray.new_sum5.lr.ph, label %testarray.new_sum6
; Loop preheader: everything loaded here is loop-invariant.
testarray.new_sum5.lr.ph: ; preds = %testarray.new_sum
; Trip count widened to i64 for the down-counter.
%3 = sext i32 %1 to i64
; %5 = field 2 of %0: the initial i32 element pointer.
%4 = getelementptr { { i64*, i64 }*, { i64*, i64 }*, i32* }* %0, i64 0, i32 2
%5 = load i32** %4, align 8
; Follow field 0 of %0 to a { i64*, i64 } struct, then its field 0 (an i64*),
; and load the first i64 it points to; that value is the per-iteration step.
; NOTE(review): presumably the first entry of a stride/step table -- confirm.
%6 = getelementptr inbounds { { i64*, i64 }*, { i64*, i64 }*, i32* }* %0, i64 0, i32 0
%7 = load { i64*, i64 }** %6, align 8
%8 = getelementptr inbounds { i64*, i64 }* %7, i64 0, i32 0
%9 = load i64** %8, align 8
%.0.i1.i.i.i.i2.i.i.i.i.i = load i64* %9, align 8
br label %testarray.new_sum5
; Loop body.
testarray.new_sum5: ; preds = %testarray.new_sum5, %testarray.new_sum5.lr.ph
; %lsr.iv26: remaining iterations; %lsr.iv: current element pointer;
; %.324: running total (starts at 0).
%lsr.iv26 = phi i64 [ %lsr.iv.next, %testarray.new_sum5 ], [ %3, %testarray.new_sum5.lr.ph ]
%lsr.iv = phi i32* [ %scevgep27, %testarray.new_sum5 ], [ %5, %testarray.new_sum5.lr.ph ]
%.324 = phi i32 [ %.0.i, %testarray.new_sum5 ], [ 0, %testarray.new_sum5.lr.ph ]
; Load the current element and accumulate (i32 add, no nsw/nuw flags).
%.0.i1.i.i.i.i.i.i = load i32* %lsr.iv, align 4
%.0.i = add i32 %.0.i1.i.i.i.i.i.i, %.324
; Advance the pointer by the step, counted in i32 elements.
%scevgep27 = getelementptr i32* %lsr.iv, i64 %.0.i1.i.i.i.i2.i.i.i.i.i
; Count down and leave the loop when the counter reaches zero.
%lsr.iv.next = add i64 %lsr.iv26, -1
%exitcond = icmp eq i64 %lsr.iv.next, 0
br i1 %exitcond, label %testarray.new_sum6, label %testarray.new_sum5
; Exit: 0 when the loop was skipped, otherwise the accumulated total.
testarray.new_sum6: ; preds = %testarray.new_sum5, %testarray.new_sum
%.3.lcssa = phi i32 [ 0, %testarray.new_sum ], [ %.0.i, %testarray.new_sum5 ]
ret i32 %.3.lcssa
}
; Sums i32 values in a counted loop.  The data pointer starts at the i8*
; stored in field 2 of the struct argument %a; each iteration loads one i32
; through it, adds it to a running total and advances the pointer by a step
; read once on entry.  The loop runs %M times; for %M <= 0 the result is 0.
; The argument is passed to Py_INCREF on entry and Py_XDECREF before return.
; NOTE(review): judging by the symbol name this is presumably the code
; generated by the old numba @autojit; the ten-field struct looks like a
; NumPy array object layout -- confirm both.
define i32 @__numba_specialized_2_testarray_2E_raw({ i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a, i64 %M) {
entry:
; View %a through a two-field { i64, i8* } header type and call Py_INCREF.
%0 = bitcast { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a to { i64, i8* }*
tail call void @Py_INCREF({ i64, i8* }* %0)
; %2 = field 2 of %a: the initial data pointer (i8*).
%1 = getelementptr { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a, i64 0, i32 2
%2 = load i8** %1, align 8, !tbaa !2
; %5 = first i64 behind field 5 of %a: the per-iteration pointer step.
; NOTE(review): presumably the first stride of the array -- confirm.
%3 = getelementptr { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a, i64 0, i32 5
%4 = load i64** %3, align 8, !tbaa !5
%5 = load i64* %4, align 8
; Enter the loop only when %M is positive (signed compare).
%6 = icmp sgt i64 %M, 0
br i1 %6, label %"loop_body_30:20", label %"exit_for_29:4"
; Exit: 0 when the loop was skipped, otherwise the accumulated total.
"exit_for_29:4": ; preds = %"loop_body_30:20", %entry
%sumval_2.lcssa = phi i32 [ 0, %entry ], [ %9, %"loop_body_30:20" ]
; Call Py_XDECREF on %a (pairs with the Py_INCREF in the entry block).
%7 = bitcast { i64, i8*, i8*, i32, i64*, i64*, i8*, i8*, i32, i8* }* %a to { i64, i8* }*
tail call void @Py_XDECREF({ i64, i8* }* %7)
ret i32 %sumval_2.lcssa
; Loop body.
"loop_body_30:20": ; preds = %entry, %"loop_body_30:20"
; %lsr.iv3: remaining iterations; %lsr.iv: current data pointer;
; %sumval_23: running total (starts at 0).
%lsr.iv3 = phi i64 [ %lsr.iv.next, %"loop_body_30:20" ], [ %M, %entry ]
%lsr.iv = phi i8* [ %10, %"loop_body_30:20" ], [ %2, %entry ]
%sumval_23 = phi i32 [ %9, %"loop_body_30:20" ], [ 0, %entry ]
; Two typed views of the same pointer: i32* for the load, i1* for stepping.
%lsr.iv2 = bitcast i8* %lsr.iv to i32*
%lsr.iv1 = bitcast i8* %lsr.iv to i1*
; Load the current element and accumulate (i32 add, no nsw/nuw flags).
%8 = load i32* %lsr.iv2, align 4, !tbaa !2
%9 = add i32 %8, %sumval_23
; Advance the pointer by %5 units of the i1 pointee type.
; NOTE(review): presumably this amounts to a step of %5 bytes -- confirm.
%scevgep = getelementptr i1* %lsr.iv1, i64 %5
%10 = bitcast i1* %scevgep to i8*
; Count down and leave the loop when the counter reaches zero.
%lsr.iv.next = add i64 %lsr.iv3, -1
%exitcond = icmp eq i64 %lsr.iv.next, 0
br i1 %exitcond, label %"exit_for_29:4", label %"loop_body_30:20"
}
from numba import autojit
from numba2 import jit
import numpy as np
# Benchmark kernel compiled with numba2's @jit.
# Adds a[0] .. a[M-1] to a total that starts at 0 and returns the total.
# The body matches old_sum statement for statement; only the decorator differs.
@jit
def new_sum(a, M):
    sumval = 0
    for i in range(M):
        sumval += a[i]
    return sumval
# Benchmark kernel compiled with numba's @autojit.
# Adds a[0] .. a[M-1] to a total that starts at 0 and returns the total.
# The body matches new_sum statement for statement; only the decorator differs.
@autojit
def old_sum(a, M):
    sumval = 0
    for i in range(M):
        sumval += a[i]
    return sumval
# Shared benchmark input: the int32 values 0 .. 999999 in a one-dimensional
# array, built once at import time and passed to both kernels.
A = np.arange(10 ** 6, dtype=np.int32)
def runnew():
    """Call ``new_sum`` on the module-level array ``A`` over its whole first
    dimension.  The sum is discarded; the function returns ``None``."""
    length = A.shape[0]
    new_sum(A, length)
def runold():
    """Call ``old_sum`` on the module-level array ``A`` over its whole first
    dimension.  The sum is discarded; the function returns ``None``."""
    length = A.shape[0]
    old_sum(A, length)
@sklam
Copy link
Author

sklam commented Nov 29, 2013

In [1]: import testarray
/Users/sklam/dev/numba-lang/numba2/support/numpy_support.py:15: SyntaxWarning: import * only allowed at module level
  def from_dtype(dtype):
/Users/sklam/dev/numba-lang/numba2/support/numpy_support.py:78: SyntaxWarning: import * only allowed at module level
  def to_dtype(type):

In [2]: %timeit testarray.runnew()
1 loops, best of 3: 2.75 ms per loop

In [3]: %timeit testarray.runold()
1000 loops, best of 3: 404 µs per loop

In [4]: %timeit testarray.runnew()
100 loops, best of 3: 3.23 ms per loop

In [5]: %timeit testarray.runold()
1000 loops, best of 3: 409 µs per loop

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment