brandonwillard · June 7, 2020 22:14
diff --git a/numba_bug.py b/numba_bug.py
 import numba
 import numpy


 #The filter2d with the same signature as Theano
 #but not a class method.
 def filter2d_theano(node, inputs, outputs):
    """Filter a 2-D image using the ``perform(node, inputs, outputs)`` layout.

    ``inputs`` unpacks to ``(image, filt)``.  Every interior pixel of the
    result is the sum of the flipped filter multiplied with the image window
    around that pixel; border pixels that the window cannot cover stay zero.
    The result is stored in ``outputs[0][0]``.  ``node`` is not used.

    Returns an empty array from ``numpy.zeros(0)``.
    """
    image, filt = inputs
    rows, cols = image.shape
    f_rows, f_cols = filt.shape
    half_r = f_rows // 2
    half_c = f_cols // 2
    filtered = numpy.zeros_like(image)
    for r in range(half_r, rows - half_r):
        top = r - half_r
        for c in range(half_c, cols - half_c):
            left = c - half_c
            acc = 0.0
            for dr in range(f_rows):
                for dc in range(f_cols):
                    # The filter is read back to front (flipped on both axes).
                    weight = filt[f_rows - 1 - dr, f_cols - 1 - dc]
                    acc += (weight * image[top + dr, left + dc])
            filtered[r, c] = acc
    outputs[0][0] = filtered
    return numpy.zeros(0)

 fastfilter_2d_theano_auto = numba.autojit(filter2d_theano)
 image = numpy.random.random((100, 100))
 filt = numpy.random.random((10, 10))
 i = [image, filt]
 o = [[None]]

 fastfilter_2d_theano_auto(None, i, o)
 t2 = timeit(fastfilter_2d_theano_auto, None, i, o)
 print 'fastfilter_2d_theano_auto', t2
diff --git a/theano_numba.py b/theano_numba.py
 import time

 import numba
 from numba import double, jit
 import numpy
 import numpy as np
 import theano


 def timeit(f, *args):
    """Call ``f(*args)`` once and return the elapsed wall-clock seconds.

    The return value of ``f`` is discarded.
    """
    started = time.time()
    f(*args)
    return time.time() - started


 def filter2d(image, filt):
    """Return ``image`` filtered by the flipped 2-D filter ``filt``.

    Every interior pixel of the result is the sum of the flipped filter
    multiplied with the image window around that pixel.  Border pixels that
    the window cannot cover stay zero.  The result is created with
    ``numpy.zeros_like(image)``.
    """
    rows, cols = image.shape
    f_rows, f_cols = filt.shape
    half_r = f_rows // 2
    half_c = f_cols // 2
    filtered = numpy.zeros_like(image)
    for r in range(half_r, rows - half_r):
        top = r - half_r
        for c in range(half_c, cols - half_c):
            left = c - half_c
            acc = 0.0
            for dr in range(f_rows):
                for dc in range(f_cols):
                    # The filter is read back to front (flipped on both axes).
                    weight = filt[f_rows - 1 - dr, f_cols - 1 - dc]
                    acc += (weight * image[top + dr, left + dc])
            filtered[r, c] = acc
    return filtered


 #The filter2d with the same signature as Theano
 #but not a class method.
 def filter2d_theano(node, inputs, outputs):
    """Filter a 2-D image using the ``perform(node, inputs, outputs)`` layout.

    ``inputs`` unpacks to ``(image, filt)``.  Every interior pixel of the
    result is the sum of the flipped filter multiplied with the image window
    around that pixel; border pixels that the window cannot cover stay zero.
    The result is stored in ``outputs[0][0]``.  ``node`` is not used.

    Returns an empty array from ``numpy.zeros(0)``.
    """
    image, filt = inputs
    rows, cols = image.shape
    f_rows, f_cols = filt.shape
    half_r = f_rows // 2
    half_c = f_cols // 2
    filtered = numpy.zeros_like(image)
    for r in range(half_r, rows - half_r):
        top = r - half_r
        for c in range(half_c, cols - half_c):
            left = c - half_c
            acc = 0.0
            for dr in range(f_rows):
                for dc in range(f_cols):
                    # The filter is read back to front (flipped on both axes).
                    weight = filt[f_rows - 1 - dr, f_cols - 1 - dc]
                    acc += (weight * image[top + dr, left + dc])
            filtered[r, c] = acc
    outputs[0][0] = filtered
    return numpy.zeros(0)

 fastfilter_2d = jit(double[:,:](double[:,:], double[:,:]))(filter2d)
 image = numpy.random.random((100, 100))
 filt = numpy.random.random((10, 10))
 t0 = time.time()
 t0 = timeit(filter2d, image, filt)
 t1 = timeit(fastfilter_2d, image, filt)

 print "python", t0
 print "numba", t1

 i = [image, filt]
 o = [[None]]
 in1_type = numba.typeof([image, filt])
 #in1_type = numba.typedlist(numba.double)
 #in1_type = numba.map_dtype()
 out_type = numba.f8[:]
 #fastfilter_2d_theano = numba.jit(out_type(numba.object_,
 #                                          in1_type,
 #                                          numba.object_))(filter2d_theano)


 class Filter2d(theano.Op):
    """Theano Op that runs ``filter2d`` through a Numba-compiled function.

    ``make_thunk`` compiles ``filter2d`` for the dtypes of the node's inputs
    and output and stores it on ``self.numba_fct``; ``perform`` calls it.
    """

    def make_node(self, image, filt):
        """Build the Apply node for a 2-D ``image`` and a 2-D ``filt``.

        Both arguments are converted to tensor variables.  The single output
        is created from the image's type.
        """
        image = theano.tensor.as_tensor_variable(image)
        filt = theano.tensor.as_tensor_variable(filt)
        assert image.ndim == 2
        assert filt.ndim == 2
        node_inputs = [image, filt]
        node_outputs = [image.type()]
        return theano.Apply(self, node_inputs, node_outputs)

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        """Compile ``filter2d`` for this node, then defer to the base class."""
        # Numba scalar types are looked up by the name of each Theano dtype.
        image_t = getattr(numba, node.inputs[0].dtype)
        filt_t = getattr(numba, node.inputs[1].dtype)
        result_t = getattr(numba, node.outputs[0].dtype)
        signature = result_t[:, :](image_t[:, :], filt_t[:, :])
        self.numba_fct = numba.jit(signature)(filter2d)

        # Assert that Numba inferred the same output as our make_node.
        returned = self.numba_fct.signature.return_type
        assert isinstance(returned, numba.typesystem.types.array_)
        assert (str(returned.dtype) == node.outputs[0].dtype)
        assert (returned.ndim == node.outputs[0].ndim)

        parent = super(Filter2d, self)
        return parent.make_thunk(node, storage_map, compute_map, no_recycling)

    def perform(self, node, inputs, outputs):
        """Run the compiled filter and store the result in ``outputs[0][0]``."""
        image, filt = inputs
        outputs[0][0] = self.numba_fct(image, filt)

 # Two symbolic matrices, named 'a' and 'b', used as the graph inputs.
 m1, m2 = theano.tensor.matrices('ab')

 # Build a Theano function around the Filter2d op, check it against the
 # pure-Python filter2d, then time one call.
 out = Filter2d()(m1, m2)
 f = theano.function([m1, m2], out)
 assert numpy.allclose(f(image, filt), filter2d(image, filt))
 t3 = timeit(f, image, filt)
 print "theano+numba manual", t3

 # The following shows a Numba bug: autojit applied to the function that
 # uses the perform()-style (node, inputs, outputs) signature.
 print "Show Numba Bug!"
 fastfilter_2d_theano_auto = numba.autojit(filter2d_theano)
 # First call is untimed; the second call is the one measured.
 fastfilter_2d_theano_auto(None, i, o)
 t2 = timeit(fastfilter_2d_theano_auto, None, i, o)
 print 'fastfilter_2d_theano_auto', t2
 #res2 = fastfilter_2d_theano(None, i, o)
 #res2 = o[0][0]


 class Filter2d_v2(theano.Op):
    """Variant of the filter Op that replaces ``perform`` with compiled code.

    ``make_thunk`` assigns a Numba-compiled ``filter2d_theano`` to
    ``self.perform``.  That instance attribute shadows the pure-Python
    ``perform`` method defined below.
    """

    def make_node(self, image, filt):
        """Build the Apply node for a 2-D ``image`` and a 2-D ``filt``.

        Both arguments are converted to tensor variables.  The single output
        is created from the image's type.
        """
        image = theano.tensor.as_tensor_variable(image)
        filt = theano.tensor.as_tensor_variable(filt)
        assert image.ndim == 2
        assert filt.ndim == 2

        return theano.Apply(self, [image, filt], [image.type()])

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        """Bind ``self.perform`` to a compiled ``filter2d_theano`` and defer
        to the base class ``make_thunk``."""
        # NOTE(review): this autojit binding is overwritten by the explicit
        # jit call a few lines below.
        self.perform = numba.autojit(filter2d_theano)
        # Type of a Python list holding one 2x5 double array; used as the
        # declared type of the `inputs` argument.
        in1_type = numba.typeof([np.arange(10, dtype=np.double).reshape(2, 5)])
        # NOTE(review): in2_type is computed but never used.
        in2_type = getattr(numba, node.inputs[1].dtype)
        # Declared return type; filter2d_theano returns numpy.zeros(0).
        out_type = numba.f8[:]
        # `node` and `outputs` are declared as generic objects.
        self.perform = numba.jit(out_type(numba.object_,
                                          in1_type,
                                          numba.object_))(filter2d_theano)
        return super(Filter2d_v2, self).make_thunk(
            node, storage_map, compute_map, no_recycling)

    def perform(self, node, inputs, outputs):
        """Pure-Python filter that stores its result in ``outputs[0][0]``.

        Interior pixels get the sum of the flipped filter multiplied with
        the surrounding image window; border pixels stay zero.
        """
        image, filt = inputs
        M, N = image.shape
        Mf, Nf = filt.shape
        Mf2 = Mf // 2
        Nf2 = Nf // 2
        result = numpy.zeros_like(image)
        for i in range(Mf2, M - Mf2):
            for j in range(Nf2, N - Nf2):
                num = 0.0
                for ii in range(Mf):
                    for jj in range(Nf):
                        # The filter is indexed back to front (flipped).
                        num += (filt[Mf-1-ii, Nf-1-jj] * image[i-Mf2+ii, j-Nf2+jj])
                result[i, j] = num
        outputs[0][0] = result


 # Build a Theano function around the Filter2d_v2 op.
 out = Filter2d_v2()(m1, m2)
 f = theano.function([m1, m2], out)
 # Dump Theano's debug print of the compiled function, then its result.
 theano.printing.debugprint(f)
 print f(image, filt)
 # The op's result is expected to match the pure-Python filter2d.
 assert numpy.allclose(f(image, filt), filter2d(image, filt))
diff --git a/theano_op.py b/theano_op.py
 """This file provide an example of using Numba with Theano.

 It show how to make a Theano op that use a numba to
 accelerate its python code.

 This could be done further. The "loop" that interate on all the
 operation that a Theano function do is in C. So we end up with having
 c code that call python code that call numba compiled code. We could
 modify Theano to make it reuse directly the Numba executable from C,
 bypassing the python overhead.

 """
 import time

 import numba
 from numba import double, jit
 import numpy
 import numpy as np
 import theano


 def timeit(f, *args):
    """Call ``f(*args)`` once and return the elapsed wall-clock seconds.

    The return value of ``f`` is discarded.
    """
    started = time.time()
    f(*args)
    return time.time() - started


 def filter2d(image, filt):
    """Return ``image`` filtered by the flipped 2-D filter ``filt``.

    Every interior pixel of the result is the sum of the flipped filter
    multiplied with the image window around that pixel.  Border pixels that
    the window cannot cover stay zero.  The result is created with
    ``numpy.zeros_like(image)``.
    """
    rows, cols = image.shape
    f_rows, f_cols = filt.shape
    half_r = f_rows // 2
    half_c = f_cols // 2
    filtered = numpy.zeros_like(image)
    for r in range(half_r, rows - half_r):
        top = r - half_r
        for c in range(half_c, cols - half_c):
            left = c - half_c
            acc = 0.0
            for dr in range(f_rows):
                for dc in range(f_cols):
                    # The filter is read back to front (flipped on both axes).
                    weight = filt[f_rows - 1 - dr, f_cols - 1 - dc]
                    acc += (weight * image[top + dr, left + dc])
            filtered[r, c] = acc
    return filtered


 #The filter2d with the same signature as Theano
 #but not a class method.
 def filter2d_theano(node, inputs, outputs):
    """Filter a 2-D image using the ``perform(node, inputs, outputs)`` layout.

    ``inputs`` unpacks to ``(image, filt)``.  Every interior pixel of the
    result is the sum of the flipped filter multiplied with the image window
    around that pixel; border pixels that the window cannot cover stay zero.
    The result is stored in ``outputs[0][0]``.  ``node`` is not used.

    Returns an empty array from ``numpy.zeros(0)``.
    """
    image, filt = inputs
    rows, cols = image.shape
    f_rows, f_cols = filt.shape
    half_r = f_rows // 2
    half_c = f_cols // 2
    filtered = numpy.zeros_like(image)
    for r in range(half_r, rows - half_r):
        top = r - half_r
        for c in range(half_c, cols - half_c):
            left = c - half_c
            acc = 0.0
            for dr in range(f_rows):
                for dc in range(f_cols):
                    # The filter is read back to front (flipped on both axes).
                    weight = filt[f_rows - 1 - dr, f_cols - 1 - dc]
                    acc += (weight * image[top + dr, left + dc])
            filtered[r, c] = acc
    outputs[0][0] = filtered
    return numpy.zeros(0)


 class Filter2d(theano.Op):
    """Theano Op that runs ``filter2d`` through a Numba-compiled function.

    ``make_thunk`` compiles ``filter2d`` for the dtypes of the node's inputs
    and output and stores it on ``self.numba_fct``; ``perform`` calls it.
    """

    def make_node(self, image, filt):
        """Build the Apply node for a 2-D ``image`` and a 2-D ``filt``.

        Both arguments are converted to tensor variables.  The single output
        is created from the image's type.
        """
        image = theano.tensor.as_tensor_variable(image)
        filt = theano.tensor.as_tensor_variable(filt)
        assert image.ndim == 2
        assert filt.ndim == 2
        node_inputs = [image, filt]
        node_outputs = [image.type()]
        return theano.Apply(self, node_inputs, node_outputs)

    def make_thunk(self, node, storage_map, compute_map, no_recycling):
        """Compile ``filter2d`` for this node, then defer to the base class."""
        # Numba scalar types are looked up by the name of each Theano dtype.
        image_t = getattr(numba, node.inputs[0].dtype)
        filt_t = getattr(numba, node.inputs[1].dtype)
        result_t = getattr(numba, node.outputs[0].dtype)
        signature = result_t[:, :](image_t[:, :], filt_t[:, :])
        self.numba_fct = numba.jit(signature)(filter2d)

        # Assert that Numba inferred the same output as our make_node.
        returned = self.numba_fct.signature.return_type
        assert isinstance(returned, numba.typesystem.types.array_)
        assert (str(returned.dtype) == node.outputs[0].dtype)
        assert (returned.ndim == node.outputs[0].ndim)

        parent = super(Filter2d, self)
        return parent.make_thunk(node, storage_map, compute_map, no_recycling)

    def perform(self, node, inputs, outputs):
        """Run the compiled filter and store the result in ``outputs[0][0]``."""
        image, filt = inputs
        outputs[0][0] = self.numba_fct(image, filt)

 # Numba version of filter2d, declared with an explicit double[:, :] signature.
 fastfilter_2d = jit(double[:, :](double[:, :], double[:, :]))(filter2d)
 # Random 100x100 image and 10x10 filter shared by all timings below.
 image = numpy.random.random((100, 100))
 filt = numpy.random.random((10, 10))

 # Time the pure-Python version (a single call).
 t0 = timeit(filter2d, image, filt)
 print "python", t0

 # Time the Numba version (a single call).
 t1 = timeit(fastfilter_2d, image, filt)
 print "numba", t1


 # Time/test the Theano version: build a function around the Filter2d op,
 # check it against the pure-Python result, then time a single call.
 m1, m2 = theano.tensor.matrices('ab')
 out = Filter2d()(m1, m2)
 f = theano.function([m1, m2], out)
 assert numpy.allclose(f(image, filt), filter2d(image, filt))
 t2 = timeit(f, image, filt)
 print "theano+numba manual", t2
 # Ratio of the Numba time to the Theano time.
 print "Theano speed up vs Numba (slowdown caused by extra overhead)", t1/t2
	import numba
	import numpy


	#The filter2d with the same signature as Theano
	#but not a class method.
	def filter2d_theano(node, inputs, outputs):
	    """Filter a 2-D image using the ``perform(node, inputs, outputs)`` layout.

	    ``inputs`` unpacks to ``(image, filt)``.  Interior pixels of the result
	    get the sum of the flipped filter multiplied with the surrounding image
	    window; border pixels stay zero.  The result is stored in
	    ``outputs[0][0]``.  ``node`` is not used.

	    Returns an empty array from ``numpy.zeros(0)``.
	    """
	    image, filt = inputs
	    M, N = image.shape
	    Mf, Nf = filt.shape
	    Mf2 = Mf // 2
	    Nf2 = Nf // 2
	    result = numpy.zeros_like(image)
	    for i in range(Mf2, M - Mf2):
	        for j in range(Nf2, N - Nf2):
	            num = 0.0
	            for ii in range(Mf):
	                for jj in range(Nf):
	                    # The filter is indexed back to front (flipped).
	                    num += (filt[Mf-1-ii, Nf-1-jj] * image[i-Mf2+ii, j-Nf2+jj])
	            result[i, j] = num
	    outputs[0][0] = result
	    return numpy.zeros(0)

	fastfilter_2d_theano_auto = numba.autojit(filter2d_theano)
	image = numpy.random.random((100, 100))
	filt = numpy.random.random((10, 10))
	i = [image, filt]
	o = [[None]]

	fastfilter_2d_theano_auto(None, i, o)
	t2 = timeit(fastfilter_2d_theano_auto, None, i, o)
	print 'fastfilter_2d_theano_auto', t2
	import time

	import numba
	from numba import double, jit
	import numpy
	import numpy as np
	import theano


	def timeit(f, *args):
	    """Call ``f(*args)`` once and return the elapsed wall-clock seconds.

	    The return value of ``f`` is discarded.
	    """
	    t0 = time.time()
	    f(*args)
	    t1 = time.time()
	    return t1 - t0


	def filter2d(image, filt):
	    """Return ``image`` filtered by the flipped 2-D filter ``filt``.

	    Interior pixels of the result get the sum of the flipped filter
	    multiplied with the surrounding image window; border pixels that the
	    window cannot cover stay zero.
	    """
	    M, N = image.shape
	    Mf, Nf = filt.shape
	    Mf2 = Mf // 2
	    Nf2 = Nf // 2
	    result = numpy.zeros_like(image)
	    for i in range(Mf2, M - Mf2):
	        for j in range(Nf2, N - Nf2):
	            num = 0.0
	            for ii in range(Mf):
	                for jj in range(Nf):
	                    # The filter is indexed back to front (flipped).
	                    num += (filt[Mf-1-ii, Nf-1-jj] * image[i-Mf2+ii, j-Nf2+jj])
	            result[i, j] = num
	    return result


	#The filter2d with the same signature as Theano
	#but not a class method.
	def filter2d_theano(node, inputs, outputs):
	    """Filter a 2-D image using the ``perform(node, inputs, outputs)`` layout.

	    ``inputs`` unpacks to ``(image, filt)``.  Interior pixels of the result
	    get the sum of the flipped filter multiplied with the surrounding image
	    window; border pixels stay zero.  The result is stored in
	    ``outputs[0][0]``.  ``node`` is not used.

	    Returns an empty array from ``numpy.zeros(0)``.
	    """
	    image, filt = inputs
	    M, N = image.shape
	    Mf, Nf = filt.shape
	    Mf2 = Mf // 2
	    Nf2 = Nf // 2
	    result = numpy.zeros_like(image)
	    for i in range(Mf2, M - Mf2):
	        for j in range(Nf2, N - Nf2):
	            num = 0.0
	            for ii in range(Mf):
	                for jj in range(Nf):
	                    # The filter is indexed back to front (flipped).
	                    num += (filt[Mf-1-ii, Nf-1-jj] * image[i-Mf2+ii, j-Nf2+jj])
	            result[i, j] = num
	    outputs[0][0] = result
	    return numpy.zeros(0)

	fastfilter_2d = jit(double[:,:](double[:,:], double[:,:]))(filter2d)
	image = numpy.random.random((100, 100))
	filt = numpy.random.random((10, 10))
	t0 = time.time()
	t0 = timeit(filter2d, image, filt)
	t1 = timeit(fastfilter_2d, image, filt)

	print "python", t0
	print "numba", t1

	i = [image, filt]
	o = [[None]]
	in1_type = numba.typeof([image, filt])
	#in1_type = numba.typedlist(numba.double)
	#in1_type = numba.map_dtype()
	out_type = numba.f8[:]
	#fastfilter_2d_theano = numba.jit(out_type(numba.object_,
	# in1_type,
	# numba.object_))(filter2d_theano)


	class Filter2d(theano.Op):
	    """Theano Op that runs ``filter2d`` through a Numba-compiled function.

	    ``make_thunk`` compiles ``filter2d`` for the dtypes of the node's inputs
	    and output and stores it on ``self.numba_fct``; ``perform`` calls it.
	    """

	    def make_node(self, image, filt):
	        """Build the Apply node for a 2-D ``image`` and a 2-D ``filt``."""
	        image = theano.tensor.as_tensor_variable(image)
	        filt = theano.tensor.as_tensor_variable(filt)
	        assert image.ndim == 2
	        assert filt.ndim == 2
	        return theano.Apply(self, [image, filt], [image.type()])

	    def make_thunk(self, node, storage_map, compute_map, no_recycling):
	        """Compile ``filter2d`` for this node, then defer to the base class."""
	        # Numba scalar types are looked up by the name of each Theano dtype.
	        in1_type = getattr(numba, node.inputs[0].dtype)
	        in2_type = getattr(numba, node.inputs[1].dtype)
	        out_type = getattr(numba, node.outputs[0].dtype)
	        self.numba_fct = numba.jit(out_type[:, :](in1_type[:, :],
	                                                  in2_type[:, :]))(filter2d)

	        # Assert that Numba inferred the same output as our make_node.
	        assert isinstance(self.numba_fct.signature.return_type,
	                          numba.typesystem.types.array_)
	        assert (str(self.numba_fct.signature.return_type.dtype) ==
	                node.outputs[0].dtype)
	        assert (self.numba_fct.signature.return_type.ndim ==
	                node.outputs[0].ndim)

	        return super(Filter2d, self).make_thunk(
	            node, storage_map, compute_map, no_recycling)

	    def perform(self, node, inputs, outputs):
	        """Run the compiled filter and store the result in ``outputs[0][0]``."""
	        image, filt = inputs
	        out = self.numba_fct(image, filt)
	        outputs[0][0] = out

	# Two symbolic matrices, named 'a' and 'b', used as the graph inputs.
	m1, m2 = theano.tensor.matrices('ab')

	# Build a Theano function around the Filter2d op, check it against the
	# pure-Python filter2d, then time one call.
	out = Filter2d()(m1, m2)
	f = theano.function([m1, m2], out)
	assert numpy.allclose(f(image, filt), filter2d(image, filt))
	t3 = timeit(f, image, filt)
	print "theano+numba manual", t3

	# The following shows a Numba bug: autojit applied to the function that
	# uses the perform()-style (node, inputs, outputs) signature.
	print "Show Numba Bug!"
	fastfilter_2d_theano_auto = numba.autojit(filter2d_theano)
	# First call is untimed; the second call is the one measured.
	fastfilter_2d_theano_auto(None, i, o)
	t2 = timeit(fastfilter_2d_theano_auto, None, i, o)
	print 'fastfilter_2d_theano_auto', t2
	#res2 = fastfilter_2d_theano(None, i, o)
	#res2 = o[0][0]


	class Filter2d_v2(theano.Op):
	    """Variant of the filter Op that replaces ``perform`` with compiled code.

	    ``make_thunk`` assigns a Numba-compiled ``filter2d_theano`` to
	    ``self.perform``.  That instance attribute shadows the pure-Python
	    ``perform`` method defined below.
	    """

	    def make_node(self, image, filt):
	        """Build the Apply node for a 2-D ``image`` and a 2-D ``filt``."""
	        image = theano.tensor.as_tensor_variable(image)
	        filt = theano.tensor.as_tensor_variable(filt)
	        assert image.ndim == 2
	        assert filt.ndim == 2

	        return theano.Apply(self, [image, filt], [image.type()])

	    def make_thunk(self, node, storage_map, compute_map, no_recycling):
	        """Bind ``self.perform`` to a compiled ``filter2d_theano`` and defer
	        to the base class ``make_thunk``."""
	        # NOTE(review): this autojit binding is overwritten by the explicit
	        # jit call a few lines below.
	        self.perform = numba.autojit(filter2d_theano)
	        # Type of a Python list holding one 2x5 double array; used as the
	        # declared type of the `inputs` argument.
	        in1_type = numba.typeof([np.arange(10, dtype=np.double).reshape(2, 5)])
	        # NOTE(review): in2_type is computed but never used.
	        in2_type = getattr(numba, node.inputs[1].dtype)
	        # Declared return type; filter2d_theano returns numpy.zeros(0).
	        out_type = numba.f8[:]
	        self.perform = numba.jit(out_type(numba.object_,
	                                          in1_type,
	                                          numba.object_))(filter2d_theano)
	        return super(Filter2d_v2, self).make_thunk(
	            node, storage_map, compute_map, no_recycling)

	    def perform(self, node, inputs, outputs):
	        """Pure-Python filter that stores its result in ``outputs[0][0]``.

	        Interior pixels get the sum of the flipped filter multiplied with
	        the surrounding image window; border pixels stay zero.
	        """
	        image, filt = inputs
	        M, N = image.shape
	        Mf, Nf = filt.shape
	        Mf2 = Mf // 2
	        Nf2 = Nf // 2
	        result = numpy.zeros_like(image)
	        for i in range(Mf2, M - Mf2):
	            for j in range(Nf2, N - Nf2):
	                num = 0.0
	                for ii in range(Mf):
	                    for jj in range(Nf):
	                        # The filter is indexed back to front (flipped).
	                        num += (filt[Mf-1-ii, Nf-1-jj] * image[i-Mf2+ii, j-Nf2+jj])
	                result[i, j] = num
	        outputs[0][0] = result


	# Build a Theano function around the Filter2d_v2 op.
	out = Filter2d_v2()(m1, m2)
	f = theano.function([m1, m2], out)
	# Dump Theano's debug print of the compiled function, then its result.
	theano.printing.debugprint(f)
	print f(image, filt)
	# The op's result is expected to match the pure-Python filter2d.
	assert numpy.allclose(f(image, filt), filter2d(image, filt))
	"""This file provide an example of using Numba with Theano.

	It show how to make a Theano op that use a numba to
	accelerate its python code.

	This could be done further. The "loop" that interate on all the
	operation that a Theano function do is in C. So we end up with having
	c code that call python code that call numba compiled code. We could
	modify Theano to make it reuse directly the Numba executable from C,
	bypassing the python overhead.

	"""
	import time

	import numba
	from numba import double, jit
	import numpy
	import numpy as np
	import theano


	def timeit(f, *args):
	    """Call ``f(*args)`` once and return the elapsed wall-clock seconds.

	    The return value of ``f`` is discarded.
	    """
	    t0 = time.time()
	    f(*args)
	    t1 = time.time()
	    return t1 - t0


	def filter2d(image, filt):
	    """Return ``image`` filtered by the flipped 2-D filter ``filt``.

	    Interior pixels of the result get the sum of the flipped filter
	    multiplied with the surrounding image window; border pixels that the
	    window cannot cover stay zero.
	    """
	    M, N = image.shape
	    Mf, Nf = filt.shape
	    Mf2 = Mf // 2
	    Nf2 = Nf // 2
	    result = numpy.zeros_like(image)
	    for i in range(Mf2, M - Mf2):
	        for j in range(Nf2, N - Nf2):
	            num = 0.0
	            for ii in range(Mf):
	                for jj in range(Nf):
	                    # The filter is indexed back to front (flipped).
	                    num += (filt[Mf-1-ii, Nf-1-jj] * image[i-Mf2+ii, j-Nf2+jj])
	            result[i, j] = num
	    return result


	#The filter2d with the same signature as Theano
	#but not a class method.
	def filter2d_theano(node, inputs, outputs):
	    """Filter a 2-D image using the ``perform(node, inputs, outputs)`` layout.

	    ``inputs`` unpacks to ``(image, filt)``.  Interior pixels of the result
	    get the sum of the flipped filter multiplied with the surrounding image
	    window; border pixels stay zero.  The result is stored in
	    ``outputs[0][0]``.  ``node`` is not used.

	    Returns an empty array from ``numpy.zeros(0)``.
	    """
	    image, filt = inputs
	    M, N = image.shape
	    Mf, Nf = filt.shape
	    Mf2 = Mf // 2
	    Nf2 = Nf // 2
	    result = numpy.zeros_like(image)
	    for i in range(Mf2, M - Mf2):
	        for j in range(Nf2, N - Nf2):
	            num = 0.0
	            for ii in range(Mf):
	                for jj in range(Nf):
	                    # The filter is indexed back to front (flipped).
	                    num += (filt[Mf-1-ii, Nf-1-jj] * image[i-Mf2+ii, j-Nf2+jj])
	            result[i, j] = num
	    outputs[0][0] = result
	    return numpy.zeros(0)


	class Filter2d(theano.Op):
	    """Theano Op that runs ``filter2d`` through a Numba-compiled function.

	    ``make_thunk`` compiles ``filter2d`` for the dtypes of the node's inputs
	    and output and stores it on ``self.numba_fct``; ``perform`` calls it.
	    """

	    def make_node(self, image, filt):
	        """Build the Apply node for a 2-D ``image`` and a 2-D ``filt``."""
	        image = theano.tensor.as_tensor_variable(image)
	        filt = theano.tensor.as_tensor_variable(filt)
	        assert image.ndim == 2
	        assert filt.ndim == 2
	        return theano.Apply(self, [image, filt], [image.type()])

	    def make_thunk(self, node, storage_map, compute_map, no_recycling):
	        """Compile ``filter2d`` for this node, then defer to the base class."""
	        # Numba scalar types are looked up by the name of each Theano dtype.
	        in1_type = getattr(numba, node.inputs[0].dtype)
	        in2_type = getattr(numba, node.inputs[1].dtype)
	        out_type = getattr(numba, node.outputs[0].dtype)
	        self.numba_fct = numba.jit(out_type[:, :](in1_type[:, :],
	                                                  in2_type[:, :]))(filter2d)

	        # Assert that Numba inferred the same output as our make_node.
	        assert isinstance(self.numba_fct.signature.return_type,
	                          numba.typesystem.types.array_)
	        assert (str(self.numba_fct.signature.return_type.dtype) ==
	                node.outputs[0].dtype)
	        assert (self.numba_fct.signature.return_type.ndim ==
	                node.outputs[0].ndim)

	        return super(Filter2d, self).make_thunk(
	            node, storage_map, compute_map, no_recycling)

	    def perform(self, node, inputs, outputs):
	        """Run the compiled filter and store the result in ``outputs[0][0]``."""
	        image, filt = inputs
	        out = self.numba_fct(image, filt)
	        outputs[0][0] = out

	# Numba version of filter2d, declared with an explicit double[:, :] signature.
	fastfilter_2d = jit(double[:, :](double[:, :], double[:, :]))(filter2d)
	# Random 100x100 image and 10x10 filter shared by all timings below.
	image = numpy.random.random((100, 100))
	filt = numpy.random.random((10, 10))

	# Time the pure-Python version (a single call).
	t0 = timeit(filter2d, image, filt)
	print "python", t0

	# Time the Numba version (a single call).
	t1 = timeit(fastfilter_2d, image, filt)
	print "numba", t1


	# Time/test the Theano version: build a function around the Filter2d op,
	# check it against the pure-Python result, then time a single call.
	m1, m2 = theano.tensor.matrices('ab')
	out = Filter2d()(m1, m2)
	f = theano.function([m1, m2], out)
	assert numpy.allclose(f(image, filt), filter2d(image, filt))
	t2 = timeit(f, image, filt)
	print "theano+numba manual", t2
	# Ratio of the Numba time to the Theano time.
	print "Theano speed up vs Numba (slowdown caused by extra overhead)", t1/t2