@hans
Created December 13, 2015 22:38
from collections import OrderedDict
import numpy as np
import theano
from theano import tensor as T
# Subclass of AdvancedIncSubtensor1 that always sets (rather than increments),
# carries a `purpose` label for identifying nodes in the graph, and logs every
# change to its `inplace` flag so we can see when the optimizer flips it.
class MyAdvancedSetSubtensor1(T.subtensor.AdvancedIncSubtensor1):
    def get_inplace(self):
        return self._inplace

    def set_inplace(self, val):
        print "=== set_inplace{%s}" % self.purpose, val
        self._inplace = val

    inplace = property(get_inplace, set_inplace)

    def __init__(self, purpose, *args, **kwargs):
        kwargs["set_instead_of_inc"] = True
        self.purpose = purpose
        print "in constructor", kwargs
        super(MyAdvancedSetSubtensor1, self).__init__(*args, **kwargs)

    def __str__(self):
        super_str = super(MyAdvancedSetSubtensor1, self).__str__()
        super_str += "{%s}" % self.purpose
        return super_str

    def clone_inplace(self):
        return self.__class__(self.purpose, inplace=True,
                              set_instead_of_inc=self.set_instead_of_inc)
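# Illustrative only: the op is applied like the stock AdvancedIncSubtensor1,
# i.e. op(x, y, ilist) writes the rows of y into x at the integer indices in
# ilist. The names below are hypothetical and unused elsewhere:
#
#     demo_op = MyAdvancedSetSubtensor1("demo")
#     demo_out = demo_op(T.matrix("x"), T.matrix("y"), T.lvector("idx"))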
from theano.tensor.subtensor import *
def my_inc_subtensor(x, y, purpose, inplace=False, set_instead_of_inc=False,
                     tolerate_inplace_aliasing=False):
    """
    Return x with the given subtensor incremented by y.

    Parameters
    ----------
    x
        The symbolic result of a Subtensor operation.
    y
        The amount by which to increment the subtensor in question.
    purpose
        Label attached to the custom set-subtensor op (used for logging and
        for debugging the inplace optimization below).
    inplace
        Don't use. Theano will do it when possible.
    set_instead_of_inc
        If True, do a set_subtensor instead.
    tolerate_inplace_aliasing
        Allow x and y to be views of a single underlying array even while
        working inplace. For correct results, x and y must not be overlapping
        views; if they overlap, the result of this Op will generally be
        incorrect. This value has no effect if inplace=False.

    Examples
    --------
    To replicate the numpy expression "r[10:] += 5", type

    >>> r = ivector()
    >>> new_r = inc_subtensor(r[10:], 5)
    """
    # First of all, y cannot have a higher dimension than x,
    # nor have non-broadcastable dimensions where x is broadcastable.
    x = theano.tensor.as_tensor_variable(x)
    y = theano.tensor.as_tensor_variable(y)
    if y.ndim > x.ndim:
        raise TypeError(("Trying to increment a %d-dimensional "
                         "subtensor with a %d-dimensional value.") % (x.ndim,
                                                                      y.ndim))
    dim_offset = x.ndim - y.ndim
    for dim in xrange(y.ndim):
        if x.broadcastable[dim + dim_offset] and not y.broadcastable[dim]:
            # It is acceptable to try to increment a subtensor with a
            # broadcastable dim with a tensor that is not broadcastable
            # on that dimension. However, its length must then be 1.
            # We insert a Rebroadcast Op to make sure it is the case.
            y = addbroadcast(y, dim)

    if not x.owner:
        raise TypeError('x must be the result of a subtensor operation')

    # retrieve idx_list from x.owner
    if isinstance(x.owner.op, Subtensor):
        if tolerate_inplace_aliasing:
            destroyhandler_tolerate_aliased = [[0, 1]]
        else:
            destroyhandler_tolerate_aliased = []
        the_op = IncSubtensor(
            x.owner.op.idx_list, inplace, set_instead_of_inc,
            destroyhandler_tolerate_aliased=destroyhandler_tolerate_aliased)
        real_x = x.owner.inputs[0]
        real_idxargs = x.owner.inputs[1:]
        return the_op(real_x, y, *real_idxargs)
    elif isinstance(x.owner.op, AdvancedSubtensor1):
        real_x = x.owner.inputs[0]
        ilist = x.owner.inputs[1]
        print "HERE"
        if set_instead_of_inc:
            the_op = MyAdvancedSetSubtensor1(purpose)
        else:
            # Fall back to the stock op when incrementing rather than setting.
            the_op = AdvancedIncSubtensor1(inplace,
                                           set_instead_of_inc=set_instead_of_inc)
        return the_op(real_x, y, ilist)
    elif isinstance(x.owner.op, AdvancedSubtensor):
        real_x = x.owner.inputs[0]
        ilist = x.owner.inputs[1:]
        the_op = AdvancedIncSubtensor(inplace,
                                      set_instead_of_inc=set_instead_of_inc)
        return the_op(real_x, y, *ilist)
    elif isinstance(x.owner.op, DimShuffle):
        inner_x = x.owner.inputs[0]
        # In the dimshuffle case, there are in fact two dimshuffles:
        # one to make the indexed dimension the last one,
        # and one to put it back where it was. So, in the case where we have
        # inc_subtensor(x[:,i], y), the graph is actually
        # inc_subtensor((x.T)[i].T, y).
        # We could get all the way to x, and then get rid of the dimshuffles
        # completely, but the problem is that advanced_inc_subtensor1 can only
        # work on the first (outer-most, left-most) dimension of x,
        # just like advanced_subtensor1.
        # So we call advanced_inc_subtensor1(x.T, i, y.T) (as we also need to
        # transpose y if it is not a scalar or a vector), but then we need to
        # return something that has the same shape as x, not as x.T (inner_x).
        # So re-apply the outer dimshuffle on the new inc_subtensor,
        # and return advanced_inc_subtensor1(x.T, i, y.T).T.

        # Get the dimshuffle pattern to apply to y.
        x_order = x.owner.op.new_order
        y_order = ['x'] * x.ndim
        for i, v in enumerate(x_order):
            if v != 'x' and (v - dim_offset) >= 0:
                y_order[v - dim_offset] = i

        # Warn if this code path would have produced wrong results in the past
        if config.warn.inc_set_subtensor1:
            # Dimshuffle pattern for y that would be equivalent to past code
            prev_y_order = ['x'] * (dim_offset) + list(range(y.ndim))
            if y_order != prev_y_order:
                warnings.warn(
                    'Although your current code is fine, please note that '
                    'earlier versions prior to 0.7 (or this development '
                    'version) may have yielded an incorrect result in '
                    'this `inc_subtensor` or `set_subtensor` operation. '
                    'To remove this warning, you can either set the '
                    '`warn.inc_set_subtensor1` config option to `False`, '
                    'or `warn.ignore_bug_before` to at least "0.7".',
                    stacklevel=2)

        inner_incsubtensor = inc_subtensor(
            inner_x,
            y.dimshuffle(y_order),
            inplace=inplace,
            set_instead_of_inc=set_instead_of_inc,
            tolerate_inplace_aliasing=tolerate_inplace_aliasing)
        return x.owner.op(inner_incsubtensor, *x.owner.inputs[1:])
    elif isinstance(x.owner.op, theano.tensor.Reshape):
        # This case happens when the indices are not arranged as a vector, but
        # as a higher-dimensional array. This is handled by the subtensor
        # by flattening this list, taking the subtensor, then reshaping the
        # result.
        inner_x = x.owner.inputs[0]
        # Try to apply inc_subtensor on inner_x.
        # If it works, there is no need to reshape, as the inc_subtensor
        # will have the same shape as inner_x, which is what we want.
        # We also explicitly duplicate y to its broadcasted shape
        # before we partially flatten it to inner_x dimension. This is
        # not strictly needed in all cases, but it is easier this way.
        if y.ndim > 0:
            # This if is needed to prevent some useless warning about
            # old code bug.
            expanded_y = alloc(y, *[x.shape[i] for i in xrange(x.ndim)])
            flattened_y = expanded_y.flatten(inner_x.ndim)
        else:
            flattened_y = y

        # Warn if this code path would have produced wrong results in the past
        if config.warn.inc_set_subtensor1:
            if inner_x.ndim > 1 and sum(y.broadcastable) > 0:
                warnings.warn(
                    'Although your current code is fine, please note that '
                    'earlier versions prior to 0.7 (or this development '
                    'version) may have yielded an incorrect result in '
                    'this `inc_subtensor` or `set_subtensor` operation. '
                    'To remove this warning, you can either set the '
                    '`warn.inc_set_subtensor1` config option to `False`, '
                    'or `warn.ignore_bug_before` to at least "0.7".',
                    stacklevel=2)

        inner_incsubtensor = inc_subtensor(
            inner_x,
            flattened_y,
            inplace=inplace,
            set_instead_of_inc=set_instead_of_inc,
            tolerate_inplace_aliasing=tolerate_inplace_aliasing)
        return inner_incsubtensor
    else:
        raise TypeError('x must be the result of a subtensor operation')
def my_set_subtensor(x, y, purpose):
    return my_inc_subtensor(x, y, purpose, set_instead_of_inc=True)
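# Illustrative usage only (hypothetical names; mirrors T.set_subtensor):
#
#     v = T.matrix("v")
#     idx = T.lvector("idx")
#     vals = T.matrix("vals")
#     new_v = my_set_subtensor(v[idx], vals, "demo")  # ~ T.set_subtensor(v[idx], vals)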
########################################
batch_size = 2
stack_size = 30
embedding_dim = 20
num_steps = 5
# num_steps blocks of batch_size elements
#stack = theano.shared(np.zeros((num_steps * batch_size, embedding_dim), dtype=np.float32),
# name="stack")
stack = T.zeros((num_steps * batch_size, embedding_dim))
#t = theano.shared(0, name="t")
#merge_cursors = theano.shared(np.ones((batch_size,), dtype=np.int32) * -1,
# name="merge_cursor")
merge_cursors = T.zeros((batch_size,), dtype=np.int32)
# batch_size blocks of num_steps elements
#merge_queue = theano.shared(np.zeros((batch_size * num_steps,), dtype=np.int32),
# name="merge_queue")
merge_queue = T.zeros((batch_size * num_steps,), dtype=np.int32)
some_coeff = T.scalar("some_coeff")
def do_stack_ops(t, X_t, mask_t, stack_t, merge_cursors_t, merge_queue_t,
                 inplace=False):
    batch_size = X_t.shape[0]
    batch_range = T.arange(batch_size)
    push_value = some_coeff * X_t
    merge1 = stack_t[(t - 1) * batch_size + batch_range]
    # Get pointers into stack for merge op's 2nd operand.
    merge_ptrs = merge_queue_t[merge_cursors_t - 1 + batch_range * num_steps]
    # Retrieve merge op's 2nd operand.
    merge2 = stack_t[merge_ptrs * num_steps + batch_range]
    # Compose: add stack values.
    merge_value = some_coeff * (merge1 + merge2)

    mask2 = mask_t.dimshuffle(0, "x")
    top_t = mask2 * merge_value + (1 - mask2) * push_value
    stack_next = my_set_subtensor(stack_t[t * batch_size + batch_range],
                                  top_t, "stack_update")

    cursor_next = merge_cursors_t + (mask_t * -1 + (1 - mask_t) * 1)
    queue_next = my_set_subtensor(merge_queue_t[batch_range * num_steps + cursor_next],
                                  t, "queue_update")
    return stack_next, cursor_next, queue_next
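# Note on the row addressing above (layout assumed from the comments in the
# setup section): the stack holds num_steps blocks of batch_size rows, so the
# time-t entry for batch element b lives at row t * batch_size + b. With
# batch_size = 2, t = 3 selects rows [6, 7] via
# stack_t[t * batch_size + batch_range].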
##########################################################
X = T.itensor3("X") # seq_length * batch_size * emb_dim
mask = T.imatrix("mask") # seq_length * batch_size
scan_out, _ = theano.scan(do_stack_ops,
                          sequences=[T.arange(X.shape[0]), X, mask],
                          outputs_info=[stack, merge_cursors, merge_queue])
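# scan_out is a list of three output sequences, one per outputs_info entry
# (stack, merge_cursors, merge_queue), each with the time step as its leading
# axis.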
###########################################################
# Add an optimizer
from theano import gof
from theano.gof import TopoOptimizer
from theano import compile
from theano.tensor.opt import register_canonicalize, register_specialize
@register_canonicalize("force_inplace")
@register_specialize("force_inplace")
@gof.local_optimizer([MyAdvancedSetSubtensor1], inplace=True)
def force_inplace(node):
    # Skip nodes that are already inplace so the rewrite reaches a fixed point.
    if isinstance(node.op, MyAdvancedSetSubtensor1) and not node.op.inplace:
        print "========== HELLO"
        print node.op, node.op.inplace
        new_op = node.op.clone_inplace()
        new_node = new_op(*node.inputs)
        # TODO copy_stack_trace
        return [new_node]
    return False

compile.optdb.register(
    "force_inplace",
    TopoOptimizer(force_inplace, failure_callback=TopoOptimizer.warn_inplace),
    74, "fast_run", "inplace")
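# Note (assumption about Theano's optdb semantics): optdb.register takes
# (name, optimizer, position, *tags); position 74 places this pass near the
# end of the pipeline, where inplace rewrites are normally applied, and the
# "fast_run" / "inplace" tags control which compile modes enable it.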
##############################################################
stack_out = scan_out[0][X.shape[0] - 1].reshape((num_steps, batch_size, embedding_dim))
some_result = stack_out.sum()
grads = T.grad(some_result, some_coeff)
# Gradients are what make the no_inplace necessary in the general case.
# But we want to avoid that in this implementation.
f_grads = theano.function([X, mask, some_coeff], (stack_out, grads))
theano.printing.debugprint(f_grads.maker.fgraph.outputs[0])
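# Quick, illustrative check that force_inplace actually fired: walk the
# optimized graph and report the inplace flag on every labelled op.
for node in f_grads.maker.fgraph.toposort():
    if isinstance(node.op, MyAdvancedSetSubtensor1):
        print node.op, "inplace =", node.op.inplace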