@hans
Created December 13, 2015 22:38
from collections import OrderedDict
import numpy as np
import theano
from theano import tensor as T
# Subclass of AdvancedIncSubtensor1 that always sets (rather than increments),
# carries a `purpose` label for identifying nodes in the graph, and logs every
# change to its `inplace` flag so we can see when the optimizer flips it.
class MyAdvancedSetSubtensor1(T.subtensor.AdvancedIncSubtensor1):
    def get_inplace(self):
        return self._inplace

    def set_inplace(self, val):
        print "=== set_inplace{%s}" % self.purpose, val
        self._inplace = val

    inplace = property(get_inplace, set_inplace)

    def __init__(self, purpose, *args, **kwargs):
        kwargs["set_instead_of_inc"] = True
        self.purpose = purpose
        print "in constructor", kwargs
        super(MyAdvancedSetSubtensor1, self).__init__(*args, **kwargs)

    def __str__(self):
        super_str = super(MyAdvancedSetSubtensor1, self).__str__()
        super_str += "{%s}" % self.purpose
        return super_str

    def clone_inplace(self):
        return self.__class__(self.purpose, inplace=True,
                              set_instead_of_inc=self.set_instead_of_inc)
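# Illustrative only: the op is applied like the stock AdvancedIncSubtensor1,
# i.e. op(x, y, ilist) writes the rows of y into x at the integer indices in
# ilist. The names below are hypothetical and unused elsewhere:
#
#     demo_op = MyAdvancedSetSubtensor1("demo")
#     demo_out = demo_op(T.matrix("x"), T.matrix("y"), T.lvector("idx"))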
from theano.tensor.subtensor import *
def my_inc_subtensor(x, y, purpose, inplace=False, set_instead_of_inc=False,
                     tolerate_inplace_aliasing=False):
    """
    Return x with the given subtensor incremented by y.

    Parameters
    ----------
    x
        The symbolic result of a Subtensor operation.
    y
        The amount by which to increment the subtensor in question.
    purpose
        Label attached to the custom set-subtensor op (used for logging and
        for debugging the inplace optimization below).
    inplace
        Don't use. Theano will do it when possible.
    set_instead_of_inc
        If True, do a set_subtensor instead.
    tolerate_inplace_aliasing
        Allow x and y to be views of a single underlying array even while
        working inplace. For correct results, x and y must not be overlapping
        views; if they overlap, the result of this Op will generally be
        incorrect. This value has no effect if inplace=False.

    Examples
    --------
    To replicate the numpy expression "r[10:] += 5", type

    >>> r = ivector()
    >>> new_r = inc_subtensor(r[10:], 5)
    """
    # First of all, y cannot have a higher dimension than x,
    # nor have non-broadcastable dimensions where x is broadcastable.
    x = theano.tensor.as_tensor_variable(x)
    y = theano.tensor.as_tensor_variable(y)
    if y.ndim > x.ndim:
        raise TypeError(("Trying to increment a %d-dimensional "
                         "subtensor with a %d-dimensional value.") % (x.ndim,
                                                                      y.ndim))
    dim_offset = x.ndim - y.ndim
    for dim in xrange(y.ndim):
        if x.broadcastable[dim + dim_offset] and not y.broadcastable[dim]:
            # It is acceptable to try to increment a subtensor with a
            # broadcastable dim with a tensor that is not broadcastable
            # on that dimension. However, its length must then be 1.
            # We insert a Rebroadcast Op to make sure it is the case.
            y = addbroadcast(y, dim)

    if not x.owner:
        raise TypeError('x must be the result of a subtensor operation')

    # retrieve idx_list from x.owner
    if isinstance(x.owner.op, Subtensor):
        if tolerate_inplace_aliasing:
            destroyhandler_tolerate_aliased = [[0, 1]]
        else:
            destroyhandler_tolerate_aliased = []
        the_op = IncSubtensor(
            x.owner.op.idx_list, inplace, set_instead_of_inc,
            destroyhandler_tolerate_aliased=destroyhandler_tolerate_aliased)
        real_x = x.owner.inputs[0]
        real_idxargs = x.owner.inputs[1:]
        return the_op(real_x, y, *real_idxargs)
    elif isinstance(x.owner.op, AdvancedSubtensor1):
        real_x = x.owner.inputs[0]
        ilist = x.owner.inputs[1]
        print "HERE"
        if set_instead_of_inc:
            the_op = MyAdvancedSetSubtensor1(purpose)
        else:
            # Fall back to the stock op when incrementing rather than setting.
            the_op = AdvancedIncSubtensor1(inplace,
                                           set_instead_of_inc=set_instead_of_inc)
        return the_op(real_x, y, ilist)
    elif isinstance(x.owner.op, AdvancedSubtensor):
        real_x = x.owner.inputs[0]
        ilist = x.owner.inputs[1:]
        the_op = AdvancedIncSubtensor(inplace,
                                      set_instead_of_inc=set_instead_of_inc)
        return the_op(real_x, y, *ilist)
    elif isinstance(x.owner.op, DimShuffle):
        inner_x = x.owner.inputs[0]
        # In the dimshuffle case, there are in fact two dimshuffles:
        # one to make the indexed dimension the last one,
        # and one to put it back where it was. So, in the case where we have
        # inc_subtensor(x[:,i], y), the graph is actually
        # inc_subtensor((x.T)[i].T, y).
        # We could get all the way to x, and then get rid of the dimshuffles
        # completely, but the problem is that advanced_inc_subtensor1 can only
        # work on the first (outer-most, left-most) dimension of x,
        # just like advanced_subtensor1.
        # So we call advanced_inc_subtensor1(x.T, i, y.T) (as we also need to
        # transpose y if it is not a scalar or a vector), but then we need to
        # return something that has the same shape as x, not as x.T (inner_x).
        # So re-apply the outer dimshuffle on the new inc_subtensor,
        # and return advanced_inc_subtensor1(x.T, i, y.T).T.

        # Get the dimshuffle pattern to apply to y.
        x_order = x.owner.op.new_order
        y_order = ['x'] * x.ndim
        for i, v in enumerate(x_order):
            if v != 'x' and (v - dim_offset) >= 0:
                y_order[v - dim_offset] = i

        # Warn if this code path would have produced wrong results in the past
        if config.warn.inc_set_subtensor1:
            # Dimshuffle pattern for y that would be equivalent to past code
            prev_y_order = ['x'] * (dim_offset) + list(range(y.ndim))
            if y_order != prev_y_order:
                warnings.warn(
                    'Although your current code is fine, please note that '
                    'earlier versions prior to 0.7 (or this development '
                    'version) may have yielded an incorrect result in '
                    'this `inc_subtensor` or `set_subtensor` operation. '
                    'To remove this warning, you can either set the '
                    '`warn.inc_set_subtensor1` config option to `False`, '
                    'or `warn.ignore_bug_before` to at least "0.7".',
                    stacklevel=2)

        inner_incsubtensor = inc_subtensor(
            inner_x,
            y.dimshuffle(y_order),
            inplace=inplace,
            set_instead_of_inc=set_instead_of_inc,
            tolerate_inplace_aliasing=tolerate_inplace_aliasing)
        return x.owner.op(inner_incsubtensor, *x.owner.inputs[1:])
    elif isinstance(x.owner.op, theano.tensor.Reshape):
        # This case happens when the indices are not arranged as a vector, but
        # as a higher-dimensional array. This is handled by the subtensor
        # by flattening this list, taking the subtensor, then reshaping the
        # result.
        inner_x = x.owner.inputs[0]
        # Try to apply inc_subtensor on inner_x.
        # If it works, there is no need to reshape, as the inc_subtensor
        # will have the same shape as inner_x, which is what we want.
        # We also explicitly duplicate y to its broadcasted shape
        # before we partially flatten it to inner_x dimension. This is
        # not strictly needed in all cases, but it is easier this way.
        if y.ndim > 0:
            # This if is needed to prevent some useless warning about
            # old code bug.
            expanded_y = alloc(y, *[x.shape[i] for i in xrange(x.ndim)])
            flattened_y = expanded_y.flatten(inner_x.ndim)
        else:
            flattened_y = y

        # Warn if this code path would have produced wrong results in the past
        if config.warn.inc_set_subtensor1:
            if inner_x.ndim > 1 and sum(y.broadcastable) > 0:
                warnings.warn(
                    'Although your current code is fine, please note that '
                    'earlier versions prior to 0.7 (or this development '
                    'version) may have yielded an incorrect result in '
                    'this `inc_subtensor` or `set_subtensor` operation. '
                    'To remove this warning, you can either set the '
                    '`warn.inc_set_subtensor1` config option to `False`, '
                    'or `warn.ignore_bug_before` to at least "0.7".',
                    stacklevel=2)

        inner_incsubtensor = inc_subtensor(
            inner_x,
            flattened_y,
            inplace=inplace,
            set_instead_of_inc=set_instead_of_inc,
            tolerate_inplace_aliasing=tolerate_inplace_aliasing)
        return inner_incsubtensor
    else:
        raise TypeError('x must be the result of a subtensor operation')
def my_set_subtensor(x, y, purpose):
    return my_inc_subtensor(x, y, purpose, set_instead_of_inc=True)
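# Illustrative usage only (hypothetical names; mirrors T.set_subtensor):
#
#     v = T.matrix("v")
#     idx = T.lvector("idx")
#     vals = T.matrix("vals")
#     new_v = my_set_subtensor(v[idx], vals, "demo")  # ~ T.set_subtensor(v[idx], vals)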
########################################
batch_size = 2
stack_size = 30
embedding_dim = 20
num_steps = 5
# num_steps blocks of batch_size elements
#stack = theano.shared(np.zeros((num_steps * batch_size, embedding_dim), dtype=np.float32),
# name="stack")
stack = T.zeros((num_steps * batch_size, embedding_dim))
#t = theano.shared(0, name="t")
#merge_cursors = theano.shared(np.ones((batch_size,), dtype=np.int32) * -1,
# name="merge_cursor")
merge_cursors = T.zeros((batch_size,), dtype=np.int32)
# batch_size blocks of num_steps elements
#merge_queue = theano.shared(np.zeros((batch_size * num_steps,), dtype=np.int32),
# name="merge_queue")
merge_queue = T.zeros((batch_size * num_steps,), dtype=np.int32)
some_coeff = T.scalar("some_coeff")
def do_stack_ops(t, X_t, mask_t, stack_t, merge_cursors_t, merge_queue_t,
                 inplace=False):
    batch_size = X_t.shape[0]
    batch_range = T.arange(batch_size)
    push_value = some_coeff * X_t
    merge1 = stack_t[(t - 1) * batch_size + batch_range]
    # Get pointers into stack for merge op's 2nd operand.
    merge_ptrs = merge_queue_t[merge_cursors_t - 1 + batch_range * num_steps]
    # Retrieve merge op's 2nd operand.
    merge2 = stack_t[merge_ptrs * num_steps + batch_range]
    # Compose: add stack values.
    merge_value = some_coeff * (merge1 + merge2)

    mask2 = mask_t.dimshuffle(0, "x")
    top_t = mask2 * merge_value + (1 - mask2) * push_value
    stack_next = my_set_subtensor(stack_t[t * batch_size + batch_range],
                                  top_t, "stack_update")

    cursor_next = merge_cursors_t + (mask_t * -1 + (1 - mask_t) * 1)
    queue_next = my_set_subtensor(merge_queue_t[batch_range * num_steps + cursor_next],
                                  t, "queue_update")
    return stack_next, cursor_next, queue_next
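# Note on the row addressing above (layout assumed from the comments in the
# setup section): the stack holds num_steps blocks of batch_size rows, so the
# time-t entry for batch element b lives at row t * batch_size + b. With
# batch_size = 2, t = 3 selects rows [6, 7] via
# stack_t[t * batch_size + batch_range].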
##########################################################
X = T.itensor3("X") # seq_length * batch_size * emb_dim
mask = T.imatrix("mask") # seq_length * batch_size
scan_out, _ = theano.scan(do_stack_ops,
                          sequences=[T.arange(X.shape[0]), X, mask],
                          outputs_info=[stack, merge_cursors, merge_queue])
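# scan_out is a list of three output sequences, one per outputs_info entry
# (stack, merge_cursors, merge_queue), each with the time step as its leading
# axis.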
###########################################################
# Add an optimizer
from theano import gof
from theano.gof import TopoOptimizer
from theano import compile
from theano.tensor.opt import register_canonicalize, register_specialize
@register_canonicalize("force_inplace")
@register_specialize("force_inplace")
@gof.local_optimizer([MyAdvancedSetSubtensor1], inplace=True)
def force_inplace(node):
    # Skip nodes that are already inplace so the rewrite reaches a fixed point.
    if isinstance(node.op, MyAdvancedSetSubtensor1) and not node.op.inplace:
        print "========== HELLO"
        print node.op, node.op.inplace
        new_op = node.op.clone_inplace()
        new_node = new_op(*node.inputs)
        # TODO copy_stack_trace
        return [new_node]
    return False

compile.optdb.register(
    "force_inplace",
    TopoOptimizer(force_inplace, failure_callback=TopoOptimizer.warn_inplace),
    74, "fast_run", "inplace")
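# Note (assumption about Theano's optdb semantics): optdb.register takes
# (name, optimizer, position, *tags); position 74 places this pass near the
# end of the pipeline, where inplace rewrites are normally applied, and the
# "fast_run" / "inplace" tags control which compile modes enable it.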
##############################################################
stack_out = scan_out[0][X.shape[0] - 1].reshape((num_steps, batch_size, embedding_dim))
some_result = stack_out.sum()
grads = T.grad(some_result, some_coeff)
# Gradients are what make the no_inplace necessary in the general case.
# But we want to avoid that in this implementation.
f_grads = theano.function([X, mask, some_coeff], (stack_out, grads))
theano.printing.debugprint(f_grads.maker.fgraph.outputs[0])
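# Quick, illustrative check that force_inplace actually fired: walk the
# optimized graph and report the inplace flag on every labelled op.
for node in f_grads.maker.fgraph.toposort():
    if isinstance(node.op, MyAdvancedSetSubtensor1):
        print node.op, "inplace =", node.op.inplace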