Created
October 22, 2013 13:15
-
-
Save sisp/7100561 to your computer and use it in GitHub Desktop.
ERROR (theano.gof.opt): Optimization failure due to: remove_constants_and_unused_inputs_scan
ERROR (theano.gof.opt): TRACEBACK:
ERROR (theano.gof.opt): Traceback (most recent call last): File "/home/sigurd/.local/lib/python2.7/site-packages/Theano-0.6.0rc3-py2.7.egg/theano/gof/opt.py", line 1216, in process_node replacements = lopt.transform(nod…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import theano | |
import theano.tensor as T | |
floatX = theano.config.floatX | |
class GaussNewtonMatrix(object):
    """Compute Gauss-Newton matrix-vector products for a network.

    Holds the *linear* (pre-activation) network outputs and, on call,
    produces G*v + damp*v for a list of parameter-shaped vectors `v`,
    using the standard R-op / double-backprop construction.
    """

    def __init__(self, s):
        # `s` is the linear network output, i.e. the network output
        # before the activation function has been applied.
        self._s = s

    def __call__(self, v, cost, parameters, damp):
        # J*v: directional derivative of the linear outputs along `v`.
        jac_vec = T.Rop(self._s, parameters, v)
        # H_L*(J*v): differentiate <dcost/ds, J*v> w.r.t. s, treating
        # J*v itself as a constant.
        hess_jac_vec = T.grad(
            T.sum(T.grad(cost, self._s) * jac_vec),
            self._s,
            consider_constant=[jac_vec])
        # J^T*(H_L*J*v): a second backward pass through the outputs.
        gauss_newton_vec = T.grad(
            T.sum(hess_jac_vec * self._s),
            parameters,
            consider_constant=[hess_jac_vec, jac_vec])
        # Tikhonov damping: return G*v + damp*v, element-wise per parameter.
        return [gv + damp * vi for gv, vi in zip(gauss_newton_vec, v)]
def run(num_features, num_timesteps, batch_size=1):
    """Build a simple RNN with an MSE cost, compute Gauss-Newton products,
    compile a Theano function and execute it once.

    Parameters
    ----------
    num_features : int
        Dimensionality of each input frame.
    num_timesteps : int
        Number of timesteps in the sequence.
    batch_size : int
        When 1, inputs/targets are matrices; otherwise tensor3s with the
        batch on axis 1.
    """
    # determine shapes of inputs and targets depending on the batch size
    if batch_size == 1:
        inputs_size = (num_timesteps, num_features)
        targets_size = (num_timesteps, 1)
    else:
        inputs_size = (num_timesteps, batch_size, num_features)
        targets_size = (num_timesteps, batch_size, 1)
    # make inputs and targets shared variables
    inputs = theano.shared(
        np.random.uniform(size=inputs_size).astype(floatX), borrow=True)
    targets = theano.shared(
        np.random.uniform(size=targets_size).astype(floatX), borrow=True)
    # create symbolic inputs and targets variables
    x = T.matrix('inputs') if batch_size == 1 else T.tensor3('inputs')
    # BUG FIX: the tensor3 branch previously reused the name 'inputs'
    # (copy-paste error); the targets variable is now named 'targets'
    # in both branches.
    t = T.matrix('targets') if batch_size == 1 else T.tensor3('targets')
    # create a set of parameters for a simple RNN (hidden size 10)
    W_xh = theano.shared(
        0.01 * np.random.uniform(size=(num_features, 10)).astype(floatX),
        borrow=True)
    W_hh = theano.shared(
        0.01 * np.random.uniform(size=(10, 10)).astype(floatX), borrow=True)
    W_hy = theano.shared(
        0.01 * np.random.uniform(size=(10, 1)).astype(floatX), borrow=True)
    b_h = theano.shared(np.zeros(10).astype(floatX), borrow=True)
    b_y = theano.shared(np.zeros(1).astype(floatX), borrow=True)
    params = [W_xh, W_hh, W_hy, b_h, b_y]

    # recurrent function: one tanh step of the RNN
    def step(x_t, h_tm1):
        h = T.tanh(T.dot(h_tm1, W_hh) + T.dot(x_t, W_xh) + b_h)
        return h

    # build recurrent graph; the initial hidden state matches the batch shape
    h_0 = T.alloc(0.0, 10) if batch_size == 1 else T.alloc(0.0, batch_size, 10)
    h, updates = theano.scan(step,
                             sequences=[x],
                             outputs_info=[h_0])
    # network output (linear readout of the hidden states)
    y = T.dot(h, W_hy) + b_y
    # Create Gauss-Newton-Matrix object. Not really of any use here, but I
    # need it for Hessian-Free optimization.
    gn = GaussNewtonMatrix(y)
    # compute MSE
    cost = ((t - y)**2).sum(axis=1).mean()
    # Compute the cost at some other point in the parameter space. Not really
    # of any use here, but this is how it's done during certain iterations of
    # CG in the HF algorithm. There, it's in fact `pi + current update
    # proposal`. For simplicity, just multiply by 2 here.
    # ! NOTE: If you comment out the next line and remove `cost_` from the
    # function outputs below, it works for both batch-size cases. !
    cost_ = theano.clone(cost, replace=dict([(pi, 2 * pi) for pi in params]))
    # Compute Gauss-Newton-Matrix times some vector `v` which is `p` in CG,
    # but for simplicity, just take the parameters vector because it's
    # already there.
    Gv = gn(v=params, cost=cost, parameters=params, damp=T.constant(1.0))
    # compile Theano function
    f = theano.function([], [cost_] + Gv, givens={x: inputs, t: targets})
    # execute
    f()
if __name__ == '__main__':
    # Batch size > 1: data is represented as tensor3 objects — this case
    # runs fine.
    run(100, 10, batch_size=5)
    # Batch size == 1: data is represented as matrix objects — this case
    # raises:
    #   ERROR (theano.gof.opt): Optimization failure due to:
    #   remove_constants_and_unused_inputs_scan
    run(100, 10, batch_size=1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment