class SoftmaxLayer(CostLayer):
    """
    Softmax output layer.
    """

    def _get_samples(self, model, length=30, temp=1, *inps):
        """
        See parent class
        """
        if not hasattr(model, 'word_indxs_src'):
            model.word_indxs_src = model.word_indxs
        character_level = False
        if hasattr(model, 'character_level'):
            character_level = model.character_level
        if model.del_noise:
            model.del_noise()
        [values, probs] = model.sample_fn(length, temp, *inps)
        # print 'Generated sample is:'
        if values.ndim > 1:
            for d in xrange(2):
                print '%d-th sentence' % d
                print 'Input: ',
                if character_level:
                    sen = []
                    for k in xrange(inps[0].shape[0]):
                        if model.word_indxs_src[inps[0][k][d]] == '<eol>':
                            break
                        sen.append(model.word_indxs_src[inps[0][k][d]])
                    print "".join(sen),
                else:
                    for k in xrange(inps[0].shape[0]):
                        print model.word_indxs_src[inps[0][k][d]],
                        if model.word_indxs_src[inps[0][k][d]] == '<eol>':
                            break
                print ''
                print 'Output: ',
                if character_level:
                    sen = []
                    for k in xrange(values.shape[0]):
                        if model.word_indxs[values[k][d]] == '<eol>':
                            break
                        sen.append(model.word_indxs[values[k][d]])
                    print "".join(sen),
                else:
                    for k in xrange(values.shape[0]):
                        print model.word_indxs[values[k][d]],
                        if model.word_indxs[values[k][d]] == '<eol>':
                            break
        else:
            print 'Input: ',
            if character_level:
                sen = []
                for k in xrange(inps[0].shape[0]):
                    if model.word_indxs_src[inps[0][k]] == '<eol>':
                        break
                    sen.append(model.word_indxs_src[inps[0][k]])
                print "".join(sen),
            else:
                for k in xrange(inps[0].shape[0]):
                    print model.word_indxs_src[inps[0][k]],
                    if model.word_indxs_src[inps[0][k]] == '<eol>':
                        break
            print ''
            print 'Output: ',
            if character_level:
                sen = []
                for k in xrange(values.shape[0]):
                    if model.word_indxs[values[k]] == '<eol>':
                        break
                    sen.append(model.word_indxs[values[k]])
                print "".join(sen),
            else:
                for k in xrange(values.shape[0]):
                    print model.word_indxs[values[k]],
                    if model.word_indxs[values[k]] == '<eol>':
                        break

    def fprop(self,
              state_below,
              temp=numpy.float32(1),
              use_noise=True,
              additional_inputs=None,
              no_noise_bias=False,
              target=None,
              full_softmax=True):
        """
        Forward pass through the cost layer.

        :type state_below: tensor or layer
        :param state_below: The theano expression (or groundhog layer)
            representing the input of the cost layer

        :type temp: float or tensor scalar
        :param temp: scalar representing the temperature that should be used
            when sampling from the output distribution

        :type use_noise: bool
        :param use_noise: flag. If true, noise is used when computing the
            output of the model

        :type no_noise_bias: bool
        :param no_noise_bias: flag, stating if weight noise should be added
            to the bias as well, or only to the weights
        """
        if not full_softmax:
            assert target is not None, 'target must be given'
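        # With rank_n_approx the output embedding is factorized into a
        # low-rank projection W_em1 followed by W_em2 (with noise terms
        # nW_em1 / nW_em2 when weight noise is enabled); otherwise the single
        # output matrix W_em is used directly.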
        if self.rank_n_approx:
            if self.weight_noise and use_noise and self.noise_params:
                emb_val = self.rank_n_activ(utils.dot(state_below,
                                                      self.W_em1 + self.nW_em1))
                nW_em = self.nW_em2
            else:
                emb_val = self.rank_n_activ(utils.dot(state_below, self.W_em1))
            W_em = self.W_em2
        else:
            W_em = self.W_em
            if self.weight_noise:
                nW_em = self.nW_em
            emb_val = state_below

        if full_softmax:
            if self.weight_noise and use_noise and self.noise_params:
                emb_val = TT.dot(emb_val, W_em + nW_em)
            else:
                emb_val = TT.dot(emb_val, W_em)
            if additional_inputs:
                if use_noise and self.noise_params:
                    for inp, weight, noise_weight in zip(
                            additional_inputs, self.additional_weights,
                            self.noise_additional_weights):
                        emb_val += utils.dot(inp, (noise_weight + weight))
                else:
                    for inp, weight in zip(additional_inputs,
                                           self.additional_weights):
                        emb_val += utils.dot(inp, weight)
            if self.weight_noise and use_noise and self.noise_params and \
                    not no_noise_bias:
                emb_val = temp * (emb_val + self.b_em + self.nb_em)
            else:
                emb_val = temp * (emb_val + self.b_em)
        else:
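            # Partial mode: keep only the columns of W_em that correspond to
            # the requested targets; (W_em.T * emb_val).sum(1) is then the dot
            # product of each state with its own target column, i.e. one
            # unnormalized score per (state, target) pair.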
            W_em = W_em[:, target]
            if self.weight_noise:
                nW_em = nW_em[:, target]
                W_em += nW_em
            if emb_val.ndim == 3:
                emb_val = emb_val.reshape([emb_val.shape[0] * emb_val.shape[1],
                                           emb_val.shape[2]])
            emb_val = (W_em.T * emb_val).sum(1) + self.b_em[target]
            if self.weight_noise and use_noise:
                emb_val += self.nb_em[target]
            emb_val = temp * emb_val

        self.preactiv = emb_val
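        # The full softmax normalizes over the entire output vocabulary; the
        # partial (NCE) mode instead squashes each target score independently
        # with a sigmoid.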
        if full_softmax:
            emb_val = utils.softmax(emb_val)
        else:
            emb_val = TT.nnet.sigmoid(emb_val)
        self.out = emb_val
        self.state_below = state_below
        self.model_output = emb_val
        return emb_val

    def compute_sample(self,
                       state_below,
                       temp=1,
                       use_noise=False,
                       additional_inputs=None):
        class_probs = self.fprop(state_below,
                                 temp=temp,
                                 additional_inputs=additional_inputs,
                                 use_noise=use_noise)
        pvals = class_probs
        if pvals.ndim == 1:
            pvals = pvals.dimshuffle('x', 0)
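        # trng.multinomial draws a one-hot row per distribution in pvals;
        # taking argmax over the last axis turns each one-hot draw back into
        # the index of the sampled word.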
        sample = self.trng.multinomial(pvals=pvals,
                                       dtype='int64').argmax(axis=-1)
        if class_probs.ndim == 1:
            sample = sample[0]
        self.sample = sample
        return sample

    def get_cost(self,
                 state_below,
                 target=None,
                 mask=None,
                 temp=1,
                 reg=None,
                 scale=None,
                 sum_over_time=False,
                 no_noise_bias=False,
                 additional_inputs=None,
                 use_noise=True):
        """
        See parent class
        """
        def _grab_probs(class_probs, target):
            shape0 = class_probs.shape[0]
            shape1 = class_probs.shape[1]
            target_ndim = target.ndim
            target_shape = target.shape
            if target.ndim > 1:
                target = target.flatten()
            assert target.ndim == 1, 'make sure target is a vector of ints'
            assert 'int' in target.dtype
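            # Gather p(target_i) for every row i by flattening class_probs and
            # indexing with row_offset + target. For example, with class_probs
            # of shape (3, 5) and target = [2, 0, 4], the flat indices are
            # [0*5 + 2, 1*5 + 0, 2*5 + 4] = [2, 5, 14].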
            pos = TT.arange(shape0) * shape1
            new_targ = target + pos
            return class_probs.flatten()[new_targ]

        assert target, 'Computing the cost requires a target'
        target_shape = target.shape
        target_ndim = target.ndim

        if self.use_nce:
            logger.debug("Using NCE")

            # positive samples: true targets
            class_probs = self.fprop(state_below,
                                     temp=temp,
                                     use_noise=use_noise,
                                     additional_inputs=additional_inputs,
                                     no_noise_bias=no_noise_bias,
                                     target=target.flatten(),
                                     full_softmax=False)
            # negative samples: a single uniform random sample per training sample
            nsamples = TT.cast(self.trng.uniform(class_probs.shape[0].reshape([1]))
                               * self.n_out, 'int64')
            neg_probs = self.fprop(state_below,
                                   temp=temp,
                                   use_noise=use_noise,
                                   additional_inputs=additional_inputs,
                                   no_noise_bias=no_noise_bias,
                                   target=nsamples.flatten(),
                                   full_softmax=False)
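            # NCE-style binary objective: push the sigmoid score of each true
            # target towards 1 and of each sampled negative towards 0; the
            # negative log-term below is weighted by neg_probs.shape[0], i.e.
            # by the number of scored rows.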

            cost_target = class_probs
            cost_nsamples = 1. - neg_probs
            cost = -TT.log(cost_target)
            cost = cost - TT.cast(neg_probs.shape[0], 'float32') * TT.log(cost_nsamples)
        else:
            class_probs = self.fprop(state_below,
                                     temp=temp,
                                     use_noise=use_noise,
                                     additional_inputs=additional_inputs,
                                     no_noise_bias=no_noise_bias)
            cost = -TT.log(_grab_probs(class_probs, target))

        self.word_probs = TT.exp(-cost.reshape(target_shape))
        # Set all the probs after the end-of-line to one
        if mask:
            self.word_probs = self.word_probs * mask + 1 - mask
        if mask:
            cost = cost * TT.cast(mask.flatten(), theano.config.floatX)
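        # Per-sequence cost: when target is 2-D (e.g. time x batch), the
        # per-word costs are reshaped back and summed over the first axis to
        # give one cost per sequence; for 1-D targets the costs are used as-is.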
        self.cost_per_sample = (cost.reshape(target_shape).sum(axis=0)
                                if target_ndim > 1
                                else cost)

        if sum_over_time is None:
            sum_over_time = self.sum_over_time
        if sum_over_time:
            if state_below.ndim == 3:
                cost = cost.reshape((state_below.shape[0],
                                     state_below.shape[1]))
                self.cost = cost.mean(1).sum()
            else:
                self.cost = cost.sum()
        else:
            self.cost = cost.mean()
        if scale:
            self.cost = self.cost * scale
        if reg:
            self.cost = self.cost + reg
        self.mask = mask
        self.cost_scale = scale
        return self.cost
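

# ---------------------------------------------------------------------------
# Illustration only (not part of the SoftmaxLayer above): a minimal plain-numpy
# sketch of the temperature-scaled softmax and the multinomial-then-argmax
# sampling that compute_sample() builds symbolically with Theano. All names in
# this block are hypothetical and it assumes only numpy is available.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy

    def _softmax_rows(x):
        e = numpy.exp(x - x.max(axis=-1, keepdims=True))
        return e / e.sum(axis=-1, keepdims=True)

    rng = numpy.random.RandomState(0)
    preactiv = rng.randn(4, 10)             # 4 hidden states, vocabulary of 10 words
    temp = 1.0
    probs = _softmax_rows(temp * preactiv)  # note: temp multiplies the pre-activations
    # one multinomial draw per row; argmax of the one-hot draw is the sampled index
    samples = numpy.array([rng.multinomial(1, p).argmax() for p in probs])
    print samples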