class SoftmaxLayer(CostLayer):
    """
    Softmax output layer.
    """

    def _get_samples(self, model, length=30, temp=1, *inps):
        """
        See parent class
        """
        if not hasattr(model, 'word_indxs_src'):
            model.word_indxs_src = model.word_indxs
        character_level = False
        if hasattr(model, 'character_level'):
            character_level = model.character_level
        if model.del_noise:
            model.del_noise()
        [values, probs] = model.sample_fn(length, temp, *inps)
        # print 'Generated sample is:'
        if values.ndim > 1:
            for d in xrange(2):
                print '%d-th sentence' % d
                print 'Input: ',
                if character_level:
                    sen = []
                    for k in xrange(inps[0].shape[0]):
                        if model.word_indxs_src[inps[0][k][d]] == '<eol>':
                            break
                        sen.append(model.word_indxs_src[inps[0][k][d]])
                    print "".join(sen),
                else:
                    for k in xrange(inps[0].shape[0]):
                        print model.word_indxs_src[inps[0][k][d]],
                        if model.word_indxs_src[inps[0][k][d]] == '<eol>':
                            break
                print ''
                print 'Output: ',
                if character_level:
                    sen = []
                    for k in xrange(values.shape[0]):
                        if model.word_indxs[values[k][d]] == '<eol>':
                            break
                        sen.append(model.word_indxs[values[k][d]])
                    print "".join(sen),
                else:
                    for k in xrange(values.shape[0]):
                        print model.word_indxs[values[k][d]],
                        if model.word_indxs[values[k][d]] == '<eol>':
                            break
        else:
            print 'Input: ',
            if character_level:
                sen = []
                for k in xrange(inps[0].shape[0]):
                    if model.word_indxs_src[inps[0][k]] == '<eol>':
                        break
                    sen.append(model.word_indxs_src[inps[0][k]])
                print "".join(sen),
            else:
                for k in xrange(inps[0].shape[0]):
                    print model.word_indxs_src[inps[0][k]],
                    if model.word_indxs_src[inps[0][k]] == '<eol>':
                        break
            print ''
            print 'Output: ',
            if character_level:
                sen = []
                for k in xrange(values.shape[0]):
                    if model.word_indxs[values[k]] == '<eol>':
                        break
                    sen.append(model.word_indxs[values[k]])
                print "".join(sen),
            else:
                for k in xrange(values.shape[0]):
                    print model.word_indxs[values[k]],
                    if model.word_indxs[values[k]] == '<eol>':
                        break

    def fprop(self,
              state_below,
              temp=numpy.float32(1),
              use_noise=True,
              additional_inputs=None,
              no_noise_bias=False,
              target=None,
              full_softmax=True):
        """
        Forward pass through the cost layer.

        :type state_below: tensor or layer
        :param state_below: The theano expression (or groundhog layer)
            representing the input of the cost layer

        :type temp: float or tensor scalar
        :param temp: scalar representing the temperature that should be used
            when sampling from the output distribution

        :type use_noise: bool
        :param use_noise: flag. If true, noise is used when computing the
            output of the model

        :type no_noise_bias: bool
        :param no_noise_bias: flag, stating if weight noise should be added
            to the bias as well, or only to the weights
        """
        if not full_softmax:
            assert target is not None, 'target must be given'
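        # With rank_n_approx the output embedding is factorized into a
        # low-rank projection W_em1 followed by W_em2 (with noise terms
        # nW_em1 / nW_em2 when weight noise is enabled); otherwise the single
        # output matrix W_em is used directly.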
        if self.rank_n_approx:
            if self.weight_noise and use_noise and self.noise_params:
                emb_val = self.rank_n_activ(utils.dot(state_below,
                                                      self.W_em1 + self.nW_em1))
                nW_em = self.nW_em2
            else:
                emb_val = self.rank_n_activ(utils.dot(state_below, self.W_em1))
            W_em = self.W_em2
        else:
            W_em = self.W_em
            if self.weight_noise:
                nW_em = self.nW_em
            emb_val = state_below

        if full_softmax:
            if self.weight_noise and use_noise and self.noise_params:
                emb_val = TT.dot(emb_val, W_em + nW_em)
            else:
                emb_val = TT.dot(emb_val, W_em)
            if additional_inputs:
                if use_noise and self.noise_params:
                    for inp, weight, noise_weight in zip(
                            additional_inputs, self.additional_weights,
                            self.noise_additional_weights):
                        emb_val += utils.dot(inp, (noise_weight + weight))
                else:
                    for inp, weight in zip(additional_inputs,
                                           self.additional_weights):
                        emb_val += utils.dot(inp, weight)
            if self.weight_noise and use_noise and self.noise_params and \
                    not no_noise_bias:
                emb_val = temp * (emb_val + self.b_em + self.nb_em)
            else:
                emb_val = temp * (emb_val + self.b_em)
        else:
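            # Partial mode: keep only the columns of W_em that correspond to
            # the requested targets; (W_em.T * emb_val).sum(1) is then the dot
            # product of each state with its own target column, i.e. one
            # unnormalized score per (state, target) pair.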
            W_em = W_em[:, target]
            if self.weight_noise:
                nW_em = nW_em[:, target]
                W_em += nW_em
            if emb_val.ndim == 3:
                emb_val = emb_val.reshape([emb_val.shape[0] * emb_val.shape[1],
                                           emb_val.shape[2]])
            emb_val = (W_em.T * emb_val).sum(1) + self.b_em[target]
            if self.weight_noise and use_noise:
                emb_val += self.nb_em[target]
            emb_val = temp * emb_val

        self.preactiv = emb_val
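        # The full softmax normalizes over the entire output vocabulary; the
        # partial (NCE) mode instead squashes each target score independently
        # with a sigmoid.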
        if full_softmax:
            emb_val = utils.softmax(emb_val)
        else:
            emb_val = TT.nnet.sigmoid(emb_val)
        self.out = emb_val
        self.state_below = state_below
        self.model_output = emb_val
        return emb_val

    def compute_sample(self,
                       state_below,
                       temp=1,
                       use_noise=False,
                       additional_inputs=None):
        class_probs = self.fprop(state_below,
                                 temp=temp,
                                 additional_inputs=additional_inputs,
                                 use_noise=use_noise)
        pvals = class_probs
        if pvals.ndim == 1:
            pvals = pvals.dimshuffle('x', 0)
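        # trng.multinomial draws a one-hot row per distribution in pvals;
        # taking argmax over the last axis turns each one-hot draw back into
        # the index of the sampled word.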
        sample = self.trng.multinomial(pvals=pvals,
                                       dtype='int64').argmax(axis=-1)
        if class_probs.ndim == 1:
            sample = sample[0]
        self.sample = sample
        return sample

    def get_cost(self,
                 state_below,
                 target=None,
                 mask=None,
                 temp=1,
                 reg=None,
                 scale=None,
                 sum_over_time=False,
                 no_noise_bias=False,
                 additional_inputs=None,
                 use_noise=True):
        """
        See parent class
        """
        def _grab_probs(class_probs, target):
            shape0 = class_probs.shape[0]
            shape1 = class_probs.shape[1]
            target_ndim = target.ndim
            target_shape = target.shape
            if target.ndim > 1:
                target = target.flatten()
            assert target.ndim == 1, 'make sure target is a vector of ints'
            assert 'int' in target.dtype
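            # Gather p(target_i) for every row i by flattening class_probs and
            # indexing with row_offset + target. For example, with class_probs
            # of shape (3, 5) and target = [2, 0, 4], the flat indices are
            # [0*5 + 2, 1*5 + 0, 2*5 + 4] = [2, 5, 14].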
            pos = TT.arange(shape0) * shape1
            new_targ = target + pos
            return class_probs.flatten()[new_targ]

        assert target, 'Computing the cost requires a target'
        target_shape = target.shape
        target_ndim = target.ndim

        if self.use_nce:
            logger.debug("Using NCE")

            # positive samples: true targets
            class_probs = self.fprop(state_below,
                                     temp=temp,
                                     use_noise=use_noise,
                                     additional_inputs=additional_inputs,
                                     no_noise_bias=no_noise_bias,
                                     target=target.flatten(),
                                     full_softmax=False)
            # negative samples: a single uniform random sample per training sample
            nsamples = TT.cast(self.trng.uniform(class_probs.shape[0].reshape([1]))
                               * self.n_out, 'int64')
            neg_probs = self.fprop(state_below,
                                   temp=temp,
                                   use_noise=use_noise,
                                   additional_inputs=additional_inputs,
                                   no_noise_bias=no_noise_bias,
                                   target=nsamples.flatten(),
                                   full_softmax=False)
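            # NCE-style binary objective: push the sigmoid score of each true
            # target towards 1 and of each sampled negative towards 0; the
            # negative log-term below is weighted by neg_probs.shape[0], i.e.
            # by the number of scored rows.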

            cost_target = class_probs
            cost_nsamples = 1. - neg_probs
            cost = -TT.log(cost_target)
            cost = cost - TT.cast(neg_probs.shape[0], 'float32') * TT.log(cost_nsamples)
        else:
            class_probs = self.fprop(state_below,
                                     temp=temp,
                                     use_noise=use_noise,
                                     additional_inputs=additional_inputs,
                                     no_noise_bias=no_noise_bias)
            cost = -TT.log(_grab_probs(class_probs, target))

        self.word_probs = TT.exp(-cost.reshape(target_shape))
        # Set all the probs after the end-of-line to one
        if mask:
            self.word_probs = self.word_probs * mask + 1 - mask
        if mask:
            cost = cost * TT.cast(mask.flatten(), theano.config.floatX)
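        # Per-sequence cost: when target is 2-D (e.g. time x batch), the
        # per-word costs are reshaped back and summed over the first axis to
        # give one cost per sequence; for 1-D targets the costs are used as-is.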
        self.cost_per_sample = (cost.reshape(target_shape).sum(axis=0)
                                if target_ndim > 1
                                else cost)

        if sum_over_time is None:
            sum_over_time = self.sum_over_time
        if sum_over_time:
            if state_below.ndim == 3:
                cost = cost.reshape((state_below.shape[0],
                                     state_below.shape[1]))
                self.cost = cost.mean(1).sum()
            else:
                self.cost = cost.sum()
        else:
            self.cost = cost.mean()
        if scale:
            self.cost = self.cost * scale
        if reg:
            self.cost = self.cost + reg
        self.mask = mask
        self.cost_scale = scale
        return self.cost
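

# ---------------------------------------------------------------------------
# Illustration only (not part of the SoftmaxLayer above): a minimal plain-numpy
# sketch of the temperature-scaled softmax and the multinomial-then-argmax
# sampling that compute_sample() builds symbolically with Theano. All names in
# this block are hypothetical and it assumes only numpy is available.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy

    def _softmax_rows(x):
        e = numpy.exp(x - x.max(axis=-1, keepdims=True))
        return e / e.sum(axis=-1, keepdims=True)

    rng = numpy.random.RandomState(0)
    preactiv = rng.randn(4, 10)             # 4 hidden states, vocabulary of 10 words
    temp = 1.0
    probs = _softmax_rows(temp * preactiv)  # note: temp multiplies the pre-activations
    # one multinomial draw per row; argmax of the one-hot draw is the sampled index
    samples = numpy.array([rng.multinomial(1, p).argmax() for p in probs])
    print samples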