class SoftmaxLayer(CostLayer):
    """
    Softmax output layer.
    """

    def _get_samples(self, model, length=30, temp=1, *inps):
        """
        See parent class
        """
        if not hasattr(model, 'word_indxs_src'):
            model.word_indxs_src = model.word_indxs

        character_level = False
        if hasattr(model, 'character_level'):
            character_level = model.character_level
        if model.del_noise:
            model.del_noise()
        [values, probs] = model.sample_fn(length, temp, *inps)
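        # values holds the sampled word/character indices (time-major, with a
        # trailing batch axis when the inputs are batched) and probs the
        # associated probabilities; only the first two samples are printed.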
        #print 'Generated sample is:'
        #print
        if values.ndim > 1:
            for d in xrange(2):
                print '%d-th sentence' % d
                print 'Input: ',
                if character_level:
                    sen = []
                    for k in xrange(inps[0].shape[0]):
                        if model.word_indxs_src[inps[0][k][d]] == '<eol>':
                            break
                        sen.append(model.word_indxs_src[inps[0][k][d]])
                    print "".join(sen),
                else:
                    for k in xrange(inps[0].shape[0]):
                        print model.word_indxs_src[inps[0][k][d]],
                        if model.word_indxs_src[inps[0][k][d]] == '<eol>':
                            break
                print ''
                print 'Output: ',
                if character_level:
                    sen = []
                    for k in xrange(values.shape[0]):
                        if model.word_indxs[values[k][d]] == '<eol>':
                            break
                        sen.append(model.word_indxs[values[k][d]])
                    print "".join(sen),
                else:
                    for k in xrange(values.shape[0]):
                        print model.word_indxs[values[k][d]],
                        if model.word_indxs[values[k][d]] == '<eol>':
                            break
                print
                print
        else:
            print 'Input: ',
            if character_level:
                sen = []
                for k in xrange(inps[0].shape[0]):
                    if model.word_indxs_src[inps[0][k]] == '<eol>':
                        break
                    sen.append(model.word_indxs_src[inps[0][k]])
                print "".join(sen),
            else:
                for k in xrange(inps[0].shape[0]):
                    print model.word_indxs_src[inps[0][k]],
                    if model.word_indxs_src[inps[0][k]] == '<eol>':
                        break
            print ''
            print 'Output: ',
            if character_level:
                sen = []
                for k in xrange(values.shape[0]):
                    if model.word_indxs[values[k]] == '<eol>':
                        break
                    sen.append(model.word_indxs[values[k]])
                print "".join(sen),
            else:
                for k in xrange(values.shape[0]):
                    print model.word_indxs[values[k]],
                    if model.word_indxs[values[k]] == '<eol>':
                        break
            print
            print
    def fprop(self,
              state_below,
              temp=numpy.float32(1),
              use_noise=True,
              additional_inputs=None,
              no_noise_bias=False,
              target=None,
              full_softmax=True):
        """
        Forward pass through the cost layer.

        :type state_below: tensor or layer
        :param state_below: The theano expression (or groundhog layer)
            representing the input of the cost layer

        :type temp: float or tensor scalar
        :param temp: scalar representing the temperature that should be used
            when sampling from the output distribution

        :type use_noise: bool
        :param use_noise: flag. If true, noise is used when computing the
            output of the model

        :type no_noise_bias: bool
        :param no_noise_bias: flag, stating if weight noise should be added
            to the bias as well, or only to the weights
        """
        if not full_softmax:
            assert target is not None, 'target must be given'
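        # With rank_n_approx, the big input-to-vocabulary projection is
        # factorised into two smaller matrices: state_below first goes through
        # W_em1 (followed by rank_n_activ), and the result goes through W_em2
        # further down, as a low-rank approximation of a single weight matrix.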
        if self.rank_n_approx:
            if self.weight_noise and use_noise and self.noise_params:
                emb_val = self.rank_n_activ(utils.dot(state_below,
                                                      self.W_em1 + self.nW_em1))
                nW_em = self.nW_em2
            else:
                emb_val = self.rank_n_activ(utils.dot(state_below, self.W_em1))
            W_em = self.W_em2
        else:
            W_em = self.W_em
            if self.weight_noise:
                nW_em = self.nW_em
            emb_val = state_below
        if full_softmax:
            if self.weight_noise and use_noise and self.noise_params:
                emb_val = TT.dot(emb_val, W_em + nW_em)
            else:
                emb_val = TT.dot(emb_val, W_em)
            if additional_inputs:
                if use_noise and self.noise_params:
                    for inp, weight, noise_weight in zip(
                            additional_inputs, self.additional_weights,
                            self.noise_additional_weights):
                        emb_val += utils.dot(inp, (noise_weight + weight))
                else:
                    for inp, weight in zip(additional_inputs, self.additional_weights):
                        emb_val += utils.dot(inp, weight)
            if self.weight_noise and use_noise and self.noise_params and \
                    not no_noise_bias:
                emb_val = temp * (emb_val + self.b_em + self.nb_em)
            else:
                emb_val = temp * (emb_val + self.b_em)
        else:
            W_em = W_em[:, target]
            if self.weight_noise:
                nW_em = nW_em[:, target]
                W_em += nW_em
            if emb_val.ndim == 3:
                emb_val = emb_val.reshape([emb_val.shape[0] * emb_val.shape[1],
                                           emb_val.shape[2]])
            emb_val = (W_em.T * emb_val).sum(1) + self.b_em[target]
            if self.weight_noise and use_noise:
                emb_val += self.nb_em[target]
            emb_val = temp * emb_val
        self.preactiv = emb_val
        if full_softmax:
            emb_val = utils.softmax(emb_val)
        else:
            emb_val = TT.nnet.sigmoid(emb_val)
        self.out = emb_val
        self.state_below = state_below
        self.model_output = emb_val
        return emb_val
    def compute_sample(self,
                       state_below,
                       temp=1,
                       use_noise=False,
                       additional_inputs=None):
        class_probs = self.fprop(state_below,
                                 temp=temp,
                                 additional_inputs=additional_inputs,
                                 use_noise=use_noise)
        pvals = class_probs
        if pvals.ndim == 1:
            pvals = pvals.dimshuffle('x', 0)
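        # trng.multinomial expects a matrix of distributions, so a single
        # distribution is promoted to shape (1, n_out) here; the extra leading
        # axis is dropped again after sampling.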
        sample = self.trng.multinomial(pvals=pvals,
                                       dtype='int64').argmax(axis=-1)
        if class_probs.ndim == 1:
            sample = sample[0]
        self.sample = sample
        return sample
    def get_cost(self,
                 state_below,
                 target=None,
                 mask=None,
                 temp=1,
                 reg=None,
                 scale=None,
                 sum_over_time=False,
                 no_noise_bias=False,
                 additional_inputs=None,
                 use_noise=True):
        """
        See parent class
        """
        def _grab_probs(class_probs, target):
            shape0 = class_probs.shape[0]
            shape1 = class_probs.shape[1]
            target_ndim = target.ndim
            target_shape = target.shape
            if target.ndim > 1:
                target = target.flatten()
            assert target.ndim == 1, 'make sure target is a vector of ints'
            assert 'int' in target.dtype
            pos = TT.arange(shape0) * shape1
            new_targ = target + pos
            return class_probs.flatten()[new_targ]
        assert target is not None, 'Computing the cost requires a target'
        target_shape = target.shape
        target_ndim = target.ndim
        if self.use_nce:
            logger.debug("Using NCE")
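            # Noise-contrastive estimation: instead of normalising over the
            # whole vocabulary, score the true target and one uniformly drawn
            # noise class per example with a sigmoid (full_softmax=False) and
            # train the model to tell them apart.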
            # positive samples: true targets
            class_probs = self.fprop(state_below,
                                     temp=temp,
                                     use_noise=use_noise,
                                     additional_inputs=additional_inputs,
                                     no_noise_bias=no_noise_bias,
                                     target=target.flatten(),
                                     full_softmax=False)
            # negative samples: a single uniform random sample per training sample
            nsamples = TT.cast(self.trng.uniform(class_probs.shape[0].reshape([1])) * self.n_out, 'int64')
            neg_probs = self.fprop(state_below,
                                   temp=temp,
                                   use_noise=use_noise,
                                   additional_inputs=additional_inputs,
                                   no_noise_bias=no_noise_bias,
                                   target=nsamples.flatten(),
                                   full_softmax=False)
            cost_target = class_probs
            cost_nsamples = 1. - neg_probs
            cost = -TT.log(cost_target)
            cost = cost - TT.cast(neg_probs.shape[0], 'float32') * TT.log(cost_nsamples)
        else:
            class_probs = self.fprop(state_below,
                                     temp=temp,
                                     use_noise=use_noise,
                                     additional_inputs=additional_inputs,
                                     no_noise_bias=no_noise_bias)
            cost = -TT.log(_grab_probs(class_probs, target))
        self.word_probs = TT.exp(-cost.reshape(target_shape))
        # Set all the probs after the end-of-line to one
        if mask is not None:
            self.word_probs = self.word_probs * mask + 1 - mask
        if mask is not None:
            cost = cost * TT.cast(mask.flatten(), theano.config.floatX)
        self.cost_per_sample = (cost.reshape(target_shape).sum(axis=0)
                                if target_ndim > 1
                                else cost)
        if sum_over_time is None:
            sum_over_time = self.sum_over_time
        if sum_over_time:
            if state_below.ndim == 3:
                cost = cost.reshape((state_below.shape[0],
                                     state_below.shape[1]))
                self.cost = cost.mean(1).sum()
            else:
                self.cost = cost.sum()
        else:
            self.cost = cost.mean()
        if scale:
            self.cost = self.cost * scale
        if reg:
            self.cost = self.cost + reg
        self.mask = mask
        self.cost_scale = scale
        return self.cost
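

# ----------------------------------------------------------------------------
# Illustrative sketch (not part of GroundHog): the two tricks used above,
# replayed in plain numpy so they can be checked in isolation.
#   * fprop scales the pre-activation by `temp` before the softmax, and
#     compute_sample then draws from the resulting multinomial.
#   * get_cost._grab_probs flattens the (examples, n_words) probability matrix
#     and indexes it at row * n_words + target to pull out each example's
#     target probability.
# All names below (logits, targets, ...) are invented for the example.
if __name__ == '__main__':
    import numpy

    rng = numpy.random.RandomState(0)
    n_examples, n_words, temp = 4, 10, 2.0

    logits = rng.randn(n_examples, n_words)

    # Temperature-scaled softmax, as in fprop (temp multiplies the
    # pre-activation before normalisation).
    scaled = temp * logits
    scaled -= scaled.max(axis=1, keepdims=True)      # for numerical stability
    probs = numpy.exp(scaled)
    probs /= probs.sum(axis=1, keepdims=True)

    # Multinomial draw, one index per row (inverse-CDF form of the
    # trng.multinomial(...).argmax(-1) call in compute_sample).
    u = rng.rand(n_examples, 1)
    samples = (probs.cumsum(axis=1) > u).argmax(axis=1)

    # Flattened target-probability lookup, as in get_cost._grab_probs.
    targets = rng.randint(0, n_words, size=n_examples)
    flat_index = numpy.arange(n_examples) * n_words + targets
    target_probs = probs.flatten()[flat_index]
    print 'sampled indices:   ', samples
    print 'mean cross-entropy:', -numpy.log(target_probs).mean()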