# Beam search for Keras RNN
# variation to
def keras_rnn_predict(samples, empty=empty, rnn_model=model, maxlen=maxlen):
    """For every sample, calculate the probability of every possible label.

    The samples are padded to `maxlen` with the `empty` label through
    `sequence.pad_sequences`, and the padded batch is handed to
    `rnn_model.predict`; whatever that call returns is returned unchanged.

    You need to supply your RNN model and `maxlen`, the length of the
    sequences it can handle.
    """
    return rnn_model.predict(
        sequence.pad_sequences(samples, maxlen=maxlen, value=empty),
        verbose=0,
    )
def beamsearch(predict=keras_rnn_predict,
               k=1, maxsample=400, use_unk=False, oov=oov, empty=empty, eos=eos):
    """Beam-search for label sequences and return them with their NLL scores.

    Every sample is a list of labels that starts with the `empty` label
    (which is kept in the returned sequences) and grows by one label per
    iteration. A sample is finished once its last label equals `eos` or its
    length, leading `empty` label included, reaches `maxsample`. The search
    stops when no unfinished samples remain or when `k` samples have
    finished.

    `predict` is called as `predict(samples, empty=empty)` and must return
    a 2-D array of label probabilities, one row per sample.
    When `use_unk` is false and `oov` is not None, the `oov`
    (out-of-vocabulary) label is given a score of 1e20 so that it is
    effectively never selected.

    Returns a tuple `(samples, scores)`: finished samples first, followed
    by any samples still unfinished when the search stopped, with the
    scores in the matching order. A score is the accumulated sum of
    -log(probability) over the chosen labels (lower is better).
    """
    finished_samples, finished_scores = [], []  # reached eos or maxsample
    active_samples, active_scores = [[empty]], [0]  # still being extended

    while active_samples and len(finished_samples) < k:
        # One row of label probabilities per active sample.
        probs = predict(active_samples, empty=empty)

        # Candidate score = score of the parent sample + -log(label prob).
        totals = np.array(active_scores)[:, np.newaxis] - np.log(probs)
        if oov is not None and not use_unk:
            totals[:, oov] = 1e20
        flat_totals = totals.flatten()

        # Keep only as many of the lowest-scoring candidates as there are
        # beam slots not already taken by finished samples.
        best = flat_totals.argsort()[:k - len(finished_samples)]
        vocab = probs.shape[1]

        survivors, survivor_scores = [], []
        for flat_idx, score in zip(best, flat_totals[best]):
            # A flat index encodes (parent sample row, new label).
            parent, label = divmod(flat_idx, vocab)
            candidate = active_samples[parent] + [label]
            if label == eos or len(candidate) >= maxsample:
                finished_samples.append(candidate)
                finished_scores.append(score)
            else:
                survivors.append(candidate)
                survivor_scores.append(score)
        active_samples, active_scores = survivors, survivor_scores

    return finished_samples + active_samples, finished_scores + active_scores
