Created
April 13, 2015 14:24
-
-
Save kowey/4c45c65952c927c4698a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class LinkPack(namedtuple('LinkPack',
                          ['labels',
                           'pairings',
                           'scores_ad',
                           'scores_l'])):
    '''
    Collection of candidate links

    :param labels: list of labels (same length as the width of
                   scores_l)
    :type labels: [string]

    :param pairings: list of EDU pairs (length = number of samples)
    :type pairings: [(EDU, EDU)]

    :param scores_ad: directed attachment scores (length should
                      match that of pairings)
    :type scores_ad: array(float)

    :param scores_l: label attachment scores (width should be
                     number of labels; height should be number
                     of pairings)
    :type scores_l: matrix(float)
    '''
    def take(self, indices):
        '''
        Return a subset of the links indicated by the list/array
        of indices

        The labels are kept as they are; the pairings, the attachment
        scores and the rows of the label score matrix are selected in
        the order given by `indices`.

        :param indices: integer positions of the links to keep
        :type indices: [int] or array(int)

        :rtype: LinkPack
        '''
        # pylint: disable=no-member
        # pairings are picked one by one: handing a list of pairs to
        # np.take would flatten it and select individual EDUs
        pairings = [self.pairings[i] for i in indices]
        scores_ad = np.take(self.scores_ad, indices)
        # axis=0 selects whole rows (one row per pairing); without it
        # np.take indexes into the flattened matrix
        scores_l = np.take(self.scores_l, indices, axis=0)
        # pylint: enable=no-member
        return LinkPack(labels=self.labels,
                        pairings=pairings,
                        scores_ad=scores_ad,
                        scores_l=scores_l)

    def simple_candidates(self):
        '''
        Translate the links into a list of (EDU, EDU, float, string)
        quadruplets representing a combined probability and best label
        for each EDU pair. This is often good enough for simplistic
        decoders

        The combined probability is the attachment score multiplied
        by the highest label score of the pair; the label is the one
        holding that highest score.

        :rtype: [(EDU, EDU, float, string)]
        '''
        # attachment * best label prob
        # pylint: disable=no-member
        scores = self.scores_ad * np.amax(self.scores_l, axis=1)
        lbls = np.argmax(self.scores_l, axis=1)
        # pylint: enable=no-member
        return [(pair[0], pair[1], score, self.labels[lbl])
                for pair, score, lbl
                in zip(self.pairings, scores, lbls)]
class Decoder(with_metaclass(ABCMeta, object)):
    '''
    Abstract base for decoders.

    A decoder is a function which, given a probability distribution
    (see below) and some control parameters, returns a sequence of
    predictions. In practice most decoders return a single prediction,
    but some (like the A* decoder) might be able to return a ranked
    sequence of the "N best" predictions they can find.

    Informal types used here:

    - a **link** (`(string, string, string)`) represents a link
      between a pair of EDUs: the first two items are the EDU
      identifiers, the third is the link label
    - a **candidate link** (or candidate, for short;
      `(EDU, EDU, float, string)`) is a link with a probability
      attached
    - a **prediction** is morally a set (in practice a list) of links
    - a **distribution** is morally a set of proposed links
    '''

    @abstractmethod
    def decode(self, lpack):
        '''
        Return the predictions for a pack of candidate links; must
        be implemented by subclasses.

        :type lpack: LinkPack

        :rtype: [ [(string,string,string)] ]
        '''
        raise NotImplementedError()
class PruningDecoder(with_metaclass(ABCMeta, Decoder)):
    '''
    Decoder that wraps another decoder: it preprocesses the
    candidate edges, removing some of them, and then hands what
    is left over to the inner decoder
    '''

    def __init__(self, decoder):
        # inner decoder that receives the pruned links
        self.decoder = decoder

    def decode(self, lpack):
        # prune first, then delegate to the wrapped decoder
        pruned = self.prune(lpack)
        return self.decoder.decode(pruned)

    @abstractmethod
    def prune(self, lpack):
        '''
        Trim a set of proposed links; must be implemented by
        subclasses

        :rtype: LinkPack
        '''
        raise NotImplementedError()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment