class Annotator(object):
    """Annotate the nodes of an Agreement tree with identifiers of matching rules.

    The matching logic is not implemented yet; `annotate` currently hands its
    input back unchanged and only outlines the intended algorithm in comments.
    """

    def __init__(self, nlp, **kwargs):
        """Keep a reference to the language pipeline.

        Args:
            nlp: A spaCy Language object.
            **kwargs: Accepted but currently unused.
        """
        self.nlp = nlp

    def annotate(self, agr, rules):
        """Annotate `agr` with the keys of the `rules` that match (stub).

        Args:
            agr: Short for Agreement; a tree-like data structure. A node of
                the tree (class `Element`) has a `tag` (e.g. PREAMBLE,
                SECTION) and may have `content` (e.g. textual data),
                `annotations` (string identifiers of `Rule`s) and one or
                many `children`.
            rules: A dictionary with string keys and values of class `Rule`.
                `Rule`s contain regex-like `patterns` using Universal
                Dependencies and `applicable_tags` for targeting specific
                Agreement node types (the `tag` from `Element`).

        Returns:
            The same `agr` object that was passed in. No annotation is
            performed yet, so it is returned unmodified.
        """
        # Pseudo-code for a possible implementation:
        #
        # for each node in agr:
        #     for each key, rule in rules:
        #         - SKIP UNLESS node.tag IN rule.applicable_tags
        #
        #         - make spaCy Doc object out of node.content
        #         - IF rule matches Doc object
        #           -- append key to node.annotations
        return agr
Last active
June 24, 2018 20:29
-
-
Save ursachec/2bb324ba508961990dd241eab47d3427 to your computer and use it in GitHub Desktop.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment