-
-
Save chssch/f788cfd227cb94d0843235a2542026fd to your computer and use it in GitHub Desktop.
# This small script shows how to use AllenNLP Semantic Role Labeling (http://allennlp.org/) with SpaCy 2.0 (http://spacy.io) components and extensions | |
# Script installs allennlp default model | |
# Important: Install allennlp from source and replace the spacy requirement with spacy-nightly in the requirements.txt | |
# Developed for SpaCy 2.0.0a18 | |
from allennlp.commands import DEFAULT_MODELS | |
from allennlp.common.file_utils import cached_path | |
from allennlp.service.predictors import SemanticRoleLabelerPredictor | |
from allennlp.models.archival import load_archive | |
import spacy | |
from spacy.tokens import Token | |
class SRLComponent(object):
    '''
    A SpaCy pipeline component for SRL.

    For every token whose part-of-speech is "VERB", the AllenNLP semantic
    role labeler is run with that token marked as the predicate. The first
    ARG0 and ARG1 spans it tags are stored on the verb token as the custom
    extension attributes ``srl_arg0`` and ``srl_arg1``. Both attributes
    default to ``None`` when no such argument was found.
    '''
    name = 'Semantic Role Labeler'

    # (role label in the tagger output, Token extension attribute that stores it)
    _ROLE_ATTRS = (("ARG0", "srl_arg0"), ("ARG1", "srl_arg1"))

    def __init__(self):
        '''Load the default SRL model and register the Token extensions.'''
        archive = load_archive(self._get_srl_model())
        self.predictor = SemanticRoleLabelerPredictor.from_archive(archive, "semantic-role-labeling")
        for _, attr in self._ROLE_ATTRS:
            # Register each extension only once so the component can be
            # constructed repeatedly, and always pass an explicit default:
            # released spaCy 2.x rejects set_extension() without one.
            if not Token.has_extension(attr):
                Token.set_extension(attr, default=None)

    def __call__(self, doc):
        '''
        Annotate every verb in ``doc`` with its ARG0 / ARG1 spans.

        :param doc: the document being processed by the pipeline.
        :return: the same ``doc``, with ``srl_arg0`` / ``srl_arg1`` set on
            verb tokens for which the tagger produced that argument.
        '''
        # See https://github.com/allenai/allennlp/blob/master/allennlp/service/predictors/semantic_role_labeler.py#L74
        token_count = len(doc)
        for index, token in enumerate(doc):
            if token.pos_ != "VERB":
                continue
            # One-hot indicator telling the model which token is the predicate.
            verb_labels = [0] * token_count
            verb_labels[index] = 1
            instance = self.predictor._dataset_reader.text_to_instance(doc, verb_labels)
            # NOTE(review): -1 is presumably the CPU device id - confirm
            # against the installed AllenNLP version.
            output = self.predictor._model.forward_on_instance(instance, -1)
            tags = output['tags']
            for role, attr in self._ROLE_ATTRS:
                bounds = self._role_bounds(tags, role)
                if bounds is not None:
                    start, end = bounds
                    token._.set(attr, doc[start:end])
        return doc

    @staticmethod
    def _role_bounds(tags, role):
        '''
        Locate the first BIO span for ``role`` in ``tags``.

        The span starts at the first ``B-<role>`` tag and extends over the
        ``I-<role>`` tags that immediately follow it, so an ``I-<role>`` tag
        further along the sentence does not stretch the span over unrelated
        tokens.

        :return: ``(start, end)`` with ``end`` exclusive, or ``None`` when
            there is no ``B-<role>`` tag.
        '''
        begin_tag = "B-" + role
        inside_tag = "I-" + role
        if begin_tag not in tags:
            return None
        start = tags.index(begin_tag)
        end = start + 1
        while end < len(tags) and tags[end] == inside_tag:
            end += 1
        return start, end

    def _get_srl_model(self):
        '''Return the local (cached) path of the default SRL model archive.'''
        entry = DEFAULT_MODELS['semantic-role-labeling']
        # Some AllenNLP versions store a DemoModel object here instead of a
        # plain URL string; passing that object to cached_path() fails with
        # "'DemoModel' object has no attribute 'decode'". Unwrap it if needed.
        location = getattr(entry, 'archive_file', entry)
        return cached_path(location)
def demo():
    '''
    Run the SRL component on one example sentence.

    Loads the "en" spaCy pipeline, inserts an SRLComponent after the 'ner'
    step and prints an (arg0, verb, arg1) triple for every VERB token.
    '''
    pipeline = spacy.load("en")
    srl_component = SRLComponent()
    pipeline.add_pipe(srl_component, after='ner')
    parsed = pipeline("Apple sold 1 million Plumbuses this month.")
    template = "('{}', '{}', '{}')"
    verbs = (token for token in parsed if token.pos_ == "VERB")
    for verb in verbs:
        print(template.format(verb._.srl_arg0, verb, verb._.srl_arg1))
    # Expected output: ('Apple', 'sold', '1 million Plumbuses')
After calling the demo method, I got this error:
File "spacy_srl.py", line 65, in
demo()
File "spacy_srl.py", line 58, in demo
nlp.add_pipe(SRLComponent(), after='ner')
File "spacy_srl.py", line 22, in __init__
archive = load_archive(self._get_srl_model())
File "spacy_srl.py", line 53, in _get_srl_model
return cached_path(DEFAULT_MODELS['semantic-role-labeling'])
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/allennlp/common/file_utils.py", line 59, in cached_path
parsed = urlparse(url_or_filename)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/parse.py", line 365, in urlparse
url, scheme, _coerce_result = _coerce_args(url, scheme)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/parse.py", line 123, in _coerce_args
return _decode_args(args) + (_encode_result,)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/parse.py", line 107, in _decode_args
return tuple(x.decode(encoding, errors) if x else '' for x in args)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/parse.py", line 107, in
return tuple(x.decode(encoding, errors) if x else '' for x in args)
AttributeError: 'DemoModel' object has no attribute 'decode'
https://gist.github.com/lan2720/b83f4b3e2a5375050792c4fc2b0c8ece
I wrote this one, which works well.
I'm getting a "maximum recursion depth exceeded" error at this statement:
archive = load_archive(args.archive_file,
weights_file=None,
cuda_device=args.cuda_device,
overrides="")
I'm running on a Mac, which doesn't have a CUDA device. Could that be the problem?
https://gist.github.com/lan2720/b83f4b3e2a5375050792c4fc2b0c8ece
I wrote this one, which works well.
I am getting a "maximum recursion depth exceeded" error. I changed some parts to match the current allennlp library, but I can't get rid of the recursion error. Any pointers?
What AllenNLP version is used here?
If you want to use newer versions of allennlp (2.4.0), allennlp-models (2.4.0) and spacy (3.0.6) for this, the code below might be a good starting point:
from allennlp.predictors.predictor import Predictor
from spacy.language import Language
from spacy.tokens import Doc
@Language.factory(
    "srl",
    default_config={
        "model_path": "https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz",
    },
)
def create_srl_component(nlp: Language, name: str, model_path: str):
    """Factory registered under the name "srl"; builds an SRLComponent.

    ``name`` is accepted as part of the factory signature but is not used.
    ``nlp`` and ``model_path`` are forwarded to the SRLComponent constructor.
    """
    component = SRLComponent(nlp, model_path)
    return component
class SRLComponent:
    """Pipeline component that stores AllenNLP SRL predictions on ``doc._.srl``."""

    def __init__(self, nlp: Language, model_path: str):
        """Register the ``srl`` Doc extension and load the predictor.

        ``nlp`` is accepted but not used here; ``model_path`` is passed to
        ``Predictor.from_path``.
        """
        # Only register the extension if an earlier instance has not already.
        already_registered = Doc.has_extension("srl")
        if not already_registered:
            Doc.set_extension("srl", default=None)
        self.predictor = Predictor.from_path(model_path)

    def __call__(self, doc: Doc):
        """Run the predictor on the document text and return the same doc."""
        doc._.srl = self.predictor.predict(sentence=doc.text)
        return doc
if __name__ == '__main__':
    import spacy

    # Start from a blank English pipeline and add the "srl" factory to it.
    pipeline = spacy.blank('en')
    pipeline.add_pipe("srl")
    analysed = pipeline("The dog trashed the apartment in under 30 seconds.")
    # Show the predictions the component stored on the document.
    print(analysed._.srl)
Hello @narayanacharya6,
Is there a quick way to print the result of the semantic role labelling in a file that respects the CoNLL format?
What I would like to do is convert "doc._.srl" to CoNLL format.
Thank you
@felgaet I've used this previously for converting docs to conll - https://github.com/BramVanroy/spacy_conll
I don't know if this is exactly what you are looking for but might be a starting point to where you want to get.
@herlimenezes Do you get any errors?