-
-
Save lan2720/b83f4b3e2a5375050792c4fc2b0c8ece to your computer and use it in GitHub Desktop.
"""
Usage:
    python allennlp_srl.py \
        https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2017.09.05.tar.gz \
        examples.json

Note:
    Each line in examples.json is one plain-text sentence (the file is read
    line by line and is not parsed as JSON, despite its extension), such as:

        Which NFL team represented the AFC at Super Bowl 50?
        Where did Super Bowl 50 take place?
        Which NFL team won Super Bowl 50?
        What color was used to emphasize the 50th anniversary of the Super Bowl?
        What was the theme of Super Bowl 50?
        What day was the game played on?
        What is the AFC short for?
        What was the theme of Super Bowl 50?
        What does AFC stand for?
        What day was the Super Bowl played on?
        Who won Super Bowl 50?
        What venue did Super Bowl 50 take place in?
"""
from allennlp.service.predictors import Predictor | |
from allennlp.models.archival import load_archive | |
from contextlib import ExitStack | |
import argparse | |
import json | |
def get_arguments(argv=None):
    """Parse the command-line options of the SRL prediction script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which is
            what the script entry point relies on.

    Returns:
        The ``argparse.Namespace`` with ``archive_file`` (str),
        ``input_file`` (file object opened for reading), ``output_file``
        (file object opened for writing, or ``None`` when the option is not
        given), ``batch_size`` (int, default 1) and ``cuda_device`` (int,
        default -1).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('archive_file', type=str,
                        help='the archived model to make predictions with')
    # argparse.FileType opens the files while parsing; pin the encoding so
    # non-ASCII sentences are decoded the same way regardless of the locale.
    parser.add_argument('input_file',
                        type=argparse.FileType('r', encoding='utf-8'),
                        help='path to input file')
    parser.add_argument('--output-file',
                        type=argparse.FileType('w', encoding='utf-8'),
                        help='path to output file')
    parser.add_argument('--batch-size', type=int, default=1,
                        help='The batch size to use for processing')
    parser.add_argument('--cuda-device', type=int, default=-1,
                        help='id of GPU to use (if any)')
    return parser.parse_args(argv)
def get_predictor(args):
    """Load the model archive and wrap it in a semantic-role-labeling predictor.

    Args:
        args: Parsed options; ``args.archive_file`` and ``args.cuda_device``
            are read.

    Returns:
        The object returned by
        ``Predictor.from_archive(archive, 'semantic-role-labeling')``.

    Raises:
        ValueError: If the archive's configured model type is not ``'srl'``.
    """
    archive = load_archive(args.archive_file,
                           weights_file=None,
                           cuda_device=args.cuda_device,
                           overrides="")

    # This script only knows how to drive an SRL model, so reject any other
    # archive up front instead of failing later inside the predictor.
    model_type = archive.config.get("model").get("type")
    if model_type != 'srl':
        raise ValueError('the given model is not for srl.')
    return Predictor.from_archive(archive, 'semantic-role-labeling')
def run(predictor,
        input_file,
        output_file,
        batch_size,
        print_to_console,
        cuda_device):
    """Feed sentences to ``predictor`` in batches and emit the predictions.

    Args:
        predictor: Object providing ``load_line``, ``dump_line``,
            ``predict_json`` and ``predict_batch_json``.
        input_file: Iterable of text lines, one sentence per line. Lines
            that are empty or whitespace-only are skipped.
        output_file: Object with a ``write`` method, or a falsy value to
            write nothing.
        batch_size: Number of sentences per prediction call. A value below 1
            never triggers a mid-stream flush, so every sentence ends up in
            one final batch.
        print_to_console: When true, print each input and its prediction.
        cuda_device: Passed through unchanged to the predictor calls.
    """
    def _run_predictor(batch_data):
        if len(batch_data) == 1:
            result = predictor.predict_json(batch_data[0], cuda_device)
            # Batch results return a list of json objects, so in
            # order to iterate over the result below we wrap this in a list.
            results = [result]
        else:
            results = predictor.predict_batch_json(batch_data, cuda_device)

        for model_input, output in zip(batch_data, results):
            string_output = predictor.dump_line(output)
            if print_to_console:
                print("input: ", model_input)
                print("prediction: ", string_output)
            if output_file:
                # Written exactly as returned by dump_line; nothing is appended.
                output_file.write(string_output)

    batch_data = []
    for line in input_file:
        sentence = line.strip()
        # Test the stripped text rather than line.isspace(): "".isspace() is
        # False, so an empty string would otherwise reach the model.
        if not sentence:
            continue
        # Wrap the raw sentence in the JSON shape the predictor reads, and
        # let the predictor's own load_line parse it.
        json_data = predictor.load_line(json.dumps({"sentence": sentence}))
        batch_data.append(json_data)
        if len(batch_data) == batch_size:
            _run_predictor(batch_data)
            batch_data = []

    # The input may not divide evenly by the batch size; flush the remainder.
    if batch_data:
        _run_predictor(batch_data)
def main():
    """Script entry point: parse options, load the predictor, run predictions.

    Predictions are written to ``--output-file`` when it is given; otherwise
    each input and its prediction are printed to the console.
    """
    args = get_arguments()

    # ExitStack allows us to conditionally context-manage `output_file`, which may or may not exist
    with ExitStack() as stack:
        # argparse has already opened these files, so register them before
        # loading the model; they are then closed even if loading fails.
        input_file = stack.enter_context(args.input_file)  # type: ignore
        output_file = None
        if args.output_file:
            output_file = stack.enter_context(args.output_file)  # type: ignore

        predictor = get_predictor(args)

        # With no output file the console is the only place results can go.
        print_to_console = not args.output_file

        run(predictor,
            input_file,
            output_file,
            args.batch_size,
            print_to_console,
            args.cuda_device)
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Hello, I get an error: allennlp.common.checks.ConfigurationError:'key "encoder" is required at location "model."' Could you tell me how to fix it? Thank you very much.
Could you help me run SRL on my data with your toolkit? It is only 37,000 sentences. Thank you very much. I sincerely hope for your reply.
I'm getting a "Maximum recursion depth exceeded" error at this statement:
archive = load_archive(args.archive_file,
weights_file=None,
cuda_device=args.cuda_device,
overrides="")
I'm running on a Mac that doesn't have a CUDA device. Could that be the problem?
Did you run this on a CUDA system? What's your setup?
Also can you update the code?
I got the following error:
FutureWarning: allennlp.service.predictors.* has been depreciated. Please use allennlp.predictors.*
"Please use allennlp.predictors.*", FutureWarning)
Did you ever get rid of the maximum recursion depth problem? Also, is a CUDA device needed for this?
For what it's worth:
[victoria@victoria semantics]$ source ~/venv/py3.7/bin/activate
(py3.7) [victoria@victoria semantics]$ env | grep -i virtual
VIRTUAL_ENV=/home/victoria/venv/py3.7
(py3.7) [victoria@victoria semantics]$ python --version
Python 3.7.4
(py3.7) [victoria@victoria semantics]$ pip install allennlp
...
(py3.7) [victoria@victoria semantics]$ date; pwd; ls -l
Mon 25 Nov 2019 03:36:50 PM PST
/mnt/Vancouver/projects/ie/semantics
total 28
-rw-r--r-- 1 victoria victoria 5599 Nov 25 13:28 26902145_30714292
-rw-r--r-- 1 victoria victoria 4449 Nov 25 15:31 allennlp_srl.py
drwxr-xr-x 2 victoria victoria 4096 Nov 15 18:58 becas
-rw-r--r-- 1 victoria victoria 211 Nov 25 15:27 input.json
-rw-r--r-- 1 victoria victoria 2582 Nov 25 14:49 spacy_srl.py
(py3.7) [victoria@victoria semantics]$ cat input.json
{
"Breast cancer susceptibility gene 1 (BRCA1) is a tumor suppressor protein that functions to maintain genomic stability through critical roles in DNA repair, cell-cycle arrest, and transcriptional control."
"The androgen receptor (AR) is expressed in more than 70% of breast cancers and has been implicated in breast cancer pathogenesis."
}
(py3.7) [victoria@victoria semantics]$
## https://gist.github.com/lan2720/b83f4b3e2a5375050792c4fc2b0c8ece
## https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2017.09.05.tar.gz
## needed updated (2018) model (above is 2017): https://github.com/allenai/allennlp/issues/1015#issuecomment-375314181
## https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.02.27.tar.gz
## https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz
## NO GPU so edited CUDA-related lines ...
(py3.7) [victoria@victoria semantics]$ python allennlp_srl.py https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz input.json
input: {"sentence": "{"}
prediction: {"verbs": [], "words": ["{"]}
input: {"sentence": ""Breast cancer susceptibility gene 1 (BRCA1) is a tumor suppressor protein that functions to maintain genomic stability through critical roles in DNA repair, cell-cycle arrest, and transcriptional control.""}
prediction: {"verbs": [{"verb": "is", "description": "" [ARG1: Breast cancer susceptibility gene 1 ( BRCA1 )] [V: is] [ARG2: a tumor suppressor protein that functions to maintain genomic stability through critical roles in DNA repair , cell - cycle arrest , and transcriptional control] . "", "tags": ["O", "B-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "B-V", "B-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "O", "O"]}, {"verb": "maintain", "description": "" Breast cancer susceptibility gene 1 ( BRCA1 ) is a tumor suppressor protein [ARG0: that] functions to [V: maintain] [ARG1: genomic stability] [ARGM-MNR: through critical roles in DNA repair , cell - cycle arrest , and transcriptional control] . "", "tags": ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "B-ARG0", "O", "O", "B-V", "B-ARG1", "I-ARG1", "B-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "O", "O"]}], "words": [""", "Breast", "cancer", "susceptibility", "gene", "1", "(", "BRCA1", ")", "is", "a", "tumor", "suppressor", "protein", "that", "functions", "to", "maintain", "genomic", "stability", "through", "critical", "roles", "in", "DNA", "repair", ",", "cell", "-", "cycle", "arrest", ",", "and", "transcriptional", "control", ".", """]}
input: {"sentence": ""The androgen receptor (AR) is expressed in more than 70% of breast cancers and has been implicated in breast cancer pathogenesis.""}
prediction: {"verbs": [{"verb": "is", "description": "" The androgen receptor ( AR ) [V: is] expressed in more than 70 % of breast cancers and has been implicated in breast cancer pathogenesis . "", "tags": ["O", "O", "O", "O", "O", "O", "O", "B-V", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"]}, {"verb": "expressed", "description": "" [ARG1: The androgen receptor ( AR )] is [V: expressed] [ARGM-LOC: in more than 70 % of breast cancers] and has been implicated in breast cancer pathogenesis . "", "tags": ["O", "B-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O", "B-V", "B-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"]}, {"verb": "has", "description": "" The androgen receptor ( AR ) is expressed in more than 70 % of breast cancers and [V: has] been implicated in breast cancer pathogenesis . "", "tags": ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "B-V", "O", "O", "O", "O", "O", "O", "O", "O"]}, {"verb": "been", "description": "" The androgen receptor ( AR ) is expressed in more than 70 % of breast cancers and has [V: been] implicated in breast cancer pathogenesis . "", "tags": ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "B-V", "O", "O", "O", "O", "O", "O", "O"]}, {"verb": "implicated", "description": "" [ARG1: The androgen receptor ( AR )] is expressed in more than 70 % of breast cancers and has been [V: implicated] [ARG2: in breast cancer pathogenesis] . 
"", "tags": ["O", "B-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "B-V", "B-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "O", "O"]}], "words": [""", "The", "androgen", "receptor", "(", "AR", ")", "is", "expressed", "in", "more", "than", "70", "%", "of", "breast", "cancers", "and", "has", "been", "implicated", "in", "breast", "cancer", "pathogenesis", ".", """]}
input: {"sentence": "}"}
prediction: {"verbs": [], "words": ["}"]}
(py3.7) [victoria@victoria semantics]$
For what it's worth:
[victoria@victoria semantics]$ source ~/venv/py3.7/bin/activate (py3.7) [victoria@victoria semantics]$ env | grep -i virtual VIRTUAL_ENV=/home/victoria/venv/py3.7 (py3.7) [victoria@victoria semantics]$ python --version Python 3.7.4 (py3.7) [victoria@victoria semantics]$ pip install allennlp ... (py3.7) [victoria@victoria semantics]$ date; pwd; ls -l Mon 25 Nov 2019 03:36:50 PM PST /mnt/Vancouver/projects/ie/semantics total 28 -rw-r--r-- 1 victoria victoria 5599 Nov 25 13:28 26902145_30714292 -rw-r--r-- 1 victoria victoria 4449 Nov 25 15:31 allennlp_srl.py drwxr-xr-x 2 victoria victoria 4096 Nov 15 18:58 becas -rw-r--r-- 1 victoria victoria 211 Nov 25 15:27 input.json -rw-r--r-- 1 victoria victoria 2582 Nov 25 14:49 spacy_srl.py (py3.7) [victoria@victoria semantics]$ cat input.json { "Breast cancer susceptibility gene 1 (BRCA1) is a tumor suppressor protein that functions to maintain genomic stability through critical roles in DNA repair, cell-cycle arrest, and transcriptional control." "The androgen receptor (AR) is expressed in more than 70% of breast cancers and has been implicated in breast cancer pathogenesis." } (py3.7) [victoria@victoria semantics]$ ## https://gist.github.com/lan2720/b83f4b3e2a5375050792c4fc2b0c8ece ## https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2017.09.05.tar.gz ## needed updated (2018) model (above is 2017): https://github.com/allenai/allennlp/issues/1015#issuecomment-375314181 ## https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.02.27.tar.gz ## https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz ## NO GPU so edited CUDA-related lines ... 
(py3.7) [victoria@victoria semantics]$ python allennlp_srl.py https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz input.json input: {"sentence": "{"} prediction: {"verbs": [], "words": ["{"]} input: {"sentence": ""Breast cancer susceptibility gene 1 (BRCA1) is a tumor suppressor protein that functions to maintain genomic stability through critical roles in DNA repair, cell-cycle arrest, and transcriptional control.""} prediction: {"verbs": [{"verb": "is", "description": "" [ARG1: Breast cancer susceptibility gene 1 ( BRCA1 )] [V: is] [ARG2: a tumor suppressor protein that functions to maintain genomic stability through critical roles in DNA repair , cell - cycle arrest , and transcriptional control] . "", "tags": ["O", "B-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "B-V", "B-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "O", "O"]}, {"verb": "maintain", "description": "" Breast cancer susceptibility gene 1 ( BRCA1 ) is a tumor suppressor protein [ARG0: that] functions to [V: maintain] [ARG1: genomic stability] [ARGM-MNR: through critical roles in DNA repair , cell - cycle arrest , and transcriptional control] . 
"", "tags": ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "B-ARG0", "O", "O", "B-V", "B-ARG1", "I-ARG1", "B-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "I-ARGM-MNR", "O", "O"]}], "words": [""", "Breast", "cancer", "susceptibility", "gene", "1", "(", "BRCA1", ")", "is", "a", "tumor", "suppressor", "protein", "that", "functions", "to", "maintain", "genomic", "stability", "through", "critical", "roles", "in", "DNA", "repair", ",", "cell", "-", "cycle", "arrest", ",", "and", "transcriptional", "control", ".", """]} input: {"sentence": ""The androgen receptor (AR) is expressed in more than 70% of breast cancers and has been implicated in breast cancer pathogenesis.""} prediction: {"verbs": [{"verb": "is", "description": "" The androgen receptor ( AR ) [V: is] expressed in more than 70 % of breast cancers and has been implicated in breast cancer pathogenesis . "", "tags": ["O", "O", "O", "O", "O", "O", "O", "B-V", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"]}, {"verb": "expressed", "description": "" [ARG1: The androgen receptor ( AR )] is [V: expressed] [ARGM-LOC: in more than 70 % of breast cancers] and has been implicated in breast cancer pathogenesis . "", "tags": ["O", "B-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O", "B-V", "B-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "I-ARGM-LOC", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O"]}, {"verb": "has", "description": "" The androgen receptor ( AR ) is expressed in more than 70 % of breast cancers and [V: has] been implicated in breast cancer pathogenesis . 
"", "tags": ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "B-V", "O", "O", "O", "O", "O", "O", "O", "O"]}, {"verb": "been", "description": "" The androgen receptor ( AR ) is expressed in more than 70 % of breast cancers and has [V: been] implicated in breast cancer pathogenesis . "", "tags": ["O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "B-V", "O", "O", "O", "O", "O", "O", "O"]}, {"verb": "implicated", "description": "" [ARG1: The androgen receptor ( AR )] is expressed in more than 70 % of breast cancers and has been [V: implicated] [ARG2: in breast cancer pathogenesis] . "", "tags": ["O", "B-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "B-V", "B-ARG2", "I-ARG2", "I-ARG2", "I-ARG2", "O", "O"]}], "words": [""", "The", "androgen", "receptor", "(", "AR", ")", "is", "expressed", "in", "more", "than", "70", "%", "of", "breast", "cancers", "and", "has", "been", "implicated", "in", "breast", "cancer", "pathogenesis", ".", """]} input: {"sentence": "}"} prediction: {"verbs": [], "words": ["}"]} (py3.7) [victoria@victoria semantics]$
I tried the above steps on Colab and got the error below:
File "/usr/local/lib/python3.6/dist-packages/allennlp/models/model.py", line 360, in load
model_class: Type[Model] = cls.by_name(model_type) # type: ignore
File "/usr/local/lib/python3.6/dist-packages/allennlp/common/registrable.py", line 137, in by_name
subclass, constructor = cls.resolve_class_name(name)
File "/usr/local/lib/python3.6/dist-packages/allennlp/common/registrable.py", line 185, in resolve_class_name
f"{name} is not a registered name for {cls.__name__}. "
allennlp.common.checks.ConfigurationError: srl is not a registered name for Model. You probably need to use the --include-package flag to load your custom code. Alternatively, you can specify your choices using fully-qualified paths, e.g. {"model": "my_module.models.MyModel"} in which case they will be automatically imported correctly.
I tried the above steps on Colab and got the error below:
File "/usr/local/lib/python3.6/dist-packages/allennlp/models/model.py", line 360, in load model_class: Type[Model] = cls.by_name(model_type) # type: ignore File "/usr/local/lib/python3.6/dist-packages/allennlp/common/registrable.py", line 137, in by_name subclass, constructor = cls.resolve_class_name(name) File "/usr/local/lib/python3.6/dist-packages/allennlp/common/registrable.py", line 185, in resolve_class_name f"{name} is not a registered name for {cls.__name__}. " allennlp.common.checks.ConfigurationError: srl is not a registered name for Model. You probably need to use the --include-package flag to load your custom code. Alternatively, you can specify your choices using fully-qualified paths, e.g. {"model": "my_module.models.MyModel"} in which case they will be automatically imported correctly.
You need to install the allennlp models: pip install allennlp-models
(hinted at here). This will install models and the matching version of allennlp, if required for the model version.
You should install AllenNLP first.
https://github.com/allenai/allennlp/blob/v0.4.0/tutorials/getting_started/installation.md