Skip to content

Instantly share code, notes, and snippets.

@mbollmann
Created August 11, 2025 19:33
Show Gist options
  • Save mbollmann/9888dec241817483438d5d9c88cbcd33 to your computer and use it in GitHub Desktop.
Save mbollmann/9888dec241817483438d5d9c88cbcd33 to your computer and use it in GitHub Desktop.
Script to dump all namespec–person associations in the ACL Anthology
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2025 Marcel Bollmann <[email protected]>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Usage: dump_papers.py [options]

Dumps a list of all paper–author name combinations together with the person ID they resolve to.

Options:
  --debug                  Output debug-level log messages.
  -d, --datadir=DIR        Directory with data files. [default: {scriptdir}/../../data]
  -h, --help               Display this helpful text.
"""
from docopt import docopt
import itertools as it
import os
from pathlib import Path
from acl_anthology import Anthology
def resolve_datadir(datadir: str, script_path: str) -> Path:
    """Turn a ``--datadir`` value into a :class:`~pathlib.Path`.

    If *datadir* contains the ``{scriptdir}`` placeholder, the placeholder is
    replaced by the directory holding *script_path* and the result is made
    absolute and normalized.  A value without the placeholder is wrapped in a
    ``Path`` unchanged (it is *not* made absolute).

    Args:
        datadir: Raw value of the ``--datadir`` option.
        script_path: Path of the running script, normally ``__file__``.

    Returns:
        The data directory as a ``Path``.
    """
    if "{scriptdir}" not in datadir:
        return Path(datadir)
    script_dir = os.path.dirname(os.path.abspath(script_path))
    return Path(os.path.abspath(datadir.format(scriptdir=script_dir)))


def iter_associations(anthology):
    """Yield one ``(paper ID, name, person ID)`` tuple per name on every paper.

    For each paper returned by ``anthology.papers()``, the authors are visited
    first and the editors second.  Each tuple holds ``paper.full_id``, the
    name rendered by ``as_last_first()``, and the ``id`` of whatever
    ``anthology.resolve()`` returns for that name specification.
    """
    for paper in anthology.papers():
        for namespec in it.chain(paper.authors, paper.editors):
            yield (
                paper.full_id,
                namespec.name.as_last_first(),
                anthology.resolve(namespec).id,
            )


def main():
    """Parse the command line and print the associations as tab-separated lines."""
    args = docopt(__doc__)

    # The usage text advertises --debug; honour it.  Only touch the logging
    # configuration when the flag is given so default output is unchanged.
    # NOTE(review): assumes acl_anthology logs through the stdlib ``logging``
    # module so that a DEBUG root level surfaces its messages -- confirm.
    if args.get("--debug"):
        import logging

        logging.basicConfig(level=logging.DEBUG)

    datadir = resolve_datadir(args["--datadir"], __file__)
    anthology = Anthology(datadir=datadir, verbose=False)
    for row in iter_associations(anthology):
        print("\t".join(row))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment