Last active
March 27, 2017 08:49
-
-
Save ntessore/13d2748b338b4d91287541f09e2d4722 to your computer and use it in GitHub Desktop.
query the ADS bibliography database for a list of bibcodes or the missing citations from a LaTeX log file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
####
# small command line tool to get a BibTeX bibliography from ADS bibcodes
#
# $ adsbibq.py '2015A&A...580A..79T' '2016MNRAS.463.3115T'
#
# Query Results from the ADS Database
#
# Retrieved 2 abstracts, starting with number 1. Total number selected: 2.
# ...
#
# or for the undefined citations in a LaTeX log file read from stdin
#
# $ adsbibq.py --log < paper.log
#
####
from argparse import ArgumentParser | |
from re import finditer | |
from sys import stdin, stdout | |
from urllib.parse import urlencode | |
from urllib.request import urlopen, URLError | |
# the base url of the ADS bibliography query
# NOTE(review): this looks like the legacy ADS Classic endpoint -- confirm it
# is still served before relying on it
ADS_QUERY_URL = 'http://adsabs.harvard.edu/cgi-bin/nph-bib_query'

# the natbib warning message to search for undefined citations
UNDEFINED_CITATION = (
    r"Package natbib Warning: Citation `(.*?)' on page \d+ undefined"
)


def build_parser():
    """Return the argument parser: positional bibcodes and a --log switch."""
    parser = ArgumentParser()
    parser.add_argument('bibcode', nargs='*', help='query bibcodes')
    parser.add_argument('--log', action='store_true', help='parse log file')
    return parser


def citekeys_from_log(log):
    """Return the citekeys of all undefined natbib citations found in *log*.

    Newlines are stripped before matching because LaTeX warnings can wrap,
    which may split a warning (and the citekey inside it) across lines.
    The keys are returned in order of appearance, duplicates included.
    """
    flat = log.replace('\n', '').replace('\r', '')
    return [m.group(1) for m in finditer(UNDEFINED_CITATION, flat)]


def encode_query(bibcodes):
    """Return the url-encoded request body asking ADS for BibTeX entries.

    The bibcodes are sorted and duplicates removed before encoding.
    """
    query = [
        ('bibcode', sorted(set(bibcodes))),
        ('data_type', 'BIBTEX'),
        ('db_key', 'AST'),
        ('nocookieset', '1'),
    ]
    # doseq=True emits one `bibcode=` parameter per list element
    return urlencode(query, True).encode('ascii')


def main(argv=None):
    """Query ADS for the requested bibcodes and write the result to stdout.

    *argv* is the list of command line arguments; ``None`` means the
    arguments of the running process. With ``--log`` the bibcodes are the
    undefined citekeys found in a LaTeX log read from stdin (positional
    bibcodes are then ignored); otherwise the positional bibcodes are used.

    Exits with status 2 on invalid arguments, status 1 if the request to
    ADS fails, and status 0 without querying if the log has no undefined
    citations.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # make sure at least one way of input is provided
    if not args.bibcode and not args.log:
        parser.error('either bibcodes or the --log option must be given')

    # read log or use bibcodes
    bibcodes = citekeys_from_log(stdin.read()) if args.log else args.bibcode

    # nothing to look up: do not send a query without any bibcode
    if not bibcodes:
        parser.exit(0, 'no undefined citations found in the log\n')

    # perform request and output the result
    try:
        # the context manager closes the connection even if reading fails
        with urlopen(ADS_QUERY_URL, encode_query(bibcodes)) as response:
            bibtex = response.read().decode('utf-8')
    except URLError as err:
        # report the failure on stderr instead of silently printing nothing
        parser.exit(1, 'ADS request failed: {}\n'.format(err))
    else:
        stdout.write(bibtex)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment