Last active
October 27, 2021 12:19
-
-
Save oiao/5753aaac1540e9e6a793d080895877ac to your computer and use it in GitHub Desktop.
Convert a list of DOIs to BibTeX strings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
def get_json(doi:str) -> dict:
    """Fetch the CSL-JSON citation record for a DOI from the DOI resolver.

    Parameters
    ----------
    doi : str
        Either a bare DOI (``10.1000/xyz``) or any string containing
        ``doi.org/<DOI>``, e.g. a full ``https://doi.org/...`` URL.

    Returns
    -------
    dict
        The JSON document returned by the resolver, decoded into a dict.

    Raises
    ------
    SystemExit
        With status 1, after printing a message, when the resolver answers
        with an HTTP error status.
    Exception
        Any other failure (connection problem, malformed URL, invalid JSON)
        propagates unchanged so the caller sees the real cause.
    """
    import json
    import re
    import sys
    import urllib.request
    from urllib.error import HTTPError

    # Accept resolver URLs as well as bare DOIs: keep only what follows
    # "doi.org/". The dot is escaped so that it only matches a literal dot.
    found = re.search(r'doi\.org/(.*)', doi)
    identifier = found.group(1) if found else doi

    req = urllib.request.Request('https://dx.doi.org/' + identifier)
    # Content negotiation, see https://citation.crosscite.org/docs.html
    # ('application/x-bibtex' would request ready-made BibTeX instead.)
    req.add_header('Accept', 'application/vnd.citationstyles.csl+json')  # JSON

    try:
        with urllib.request.urlopen(req) as f:
            payload = f.read().decode()
    except HTTPError as e:
        print(f"HTTP Error for {doi}: {e.code}")
        sys.exit(1)
    # No catch-all handler: the previous `except Error` referenced an
    # undefined name and replaced every other failure with a NameError.
    return json.loads(payload)
def bibify(d:dict, to_str=True, abbreviate_journal=True, titlecase=True) -> dict:
    """Convert a citation dict from `get_json()` into BibTeX-style fields.

    Parameters
    ----------
    d : dict
        Citation record. Must contain ``type``, ``title``, ``author`` (a list
        of dicts with ``family`` and ``given``) and a ``published`` or
        ``issued`` entry holding ``date-parts``.
    to_str : bool
        When true, every value of the result is converted with ``str()``.
    abbreviate_journal : bool
        When true and a non-empty ``container-title-short`` is present, use
        it as the journal name, appending a dot to each word lacking one.
    titlecase : bool
        When true, the title is passed through ``str.title()``.

    Returns
    -------
    dict
        Field name -> value, including the special keys ``ENTRYTYPE`` and
        ``ID`` (first author's family name followed by the year).

    Raises
    ------
    KeyError
        If one of the mandatory keys listed above is missing.
    """
    bibtex = {}
    # Any type mentioning "article" becomes @article; every other record
    # keeps its own type name (previously the title was used by mistake).
    bibtex['ENTRYTYPE'] = 'article' if 'article' in d['type'] else d['type']

    title = d['title']
    bibtex['title'] = title.title() if titlecase else title
    bibtex['author'] = ' and '.join(f"{a['family']}, {a['given']}" for a in d['author'])

    short_title = d.get('container-title-short') if abbreviate_journal else None
    if short_title:
        bibtex['journal'] = ' '.join(
            word if word.endswith('.') else word + '.' for word in short_title.split()
        )
    elif 'container-title' in d:
        # Records without a container title (e.g. books) get no journal field.
        bibtex['journal'] = d['container-title']

    if 'page' in d:
        # Dropping empty pieces keeps an existing "10--20" from becoming "10----20".
        bibtex['pages'] = '--'.join(part for part in d['page'].split('-') if part)
    if 'number' in d:
        # NOTE(review): the 'number' key is emitted as the 'issue' field, as
        # before; confirm that this mapping is the intended one.
        bibtex['issue'] = d['number']

    # Fall back to 'issued' for records that carry no 'published' date.
    date = d.get('published') or d['issued']
    y, *md = date['date-parts'][0]
    bibtex['year'] = y
    if md:
        bibtex['month'] = md[0]

    for k in 'doi', 'url', 'volume', 'publisher':
        # CSL-JSON records commonly spell 'DOI' and 'URL' in upper case, so
        # try the lower-case key first and then its upper-case form.
        for candidate in (k, k.upper()):
            if candidate in d:
                bibtex[k] = d[candidate]
                break

    bibtex['ID'] = d['author'][0]['family'] + f"{y}"

    if to_str:
        bibtex = {k: str(v) for k, v in bibtex.items()}
    return bibtex
def dump(d:dict) -> str:
    """Serialize one field dict, as returned by `bibify()`, to a BibTeX entry.

    The ``ENTRYTYPE`` and ``ID`` keys form the ``@type{id,`` header line;
    every other key is written as a `` key = {value},`` line in dict order.
    The entry is closed with ``}`` followed by a blank line.
    """
    header_keys = ('ENTRYTYPE', 'ID')
    pieces = [f"@{d['ENTRYTYPE']}{{{d['ID']},\n"]
    pieces.extend(
        f" {name} = {{{value!s}}},\n"
        for name, value in d.items()
        if name not in header_keys
    )
    pieces.append('}\n\n')
    return ''.join(pieces)
if __name__ == '__main__':
    # Command-line entry point: resolve every DOI given on the command line
    # and print the collected entries as BibTeX.
    import argparse

    parser = argparse.ArgumentParser(description='DOI to BibTex converter')
    parser.add_argument('DOIs', nargs='+', help='DOI strings to be converted to BibTex')
    parser.add_argument('-t', '--titlecase', action='store_true', help='Use title case for titles')
    parser.add_argument('-a', '--abbreviate', action='store_true', help='Abbreviate journal names')
    args = parser.parse_args()

    # Use bibtexparser for serialization when it is installed; otherwise
    # fall back to the local `dump()` helper.
    try:
        from bibtexparser.bibdatabase import BibDatabase
        from bibtexparser import dumps as _dumps
        _db = BibDatabase()
        db = _db.entries  # appending to `db` fills the database in place
        bibparser = True

        def dumps(db):
            # The database object, not the entry list passed in, is written.
            return _dumps(_db)
        # NOTE(review): month names may need
        # bibtexparser.bparser.BibTexParser(common_strings=True) -- confirm.
    except ImportError:
        db = []
        bibparser = False

        def dumps(db):
            return ''.join(dump(i) for i in db)

    for doi in args.DOIs:
        db.append(
            bibify(get_json(doi), abbreviate_journal=args.abbreviate, titlecase=args.titlecase)
        )

    # Blank separator line, then the serialized entries.
    print()
    print(dumps(db))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment