Last active
August 24, 2019 03:29
-
-
Save dmentipl/8276ca49a2ff56bc73dc17ddba4de793 to your computer and use it in GitHub Desktop.
Make a BibTeX file from the citations found in a LaTeX file, using a "master" BibTeX file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Generate bibtex from citations in tex using a master bibtex.

It uses the following regex to find citations in the tex file:

    regex = re.compile(r'\\|no|cite[a-z]*\*?{(.*?)}')

This regex matches citations like \cite{}, \citet{}, \citep{},
\citeauthor{}, \citet*{}, and so on, and returns the captured citekey.

This script requires the bibtool program. Install with, for example,
Homebrew or APT.

Daniel Mentiplay, 2019.
"""
import argparse | |
import pathlib | |
import re | |
import subprocess | |
import tempfile | |
# Temporary bibtool resource file; its path (BIBTOOL_OPTIONS.name) is handed
# to bibtool with ``-r``.
#
# The handle is deliberately kept open at module level: with delete=True the
# file is removed as soon as the handle is closed, and bibtool has to read it
# by name later on.  The settings are written through this same handle (text
# mode) instead of reopening the path, because opening a NamedTemporaryFile a
# second time while it is still open does not work on every platform.
BIBTOOL_OPTIONS = tempfile.NamedTemporaryFile(mode='w', delete=True)
BIBTOOL_OPTIONS.write(
    '\n'.join(
        [
            'delete.field = "bdsk-file-1"',
            'delete.field = "bdsk-url-1"',
            'delete.field = "bdsk-url-2"',
            'delete.field = "bdsk-url-3"',
            'delete.field = "date-added"',
            'delete.field = "date-modified"',
            'preserve.key.case = on',
            'print.align.key = 0',
            'print.indent = 2',
            'print.align = 0',
            'print.line.length = 9999',
            'print.wide.equal = on',
            'print.equal.right = off',
            'print.use.tab = off',
        ]
    )
)
# Push the buffered text to disk so that bibtool, which runs as a separate
# process, sees the complete file.
BIBTOOL_OPTIONS.flush()
class CitationNotFoundError(Exception):
    """Raised when a cite key has no matching entry in the master bibtex."""
def find_citations_in_texfile(input_tex_file):
    """Return the sorted, de-duplicated cite keys used in a LaTeX file.

    A citation is any command whose name contains ``cite`` (``\\cite``,
    ``\\citet``, ``\\citep*``, ``\\citeauthor``, ``\\nocite``,
    ``\\parencite``, ...), optionally followed by a ``*`` and by any number of
    ``[...]`` optional arguments, and then a ``{...}`` group holding one or
    more comma-separated keys.  The key group may be wrapped over several
    lines.

    Parameters
    ----------
    input_tex_file
        Path of the ``.tex`` file to scan.

    Returns
    -------
    list of str
        Unique cite keys in sorted order, stripped of surrounding whitespace.
        Empty keys (e.g. from ``\\cite{}`` or a trailing comma) are dropped.
    """
    # The command name is anchored on the backslash, so stray text such as
    # "\\" or "no" no longer yields empty matches.  Optional arguments are
    # skipped explicitly; without that, \citep[see][p. 5]{key} is missed.
    # The key group is matched with [^{}]* rather than '.', which lets it span
    # line breaks without needing re.DOTALL.
    regex = re.compile(
        r'\\[A-Za-z]*[Cc]ite[A-Za-z]*\*?(?:\s*\[[^\]]*\])*\s*\{([^{}]*)\}'
    )

    # Scan the whole text at once so that key lists wrapped across lines are
    # still seen as a single citation.
    with open(input_tex_file, 'r') as tex_file:
        text = tex_file.read()

    citations = {
        key.strip()
        for match in regex.findall(text)
        for key in match.split(',')
    }
    citations.discard('')
    return sorted(citations)
def write_reduced_bibtex(citations, input_bibtex_file, output_bibtex_file=None):
    """Write a bibtex file holding only the entries for the given cite keys.

    For every key, bibtool is run on the master bibtex file with the key as an
    anchored ``-X`` pattern and the options in ``BIBTOOL_OPTIONS``; whatever
    bibtool prints on stdout is collected as that key's entry.

    Parameters
    ----------
    citations
        Iterable of cite keys to look up.
    input_bibtex_file
        Path of the master ``.bib`` file.
    output_bibtex_file
        Path of the ``.bib`` file to write.  Defaults to ``references.bib``.

    Raises
    ------
    CitationNotFoundError
        If bibtool prints nothing for a key.  The output file is left
        untouched in that case.
    """
    if output_bibtex_file is None:
        output_bibtex_file = 'references.bib'

    # Collect every entry before touching the output file, so that a missing
    # key cannot leave behind a truncated or half-written bibliography.
    entries = []
    for citation in citations:
        print(f'Adding {citation}')
        result = subprocess.run(
            [
                'bibtool',
                '-q',
                '-r',
                BIBTOOL_OPTIONS.name,
                '-X',
                # NOTE(review): the key is inserted into the pattern
                # unescaped, so keys containing regex metacharacters (e.g.
                # '.' or '+') may match more or less than intended -- confirm
                # against bibtool's regex syntax before escaping.
                '^' + citation + '$',
                # Plain string so that path-like objects work everywhere.
                str(input_bibtex_file),
            ],
            encoding='utf-8',
            stdout=subprocess.PIPE,
        )
        if result.stdout == '':
            message = f'Cannot find citation key: {citation}'
            print(message)
            # Carry the message on the exception too, so the traceback names
            # the offending key.
            raise CitationNotFoundError(message)
        entries.append(result.stdout)

    with open(output_bibtex_file, 'w') as bibtex_file:
        bibtex_file.writelines(entries)
def main(argv=None):
    """Command-line entry point: build a reduced .bib from a .tex file.

    Parameters
    ----------
    argv
        Argument list to parse.  ``None`` (the default) makes argparse read
        the process command line.

    Raises
    ------
    FileNotFoundError
        If the input ``.tex`` file or the input ``.bib`` file does not exist.
    """
    parser = argparse.ArgumentParser(description='Make .bib from .tex citations')
    parser.add_argument(
        '-t', '--tex-file', required=True, type=str, help='input ".tex" file name'
    )
    parser.add_argument(
        '-b', '--bib-file', required=True, type=str, help='input ".bib" file name'
    )
    parser.add_argument(
        '-o', '--out-file', required=False, type=str, help='output ".bib" file name'
    )
    args = parser.parse_args(argv)

    input_texfile = pathlib.Path(args.tex_file)
    input_bibtex = pathlib.Path(args.bib_file)
    if args.out_file is not None:
        output_bibtex = pathlib.Path(args.out_file)
    else:
        output_bibtex = pathlib.Path('references.bib')

    # Validate both inputs up front.  Without the .bib check a missing master
    # file would only surface later as a misleading "cannot find citation key"
    # failure for the first key.
    if not input_texfile.exists():
        raise FileNotFoundError(f'{input_texfile} not found')
    if not input_bibtex.exists():
        raise FileNotFoundError(f'{input_bibtex} not found')

    # Get citations from .tex file.
    print(f'Finding citations in {input_texfile}')
    citations = find_citations_in_texfile(input_texfile)

    # Write reduced .bib file.
    print(f'Writing {output_bibtex} file with citations from {input_bibtex}')
    write_reduced_bibtex(citations, input_bibtex, output_bibtex)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment