Skip to content

Instantly share code, notes, and snippets.

@dmentipl
Last active August 24, 2019 03:29
Show Gist options
  • Save dmentipl/8276ca49a2ff56bc73dc17ddba4de793 to your computer and use it in GitHub Desktop.
Save dmentipl/8276ca49a2ff56bc73dc17ddba4de793 to your computer and use it in GitHub Desktop.
Make a bibtex file from citations found in a latex file using a "master" bibtex file
#!/usr/bin/env python
"""
Generate bibtex from citations in tex using a master bibtex.
It uses a regular expression (defined in find_citations_in_texfile) to
find citation commands in the tex file.
This regex matches citations like \cite{}, \citet{}, \citep{},
\citeauthor{}, \citet*{}, and so on, and returns the captured citekey.
This script requires the bibtool program. Install with, for example,
Homebrew or APT.
Daniel Mentiplay, 2019.
"""
import argparse
import pathlib
import re
import subprocess
import tempfile
# Temporary bibtool resource file, passed to bibtool with ``-r``. It lists
# the fields to delete (bdsk-*, date-added, date-modified) and the print.*
# formatting settings. The object is kept at module level so the file exists
# for as long as the module does; consumers only use ``BIBTOOL_OPTIONS.name``.
#
# The content is written through the handle itself rather than by reopening
# the file by name: reopening a still-open NamedTemporaryFile is not portable.
BIBTOOL_OPTIONS = tempfile.NamedTemporaryFile(mode='w', delete=True)
BIBTOOL_OPTIONS.write(
    '\n'.join(
        [
            'delete.field = "bdsk-file-1"',
            'delete.field = "bdsk-url-1"',
            'delete.field = "bdsk-url-2"',
            'delete.field = "bdsk-url-3"',
            'delete.field = "date-added"',
            'delete.field = "date-modified"',
            'preserve.key.case = on',
            'print.align.key = 0',
            'print.indent = 2',
            'print.align = 0',
            'print.line.length = 9999',
            'print.wide.equal = on',
            'print.equal.right = off',
            'print.use.tab = off',
        ]
    )
    + '\n'
)
# Flush so that a separate process reading the file by name sees the options.
BIBTOOL_OPTIONS.flush()
class CitationNotFoundError(Exception):
    """Signal that no bibtex entry was produced for a citation key."""
def find_citations_in_texfile(input_tex_file):
    r"""Return the sorted, unique citation keys used in a tex file.

    A citation command is any control sequence whose name contains
    ``cite`` (``\cite``, ``\citet``, ``\citep``, ``\citeauthor``,
    ``\nocite``, ``\parencite``, ...), optionally starred, optionally
    followed by bracketed arguments (``\citep[see][p.~3]{key}``), and then
    a braced, comma-separated list of keys. The whole file is searched at
    once, so a key list wrapped over several lines is still found.

    Parameters
    ----------
    input_tex_file
        Path of the tex file to scan.

    Returns
    -------
    list of str
        Citation keys, stripped of surrounding whitespace, de-duplicated
        and sorted. Empty keys (e.g. from ``\cite{}``) are dropped.
    """
    pattern = re.compile(
        r'\\[a-zA-Z]*[Cc]ite[a-zA-Z]*\*?'  # command name, optional star
        r'(?:\s*\[[^\]]*\])*'  # any number of optional [...] arguments
        r'\s*\{([^}]*)\}'  # braced key list (captured)
    )

    with open(input_tex_file, 'r') as tex_file:
        text = tex_file.read()

    keys = set()
    for key_list in pattern.findall(text):
        # One command may cite several comma-separated keys.
        keys.update(key.strip() for key in key_list.split(','))

    # '\cite{}' or a trailing comma yields an empty key; it is not a citation.
    keys.discard('')
    return sorted(keys)
def write_reduced_bibtex(citations, input_bibtex_file, output_bibtex_file=None):
    """Extract the entries for the given keys from a bibtex file into a new one.

    ``bibtool`` is run once per citation key against ``input_bibtex_file``
    and its output is collected. The output file is only opened after every
    key has been resolved, so a failure part-way through neither truncates
    an existing output file nor leaves a partial one behind.

    Parameters
    ----------
    citations
        Iterable of citation keys to extract.
    input_bibtex_file
        Path of the "master" bibtex file to search.
    output_bibtex_file
        Path of the bibtex file to write. Defaults to 'references.bib'.

    Raises
    ------
    CitationNotFoundError
        If bibtool prints nothing for one of the keys.
    """
    if output_bibtex_file is None:
        output_bibtex_file = 'references.bib'

    entries = []
    for citation in citations:
        print(f'Adding {citation}')
        # Per the BibTool manual: -q is quiet, -r loads a resource file,
        # and -X extracts entries whose key matches the regex.
        # NOTE(review): the key is interpolated into the regex unescaped, so
        # keys containing regex metacharacters may match loosely — confirm
        # against BibTool's regex syntax before adding escaping.
        result = subprocess.run(
            [
                'bibtool',
                '-q',
                '-r',
                BIBTOOL_OPTIONS.name,
                '-X',
                '^' + citation + '$',
                input_bibtex_file,
            ],
            encoding='utf-8',
            stdout=subprocess.PIPE,
        )
        if result.stdout == '':
            message = f'Cannot find citation key: {citation}'
            print(message)
            raise CitationNotFoundError(message)
        entries.append(result.stdout)

    # bibtool's output was decoded as UTF-8; write it back with the same
    # encoding so the text round-trips regardless of the locale default.
    with open(output_bibtex_file, 'w', encoding='utf-8') as bibtex_file:
        bibtex_file.writelines(entries)
# Command-line entry point: parse the arguments, collect the citation keys
# from the tex file, and write the reduced bibtex file.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Make .bib from .tex citations')
    parser.add_argument(
        '-t', '--tex-file', required=True, type=str, help='input ".tex" file name'
    )
    parser.add_argument(
        '-b', '--bib-file', required=True, type=str, help='input ".bib" file name'
    )
    parser.add_argument(
        '-o', '--out-file', required=False, type=str, help='output ".bib" file name'
    )
    args = parser.parse_args()

    input_texfile = pathlib.Path(args.tex_file)
    input_bibtex = pathlib.Path(args.bib_file)
    if args.out_file is not None:
        output_bibtex = pathlib.Path(args.out_file)
    else:
        output_bibtex = 'references.bib'

    # Check both inputs up front. bibtool is run quietly, so a missing master
    # .bib would otherwise only show up as an empty result, reported as a
    # missing citation key.
    for required_input in (input_texfile, input_bibtex):
        if not required_input.exists():
            raise FileNotFoundError(f'{required_input} not found')

    # Get citations from .tex file.
    print(f'Finding citations in {input_texfile}')
    citations = find_citations_in_texfile(input_texfile)

    # Write reduced .bib file.
    print(f'Writing {output_bibtex} file with citations from {input_bibtex}')
    write_reduced_bibtex(citations, input_bibtex, output_bibtex)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment