Skip to content

Instantly share code, notes, and snippets.

@CaptSolo
Created May 31, 2009 20:57
Show Gist options
  • Save CaptSolo/121030 to your computer and use it in GitHub Desktop.
Save CaptSolo/121030 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Modified by Uldis Bojars:
- changed to only print the list of figures referenced in LaTeX files
ToDo:
- work around the limitation of only 2 levels of includes
- fix the problem that it does not work if start_file is not in the current
working directory (cwd)
Usage: find_figures.py main.tex
---
Original code retrieved from:
- http://scienceoss.com/python-script-to-package-latex-projects-for-distribution/
This script gathers the necessary images and files (from
an arbitrarily large number of unneeded figures) and
puts it all in a tarball for distribution.
Usage: latexpackager.py main.tex dissertation.tar.gz
"""
import sys
import re
import os
import tarfile
from pprint import pprint
def find_references(f):
    r'''Return the auxiliary files that the LaTeX file f refers to.

    Scans f for \include{...}, \input{...}, \bibliography{...},
    \bibliographystyle{...} and \documentclass{...} and turns every
    argument into a file name:

    - \include / \input  -> the name, with '.tex' appended unless it
      already ends in a three-letter extension
    - \bibliography      -> one 'name.bib' per comma-separated entry
    - \bibliographystyle -> 'name.bst'
    - \documentclass     -> 'name.cls' (an optional [options] block is
      skipped)

    Text following an unescaped % is a LaTeX comment and is ignored.

    Returns a list ordered as: \include files, \input files, .bib files,
    .bst files, .cls files.  If nothing was found, returns an empty list.
    Names are returned as written in f; they are not resolved against the
    directory of f.  Raises IOError/OSError if f cannot be opened.
    '''
    with open(f) as handle:
        s = handle.read()

    # Remove comments up front so commented-out commands are never
    # reported, wherever on the line the % sits.  "\%" is a literal
    # percent sign, not a comment start, hence the lookbehind.
    s = re.sub(r"(?<!\\)%.*", "", s)

    def arguments(command):
        # Whitespace-stripped, non-empty {...} argument of every
        # occurrence of \<command>.  [^}]* stops at the first closing
        # brace, so several commands on one line stay separate.
        pattern = r"\\" + command + r"\{([^}]*)\}"
        found = (m.group(1).strip() for m in re.finditer(pattern, s))
        return [name for name in found if name]

    # Find the .tex files (\include first, then \input).
    texs = []
    for command in ("include", "input"):
        for name in arguments(command):
            # just looking for a '.' separating filename and extension
            if name[-4:-3] == ".":
                texs.append(name)
            else:
                texs.append(name + '.tex')

    # Find the .bib files; one command may list several databases.
    bibs = []
    for names in arguments("bibliography"):
        for entry in names.split(","):
            entry = entry.strip()
            if entry:
                bibs.append(entry + '.bib')

    # Find the styles.
    styles = [name + '.bst' for name in arguments("bibliographystyle")]

    # Find the document class description file, with or without options.
    docclass = [name + '.cls'
                for name in arguments(r"documentclass(?:\[[^\]]*\])?")]

    # Here is everything that was referenced in f:
    return texs + bibs + styles + docclass
def find_figures(f):
    r'''Return the figure files referenced by the LaTeX file f.

    Only .tex files are inspected; for any other file name an empty list
    is returned without opening the file.

    Every \includegraphics{name} or \includegraphics[options]{name} that
    is not inside a % comment contributes to the result:

    - a name that already carries an extension (e.g. 'plot.png') is
      returned unchanged
    - a name without extension is returned twice, as 'name.pdf' and
      'name.eps', because either file may be the one on disk

    Names are returned as written in f, in order of appearance.  Returns
    an empty list if no figures are found.  Raises IOError/OSError if a
    .tex file cannot be opened.
    '''
    # Short circuit if not a .tex file.
    if not f.endswith('.tex'):
        return []

    with open(f) as handle:
        s = handle.read()

    # Drop LaTeX comments ("\%" is a literal percent sign, not a comment).
    s = re.sub(r"(?<!\\)%.*", "", s)

    # The [options] block is optional, and [^}]* stops at the first
    # closing brace so several figures on one line are all found.
    includegraphics = r"\\includegraphics\*?\s*(?:\[[^\]]*\])?\s*\{([^}]*)\}"

    figures = []
    for match in re.finditer(includegraphics, s):
        basename = match.group(1).strip()
        if not basename:
            continue
        if os.path.splitext(basename)[1]:
            # that is, it has an extension already.
            # This is for things like .png images.
            figures.append(basename)
        else:
            figures.append(basename + '.pdf')
            figures.append(basename + '.eps')
    return figures
def latex_files(start_file):
    """
    Find all files referenced from the main start_file.

    Starts with whatever find_references() reports for start_file, adds
    start_file itself, and then keeps following the references of every
    '.tex' entry, level by level, until no new files turn up.

    Each file appears once in the returned list, in order of first
    discovery, and each .tex file is scanned only once, so files that
    include each other cannot cause an endless loop.

    Any exception raised by find_references() (for example when a
    referenced .tex file cannot be opened) is propagated unchanged.
    """
    file_list = []
    listed = set()

    pending = find_references(start_file)
    # Don't forget to add the start_file .tex file.
    pending.append(start_file)
    # start_file has just been scanned; never scan it a second time.
    scanned = set([start_file])

    while pending:
        discovered = []
        for path in pending:
            if path not in listed:
                listed.add(path)
                file_list.append(path)
            # Only .tex files can reference further files.
            if path.endswith('.tex') and path not in scanned:
                scanned.add(path)
                discovered.extend(find_references(path))
        pending = discovered
    return file_list
def list_figures(file_list):
    """Collect the figures of every file in file_list into one flat list.

    find_figures() is called once per entry, in order, and the results
    are concatenated; duplicates are kept.
    """
    return [figure
            for path in file_list
            for figure in find_figures(path)]
def create_archive(fname, file_list):
    """Write every path in file_list into a new gzip-compressed tarball.

    fname is the archive to create (an existing file is overwritten, as
    it is opened in 'w:gz' mode).  Each path is printed to stdout right
    before it is added.  The archive is closed even if adding a path
    fails; the exception from tarfile is then propagated to the caller.
    """
    tarball = tarfile.open(fname, 'w:gz')
    try:
        for path in file_list:
            # Parenthesised form prints the same under Python 2 and 3.
            print(path)
            tarball.add(path)
    finally:
        tarball.close()
def old_main(argv):
    """Legacy entry point: pack a LaTeX project into a tarball.

    argv[1] is the main .tex file and argv[2] the archive to write.
    Only the file-name part of argv[1] is handed to latex_files(), so
    the referenced files are looked up relative to the current
    working directory.
    """
    tex_path, archive_name = argv[1], argv[2]
    project_dir, tex_name = os.path.split(tex_path)
    if not project_dir:
        project_dir = os.getcwd()
    # NOTE: project_dir is resolved but not used; the collected paths
    # are archived as returned instead of being joined with it.
    keepers = latex_files(tex_name)
    create_archive(archive_name, keepers + list_figures(keepers))
def main(start_file):
    """Pretty-print the figures referenced from start_file that are missing.

    Collects the files reachable from start_file with latex_files(),
    gathers their figures with list_figures(), and prints (via pprint)
    the list of figure paths for which os.path.isfile() is false.  Each
    missing figure is reported once, in order of first appearance.
    Paths are checked relative to the current working directory.
    """
    t_files = latex_files(start_file)
    figures = list_figures(t_files)

    missing_figures = []
    reported = set()
    for figure in figures:
        if figure in reported:
            continue
        reported.add(figure)
        if not os.path.isfile(figure):
            missing_figures.append(figure)
    pprint(missing_figures)
if __name__ == "__main__":
    # The main .tex file is expected as the first command-line argument.
    if len(sys.argv) > 1:
        main(sys.argv[1])
    else:
        # Parenthesised form prints the same under Python 2 and 3.
        print("Usage: find_figures.py main.tex")
    # Legacy tarball mode, currently disabled:
    # old_main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment