Created
May 31, 2009 20:57
-
-
Save CaptSolo/121030 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Modified by Uldis Bojars:
- changed to only print the list of figures referenced in LaTeX files

ToDo:
- work around the limitation of only 2 levels of includes
- fix the problem that it does not work if start_file is not in the current
  working directory (cwd)

Usage: find_figures.py main.tex

---

Original code retrieved from:
- http://scienceoss.com/python-script-to-package-latex-projects-for-distribution/

This script gathers the necessary images and files (from
an arbitrarily large number of unneeded figures) and
puts it all in a tarball for distribution.

Usage: latexpackager.py main.tex dissertation.tar.gz
"""
import sys | |
import re | |
import os | |
import tarfile | |
from pprint import pprint | |
def find_references(f):
    r"""Return the list of files that the LaTeX file ``f`` refers to.

    The file is scanned for \include, \input, \bibliography,
    \bibliographystyle and \documentclass commands.  The result is ordered
    as: .tex files (includes first, then inputs), .bib files, .bst files,
    .cls files.  If nothing was found, an empty list is returned.

    Raises IOError/OSError if ``f`` cannot be opened.
    """
    with open(f) as handle:
        source = handle.read()

    # Remove comments (an unescaped '%' up to the end of the line) so that
    # commented-out commands are not reported as references.
    source = re.sub(r"(?<!\\)%.*", "", source)

    def arguments(command, optional=False):
        # Return the brace argument of every occurrence of \<command>.
        # "[^}]*" keeps two commands on the same line from being merged
        # into one bogus match.
        options = r"(?:\[[^\]]*\])?\s*" if optional else ""
        pattern = r"\\%s\s*%s\{([^}]*)\}" % (command, options)
        return [m.group(1).strip() for m in re.finditer(pattern, source)]

    def as_tex(name):
        # LaTeX appends ".tex" only when the name carries no extension.
        if os.path.splitext(name)[1]:
            return name
        return name + '.tex'

    # Find the .tex files pulled in through \include and \input.
    texs = [as_tex(name) for name in arguments('include') if name]
    texs.extend(as_tex(name) for name in arguments('input') if name)

    # Find the .bib files; \bibliography takes a comma-separated list.
    bibs = []
    for group in arguments('bibliography'):
        bibs.extend(name.strip() + '.bib'
                    for name in group.split(',') if name.strip())

    # Find the bibliography styles.
    styles = [name + '.bst' for name in arguments('bibliographystyle') if name]

    # Find the document class description file; the class options in
    # \documentclass[...]{...} are skipped.
    docclass = [name + '.cls'
                for name in arguments('documentclass', optional=True) if name]

    # Here is everything that was referenced in f:
    return texs + bibs + styles + docclass
def find_figures(f):
    r"""Return the list of figures referenced by the LaTeX file ``f``.

    Only .tex files are inspected; for any other file name the function
    returns an empty list without opening the file.  Each
    \includegraphics argument that already has an extension (e.g. a .png
    image) is returned unchanged; for an argument without one, both the
    '.pdf' and the '.eps' candidate are returned.

    Raises IOError/OSError if ``f`` is a .tex file that cannot be opened.
    """
    # Short circuit if not a .tex file.
    if not f.endswith('.tex'):
        return []

    with open(f) as handle:
        source = handle.read()

    # Remove comments (an unescaped '%' up to the end of the line) so that
    # commented-out figures are not reported.
    source = re.sub(r"(?<!\\)%.*", "", source)

    # The [options] part is optional, and neither group may run past its
    # own closing bracket/brace.
    includegraphics = r"\\includegraphics\*?\s*(?:\[[^\]]*\])?\s*\{([^}]*)\}"

    figures = []
    for match in re.finditer(includegraphics, source):
        basename = match.group(1).strip()
        if not basename:
            continue
        if os.path.splitext(basename)[1]:
            # It has an extension already (things like .png images).
            figures.append(basename)
        else:
            figures.append(basename + '.pdf')
            figures.append(basename + '.eps')
    return figures
def latex_files(start_file):
    """Return start_file plus every file reachable from it through references.

    The references of start_file come first, then start_file itself, then
    the references found in each referenced .tex file, in discovery order.
    Every name appears once, and each .tex file is parsed at most once, so
    files that include each other cannot cause an endless loop.

    A referenced .tex file that does not exist on disk is still listed but
    is not parsed.  start_file itself must be readable, otherwise the
    error raised by find_references propagates.
    """
    queue = find_references(start_file)
    # Don't forget to add the start_file .tex file.
    queue.append(start_file)

    file_list = []
    seen = set()
    position = 0
    # Walk the queue by index: entries are appended while it is processed.
    while position < len(queue):
        name = queue[position]
        position += 1
        if name in seen:
            continue
        seen.add(name)
        file_list.append(name)
        # start_file has already been parsed above; only .tex files that
        # actually exist are searched for further references.
        if (name != start_file and name.endswith('.tex')
                and os.path.isfile(name)):
            queue.extend(find_references(name))
    return file_list
def list_figures(file_list):
    """Return the figures found in all of the given files, in file order.

    The result is the concatenation of find_figures() for each entry of
    file_list.
    """
    return [figure
            for name in file_list
            for figure in find_figures(name)]
def create_archive(fname, file_list):
    """Write every path in file_list to the gzip-compressed tarball fname.

    Each path is printed before it is added.  The archive is closed even
    when adding a path fails (for example because it does not exist).
    """
    with tarfile.open(fname, 'w:gz') as tarball:
        for path in file_list:
            # Single-argument call form: prints the same on Python 2 and 3.
            print(path)
            tarball.add(path)
def old_main(argv):
    """Package a LaTeX project into a tarball.

    argv[1] is the main .tex file and argv[2] the tarball to create.  The
    files referenced from the main file are looked up relative to the
    directory of the main file, so the scan runs from that directory; the
    previous working directory is restored afterwards.

    Raises IndexError if argv has fewer than three entries.
    """
    main_path = argv[1]
    # Resolve the archive name first so that a relative name still ends up
    # relative to the caller's working directory.
    tarfn = os.path.abspath(argv[2])

    projectdir, main_name = os.path.split(main_path)
    previous_dir = os.getcwd()
    if projectdir:
        os.chdir(projectdir)
    try:
        keepers = latex_files(main_name)
        figures = list_figures(keepers)
        create_archive(tarfn, keepers + figures)
    finally:
        os.chdir(previous_dir)
def main(start_file):
    """Pretty-print the figures referenced from start_file that are missing.

    A figure counts as missing when its path is not an existing file,
    as seen from the current working directory.
    """
    t_files = latex_files(start_file)
    figures = list_figures(t_files)
    # A list is built explicitly so the names themselves are printed on
    # both Python 2 and Python 3.
    missing_figures = [f for f in figures if not os.path.isfile(f)]
    pprint(missing_figures)
if __name__ == "__main__":
    if len(sys.argv) > 1:
        main(sys.argv[1])
    else:
        # Report the usage error on stderr and exit with a non-zero status.
        sys.exit("Usage: find_figures.py main.tex")
    # To build a tarball instead, call old_main(sys.argv).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment