Last active
September 21, 2022 08:56
-
-
Save JohannesBuchner/53d8d550b757db61eb446996d1d8e0a0 to your computer and use it in GitHub Desktop.
Prepare arxiv latex submission tarball (package all files into one directory with bib, figure files, etc).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Package a paper written in latex for arxiv. | |
Rationale | |
--------- | |
You may have figures and bibliography files included from elsewhere in your file system via absolute paths.
This script creates a subdirectory package-mylatexfile.tex/ which contains the LaTeX file together with the figures, .bib files and input files referenced in it.
Inside that subdirectory, pdflatex mylatexfile.tex should work without touching any files outside the subdirectory.
The subdirectory can then be tarred and uploaded to arXiv.
Synopsis:: | |
python3 package.py mylatexfile.tex | |
""" | |
import shutil | |
import sys | |
import os | |
import codecs | |
# The first command-line argument is the LaTeX source file to package.
if len(sys.argv) < 2:
    sys.exit('usage: %s mylatexfile.tex' % sys.argv[0])
filename = sys.argv[1]

# All packaged files end up flat inside a directory named after the input.
outdir = 'package-%s/' % filename
# exist_ok avoids the check-then-create race and allows re-running the script.
os.makedirs(outdir, exist_ok=True)

# Rewritten copy of the LaTeX source; it is closed once every input line has
# been processed further down in the script.
outfile = codecs.open(outdir + filename, 'w', encoding='latin1')

# Maps each original resource path to the flat file name it was copied under.
file_ids = {}
def handle_resource(filepath):
    """Copy *filepath* into the package directory under a flat, unique name.

    The target name is the base name of *filepath* with every dot except the
    last one replaced by "_".  If a different source path was already stored
    under that name, the smallest free integer is appended to the stem
    ("fig.pdf" -> "fig0.pdf", "fig1.pdf", ...).  The chosen name is remembered
    in the module-level ``file_ids`` dict, so asking for the same path again
    yields the same name.

    Parameters:
        filepath: path of the file to copy, as referenced in the LaTeX source.

    Returns:
        The file name (without directory) used inside ``outdir``.

    Raises:
        OSError: if the file cannot be copied (for example, it does not exist).
    """
    if filepath not in file_ids:
        basename = os.path.basename(filepath)
        if '.' in basename:
            stem, extension = basename.rsplit('.', 1)
            extension = '.' + extension
        else:
            # Names without any extension (e.g. "Makefile") are kept whole.
            stem, extension = basename, ''
        # Only the final dot may remain: remove additional "."
        stem = stem.replace('.', '_')

        taken = set(file_ids.values())
        target = stem + extension
        if target in taken:
            # Same name already used for a file from a different folder:
            # add a number to it, trying 0, 1, 2, ... until one is free.
            counter = 0
            while '%s%d%s' % (stem, counter, extension) in taken:
                counter += 1
            target = '%s%d%s' % (stem, counter, extension)
        file_ids[filepath] = target

    target = file_ids[filepath]
    print('copying resource %s from %s' % (target, filepath))
    shutil.copyfile(filepath, os.path.join(outdir, target))
    return target
def handle_command(cmd, l, filepostfix = ''):
    r"""Package the file named by the first ``\cmd{...}`` in line *l*.

    The text between the braces, with *filepostfix* appended, is passed to
    ``handle_resource``, which copies the file and returns its flat name.

    Parameters:
        cmd: command name without the backslash, e.g. ``'input'``.
        l: one line of LaTeX source containing ``\cmd{``.
        filepostfix: extension that the command adds implicitly (``'.tex'``
            for ``\include``); it is needed to find the file on disk.

    Returns:
        *l* with the braced argument replaced by the packaged file name.  The
        implicit *filepostfix* is removed again so the command keeps its
        original form (``\include{chap}``, not ``\include{chap.tex}``).

    Raises:
        ValueError: if *l* does not contain ``\cmd{`` followed by ``}``.
    """
    opener = '\\%s{' % cmd
    start = l.index(opener) + len(opener)
    end = l.index('}', start)
    packaged = handle_resource(l[start:end] + filepostfix)
    # Same treatment as handle_command_multiple: the extension was only
    # appended to locate the file and must not appear in the command argument.
    if filepostfix and packaged.endswith(filepostfix):
        packaged = packaged[:-len(filepostfix)]
    return '%s%s%s' % (l[:start], packaged, l[end:])
def handle_command_multiple(cmd, l, filepostfix = ''):
    r"""Package every file of a comma-separated ``\cmd{a,b,...}`` in line *l*.

    Each entry between the braces, with *filepostfix* appended, is passed to
    ``handle_resource``, which copies the file and returns its flat name.

    Parameters:
        cmd: command name without the backslash, e.g. ``'bibliography'``.
        l: one line of LaTeX source containing ``\cmd{``.
        filepostfix: extension that the command adds implicitly (``'.bib'``
            for ``\bibliography``); it is needed to find the files on disk.

    Returns:
        *l* with the braced list replaced by the packaged names, joined by
        commas and with the implicit *filepostfix* removed again.

    Raises:
        ValueError: if *l* does not contain ``\cmd{`` followed by ``}``.
    """
    opener = '\\%s{' % cmd
    start = l.index(opener) + len(opener)
    end = l.index('}', start)
    packaged_names = []
    for entry in l[start:end].split(','):
        # "{a, b}" is commonly written with spaces; they are not part of the
        # file name and would make the copy fail.
        packaged = handle_resource(entry.strip() + filepostfix)
        # Drop the extension only where it was appended, i.e. at the end.
        if filepostfix and packaged.endswith(filepostfix):
            packaged = packaged[:-len(filepostfix)]
        packaged_names.append(packaged)
    return '%s%s%s' % (l[:start], ','.join(packaged_names), l[end:])
# Extensions probed, in this order, when \includegraphics names no extension.
_GRAPHICS_EXTENSIONS = ('.pdf', '.eps', '.png')


def _package_graphics(fragment):
    r"""Package the figure named by the first ``{...}`` group of *fragment*.

    *fragment* is a piece of a LaTeX line whose first braced group is the
    argument of an ``\includegraphics`` command.  The figure is copied with
    ``handle_resource`` and *fragment* is returned with the braced path
    replaced by the packaged file name.
    """
    before, after = fragment.split('{', 1)
    filepath, post = after.split('}', 1)
    # LyX writes a dot inside a file name as "\lyxdot ".
    realfilepath = filepath.replace('\\lyxdot ', '.')
    for extension in _GRAPHICS_EXTENSIONS:
        if os.path.exists(realfilepath + extension):
            realfilepath += extension
            break
    packaged = handle_resource(realfilepath)
    # ".pdf" is removed from the name written back; other extensions stay.
    return '%s{%s}%s' % (before, packaged.replace('.pdf', ''), post)


def _copy_style_file(line, extension):
    """Copy the class/style file named in braces on *line*, if it exists.

    The braced name plus *extension* is looked up relative to the current
    directory; files that are not found there are silently skipped.  The
    line itself is not modified.
    """
    _, after = line.split('{', 1)
    name, _ = after.split('}', 1)
    candidate = name + extension
    if os.path.exists(candidate):
        handle_resource(candidate)


# Rewrite the LaTeX source line by line, copying every referenced file.
# Both files are closed when the block is left, even if a resource is missing.
with codecs.open(filename, 'r', encoding='latin1') as infile, outfile:
    for line in infile:
        line = line.replace(r'\altaffiltext', r'\altaffiliation')

        # Figure at the start of the line.
        if line.startswith('\\includegraphics'):
            line = _package_graphics(line)
        # Figure directly following a closing brace, e.g. inside \subfigure{}.
        if '}\\includegraphics' in line:
            cut = line.index('}\\includegraphics')
            line = line[:cut] + _package_graphics(line[cut:])

        if '\\input{' in line:
            line = handle_command('input', line)
        if '\\include{' in line:
            line = handle_command('include', line, filepostfix = '.tex')
        if '\\bibliography{' in line:
            line = handle_command_multiple('bibliography', line, filepostfix='.bib')

        # copy over styles if they exist
        if line.startswith('\\documentclass'):
            _copy_style_file(line, '.cls')
        if line.startswith('\\bibliographystyle'):
            _copy_style_file(line, '.bst')

        outfile.write(line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment