Skip to content

Instantly share code, notes, and snippets.

@JohannesBuchner
Last active September 21, 2022 08:56
Show Gist options
  • Save JohannesBuchner/53d8d550b757db61eb446996d1d8e0a0 to your computer and use it in GitHub Desktop.
Save JohannesBuchner/53d8d550b757db61eb446996d1d8e0a0 to your computer and use it in GitHub Desktop.
Prepare arxiv latex submission tarball (package all files into one directory with bib, figure files, etc).
"""Package a paper written in latex for arxiv.
Rationale
---------
You may have figures and bibliography included from somewhere else in your file system with absolute paths.
This script makes a subdirectory package-mylatexfile.tex/ which contains the latex file and the figures, .bib and input files referenced in the tex file.
In the subdirectory, pdflatex mylatexfile.tex should work and not touch any files outside the subdirectory.
The subdirectory can then be tarred and uploaded to arxiv.
Synopsis::
python3 package.py mylatexfile.tex
"""
import shutil
import sys
import os
import codecs
# The .tex file to package is the script's only command-line argument.
if len(sys.argv) < 2:
    sys.exit('usage: python3 package.py mylatexfile.tex')
filename = sys.argv[1]

# Name the package after the file itself (not its directory part) so that an
# argument such as "./paper.tex" does not yield a nested, non-existent path.
# The trailing slash matters: paths are built elsewhere as outdir + name.
outdir = 'package-%s/' % os.path.basename(filename)
os.makedirs(outdir, exist_ok=True)

# Rewritten copy of the .tex file, placed inside the package directory.
outfile = codecs.open(outdir + os.path.basename(filename), 'w', encoding='latin1')

# Maps each original resource path to the file name it received in outdir.
file_ids = {}
def handle_resource(filepath, registry=None, destdir=None):
    """Copy a resource into the package directory under a unique flat name.

    The base name of *filepath* is sanitised so that it contains at most one
    dot (earlier dots become underscores).  If another path was already
    registered under the same name, a number is appended to the stem until
    the name is unique.  The file is then copied and the chosen name returned.

    Parameters
    ----------
    filepath : str
        Path of the resource as referenced from the tex file.
    registry : dict, optional
        Mapping from original path to packaged name; updated in place.
        Defaults to the module-level ``file_ids``.
    destdir : str, optional
        Directory receiving the copy.  Defaults to the module-level ``outdir``.

    Returns
    -------
    str
        File name (without directory) of the copy inside *destdir*.

    Raises
    ------
    OSError
        Propagated from ``shutil.copyfile`` when the copy fails, e.g. because
        *filepath* does not exist.
    """
    if registry is None:
        registry = file_ids
    if destdir is None:
        destdir = outdir

    if filepath not in registry:
        name = os.path.basename(filepath)
        # Split off the extension by hand; a name without any dot has none.
        if '.' in name:
            stem, ext = name.rsplit('.', maxsplit=1)
            ext = '.' + ext
        else:
            stem, ext = name, ''
        # Remove additional "." so only the extension separator remains.
        stem = stem.replace('.', '_')
        name = stem + ext

        # If the same name is already used by a file from a different folder,
        # add a number to it.  Search without an upper bound so that a
        # collision can never silently overwrite an earlier resource.
        taken = set(registry.values())
        if name in taken:
            counter = 0
            name = '%s%d%s' % (stem, counter, ext)
            while name in taken:
                counter += 1
                name = '%s%d%s' % (stem, counter, ext)
        registry[filepath] = name

    filename = registry[filepath]
    outfilename = os.path.join(destdir, filename)
    print('copying resource %s from %s' % (filename, filepath))
    shutil.copyfile(filepath, outfilename)
    return filename
def handle_command(cmd, l, filepostfix=''):
    r"""Package the file named by the first ``\cmd{...}`` on a line.

    The brace argument is taken as a path, *filepostfix* is appended to it,
    and the result is copied via ``handle_resource``.  The argument in the
    line is then replaced by the packaged name.

    Parameters
    ----------
    cmd : str
        Command name without the backslash, e.g. ``'input'``.
    l : str
        The line of tex source containing ``\cmd{``.
    filepostfix : str, optional
        Extension the tex source omits, e.g. ``'.tex'``.  It is appended to
        locate the file and stripped again from the name written back, the
        same way ``handle_command_multiple`` treats its entries, so that the
        command keeps its extension-less argument.

    Returns
    -------
    str
        The line with the first argument of ``\cmd`` rewritten.

    Raises
    ------
    ValueError
        If ``\cmd{`` or the closing ``}`` is not found on the line.
    """
    marker = r'\%s{' % cmd
    start = l.index(marker) + len(marker)
    end = l.index('}', start)
    filename = handle_resource(l[start:end] + filepostfix)
    # Drop the extension that was only added to find the file on disk.
    if filepostfix and filename.endswith(filepostfix):
        filename = filename[:-len(filepostfix)]
    return '%s%s%s' % (l[:start], filename, l[end:])
def handle_command_multiple(cmd, l, filepostfix=''):
    r"""Package every file in the comma-separated argument of ``\cmd{...}``.

    Each entry of the first ``\cmd{a,b,...}`` on the line is treated as a
    path: *filepostfix* is appended, the file is copied via
    ``handle_resource``, and the entry is replaced by the packaged name with
    *filepostfix* removed again.

    Parameters
    ----------
    cmd : str
        Command name without the backslash, e.g. ``'bibliography'``.
    l : str
        The line of tex source containing ``\cmd{``.
    filepostfix : str, optional
        Extension the tex source omits, e.g. ``'.bib'``.

    Returns
    -------
    str
        The line with the argument list of ``\cmd`` rewritten.

    Raises
    ------
    ValueError
        If ``\cmd{`` or the closing ``}`` is not found on the line.
    """
    marker = r'\%s{' % cmd
    start = l.index(marker) + len(marker)
    end = l.index('}', start)
    filenames = []
    for entry in l[start:end].split(','):
        # Blanks around an entry are not part of the path, and an empty entry
        # (e.g. after a trailing comma) names no file at all.
        entry = entry.strip()
        if not entry:
            continue
        packaged = handle_resource(entry + filepostfix)
        # Drop the extension that was only added to find the file on disk.
        if filepostfix and packaged.endswith(filepostfix):
            packaged = packaged[:-len(filepostfix)]
        filenames.append(packaged)
    return '%s%s%s' % (l[:start], ','.join(filenames), l[end:])
def _package_graphic(segment):
    r"""Package the figure of the first ``\includegraphics`` in *segment*.

    *segment* is split at its first ``{`` and the following ``}``; the text
    in between is the figure path.  LyX's ``\lyxdot `` escape is turned back
    into a dot, and if the path plus ``.pdf``, ``.eps`` or ``.png`` (tried in
    that order) exists, that extension is appended.  The file is copied via
    ``handle_resource`` and the segment is returned with the path replaced by
    the packaged name, with ``.pdf`` removed from it.
    """
    before, after = segment.split('{', 1)
    filepath, post = after.split('}', 1)
    realfilepath = filepath.replace('\\lyxdot ', '.')
    for extension in ('.pdf', '.eps', '.png'):
        if os.path.exists(realfilepath + extension):
            realfilepath = realfilepath + extension
            break
    packaged = handle_resource(realfilepath)
    return '%s{%s}%s' % (before, packaged.replace('.pdf', ''), post)


def _copy_style_file(line, extension):
    """Copy the style named in the first brace argument of *line*, if present.

    The argument plus *extension* (``.cls`` or ``.bst``) is copied via
    ``handle_resource`` only when such a file exists; the line itself is left
    untouched by the caller.
    """
    before, after = line.split('{', 1)
    stylename, post = after.split('}', 1)
    stylepath = stylename + extension
    if os.path.exists(stylepath):
        handle_resource(stylepath)


# Rewrite the tex file line by line into the package directory, copying every
# referenced resource along the way.  Both files are closed even if a
# resource is missing and the copy raises.
try:
    with codecs.open(filename, 'r', encoding='latin1') as infile:
        for line in infile:
            line = line.replace(r'\altaffiltext', r'\altaffiliation')

            # A figure command at the very start of the line ...
            if line.startswith('\\includegraphics'):
                line = _package_graphic(line)
            # ... and one directly following a closing brace further along.
            if '}\\includegraphics' in line:
                pos = line.index('}\\includegraphics')
                line = line[:pos] + _package_graphic(line[pos:])

            if '\\input{' in line:
                line = handle_command('input', line)
            if '\\include{' in line:
                line = handle_command('include', line, filepostfix='.tex')
            if '\\bibliography{' in line:
                line = handle_command_multiple('bibliography', line, filepostfix='.bib')

            # copy over styles if they exist
            if line.startswith('\\documentclass'):
                _copy_style_file(line, '.cls')
            if line.startswith('\\bibliographystyle'):
                _copy_style_file(line, '.bst')

            outfile.write(line)
finally:
    outfile.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment