Skip to content

Instantly share code, notes, and snippets.

@samuelsaari
Last active April 29, 2024 10:58
Show Gist options
  • Save samuelsaari/07cfff92e157fad84cd9d1988e108d95 to your computer and use it in GitHub Desktop.
Save samuelsaari/07cfff92e157fad84cd9d1988e108d95 to your computer and use it in GitHub Desktop.
# TexPublish - Preparing LaTeX Projects for Publication with Ease
# URL: https://gist.github.com/samuelsaari/07cfff92e157fad84cd9d1988e108d95
# Author: Miika Mäki https://github.com/samuelsaari
# - creates a new directory for publishable files
# - copies files matching user defined regular expressions to the new directory
# - copies all desired files that are used by the main tex document (like figures and tables)
# - creates a parsed bibliography that includes only entries used by the main tex file
# - flattens the tex file (inserts \input and \include commands and custom preambles in the main tex file)
# - pastes the parsed bibliography to the flattened tex file if desired
# - removes all comments from main tex file if desired
# - BONUS: possibility to include/exclude desired versions (not robust; requires customizing the tex file)
from doctest import master
import os
import shutil
import glob
import re
from pathlib import Path
import sys
import regex
import tempfile
import filecmp
from shutil import copyfile
from copy import deepcopy
# Work relative to the directory that contains this script. abspath() is needed
# because __file__ can be a bare relative name (dirname == '') when the script is
# launched as "python script.py" on Python < 3.9, and os.chdir('') raises.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
# deleting existing directory
def delete_and_create_folder(new_folder):
    '''
    Remove "new_folder" (if present) and recreate it with its float sub-folders.

    The sub-folders "img", "img2", "tab" and "tab2" are created inside the new
    directory. A failed deletion is only reported; a failed creation terminates
    the program through sys.exit.
    '''
    # deleting existing directory
    try:
        shutil.rmtree(new_folder)
    except FileNotFoundError:
        # first run: there is simply nothing to remove
        print("Directory %s does not exist yet, nothing to delete" % new_folder)
    except OSError:
        # only file-system errors are expected here; anything else should surface
        print("Deletion of the directory %s failed" % new_folder)
    else:
        print("Successfully deleted the directory %s" % new_folder)
    # creating new directory
    try:
        os.mkdir(new_folder)
        for subfolder in ('img', 'img2', 'tab', 'tab2'):
            os.mkdir(os.path.join(new_folder, subfolder))
    except OSError:
        sys.exit("Creation of the directory %s failed" % new_folder)
    else:
        print("Successfully created the directory %s " % new_folder)
list_of_nonfloats = []  # module-level record of every non-float file copied so far


def copy_non_floats(include_regex: str, exclude_regex: str = r'^((?!Copy).)*$', create_dep_file=False,
                    list_of_nonfloats: list = list_of_nonfloats, target_dir=None, verbose=None):
    '''
    Copy files from the working directory whose names match "include_regex".

    A file is copied only if its name also matches "exclude_regex"; the default
    pattern matches names that do NOT contain the word "Copy".

    include_regex: pattern searched in every name returned by glob("*")
    exclude_regex: second pattern a name has to match to be copied
    create_dep_file: when true (and verbose), report the tex/dep file names in use
    list_of_nonfloats: list the copied names are appended to; the default is
        deliberately the shared module-level list
    target_dir: destination directory; defaults to the module-level TARGET_DIR
    verbose: print details; defaults to the module-level "verbose" flag

    Returns the stem (file name without extension) of the last file copied.
    Raises FileNotFoundError when no file matches.
    '''
    if target_dir is None:
        target_dir = TARGET_DIR
    if verbose is None:
        verbose = globals().get('verbose', False)
    stem = None
    for name in glob.glob("*"):
        if not re.search(include_regex, name):
            continue
        if not re.search(exclude_regex, name):  # e.g. contains "Copy"
            continue
        list_of_nonfloats.append(name)
        shutil.copy(name, target_dir)
        stem = Path(name).stem
        if create_dep_file and verbose and re.search(r'tex$', name):
            print(f'--tex file to use:{name}--')
            print(f'--dep file to use:{stem}.dep--')
    if stem is None:
        # previously this surfaced as an UnboundLocalError on the return statement
        raise FileNotFoundError(
            f'no file in {os.getcwd()} matches include_regex {include_regex!r} '
            f'and exclude_regex {exclude_regex!r}'
        )
    return stem
def copy_float_files(DEP_FILE, EXTENSIONS, TARGET_DIR, verbose=None):
    '''
    Copy the files listed in a snapshot ".dep" file into TARGET_DIR.

    Edited from https://tex.stackexchange.com/a/162763/244928 by Stack Exchange user mriss.

    DEP_FILE: path of the .dep file; only lines containing "*{file}" are used
    EXTENSIONS: lower-case extensions (without dot) of the files to copy
    TARGET_DIR: destination prefix; the listed relative path is appended to it
    verbose: print every copied file; defaults to the module-level "verbose" flag

    Missing destination sub-folders are created, so floats may live in any
    sub-directory of the project.
    '''
    if verbose is None:
        verbose = globals().get('verbose', False)
    with open(DEP_FILE, 'r') as f:
        for line in f:
            if '*{file}' not in line:
                continue
            parts = line.split('{')
            if len(parts) < 3:  # "*{file}" without a file name: nothing to copy
                continue
            source = parts[2].split('}')[0]
            if re.search(r'babel-.*\.tex', source):  # a work around hack
                continue
            # NB! other auxiliary files might need excluding (check .dep file)
            if re.search(r'\.code\.tex', source) or source == "xkeyval.tex":
                continue
            extension = os.path.splitext(source)[1].lower()[1:]
            if extension not in EXTENSIONS:
                continue
            if verbose:
                print(f'{source}')
            destination = TARGET_DIR + source
            parent = os.path.dirname(destination)
            if parent:
                os.makedirs(parent, exist_ok=True)
            shutil.copy(source, destination)
#--------------------------------------------------------------------
#flatten function
# Original Author: Timo Korthals [email protected]
# https://gist.github.com/tik0/7a8beb82e4e01e91a7ff6146dac06055
# which was based on # Template by https://gist.github.com/restrepo/4207109
# Function:
# Convert a master latex file
# into a single document by including hierarchically
# automatically all the LaTeX documents
# which are arguments of
# \include, \input, or \import
# ignoring any \includeonly
# also includes custom preambles, as long as the preamble regex is defined accordingly
def flatten(masterfile, flattenfile, verbose, preamble_regex: str):
    r'''
    Write a copy of "masterfile" to "flattenfile" with its dependencies inlined.

    Lines starting with \input{..}, \include{..} or \import{dir}{file}, and lines
    matching "preamble_regex", are replaced by the content of the referenced file
    followed by a newline. ".tex" (".sty" for preamble lines) is appended to
    names lacking that extension. \includeonly lines are commented out; every
    other line is copied unchanged. Lines ending in ".sum}" are not expanded.

    Only the master file is scanned: dependencies inside an included file are
    pasted as they are and not expanded further.

    masterfile: path of the main tex file
    flattenfile: path of the file to write
    verbose: print every file that is pasted in
    preamble_regex: pattern (used with re.match) identifying custom preamble lines
    '''
    dependency_pattern = re.compile(r'\s*\\(?:input|include|import)\{')
    with open(masterfile, 'r') as source:
        master_lines = source.readlines()
    with open(flattenfile, 'w') as flattened:
        for line in master_lines:
            is_preamble = re.match(preamble_regex, line) is not None
            is_dependency = is_preamble or dependency_pattern.match(line) is not None
            if is_dependency and re.match(r'.+\.sum}$', line):
                is_dependency = False
                if verbose:
                    print(f"ignoring a match in line including '.sum': {line}")
            if is_dependency:
                # Remove trailing comments which might taint the following processing
                command = line.split('%')[0]
                filename = command.split('{')[-1].split('}')[0]
                # Add the extension LaTeX would assume
                extension = '.sty' if is_preamble else '.tex'
                if not filename.endswith(extension):
                    filename = filename + extension
                # \import{dir}{file}: the file lives below "dir"
                dirname = './'
                if command.lstrip().startswith(r'\import{'):
                    dirname = dirname + command.split('{')[-2].split('}')[0] + '/'
                filename = dirname + filename
                if verbose:
                    print('Import: ' + filename)
                with open(filename, 'r') as included:
                    flattened.write(included.read())
                flattened.write('\n')
            elif line.lstrip().startswith(r'\includeonly{'):
                flattened.write(line.replace(r'\includeonly{', r'%\includeonly{'))
            else:
                flattened.write(line)
# Remove all comments (also originally by Timo Korthals)
def remove_comments(tex_path=None):
    r'''
    Strip LaTeX comments from a tex file in place.

    - lines consisting only of a comment are dropped, except "%TC:" lines
      (optional whitespace allowed around "%" and "TC"), which are kept verbatim
    - a trailing comment is cut off at the first "%" that is not preceded by a
      backslash or a hyphen
    - everything from a \begin{comment} line to the matching \end{comment} line
      is dropped, including those two lines

    tex_path: file to rewrite; defaults to the module-level "flattenfile"
    '''
    if tex_path is None:
        tex_path = flattenfile
    print('---Removing comments---')
    with open(tex_path, 'r') as tex_file:
        lines = tex_file.readlines()
    trailing_comment = re.compile(r'[^\\-]%')  # a "%" that is not "\%" or "-%"
    inside_comment_block = False
    kept = []
    for line in lines:
        # The block markers are tested first so that a trailing comment on the
        # marker line, or on a line inside the block, cannot leak text out.
        if re.match(r'\s*\\begin\{comment\}', line):
            inside_comment_block = True
            continue
        if re.match(r'\s*\\end\{comment\}', line):
            inside_comment_block = False
            continue
        if inside_comment_block:
            continue
        if re.match(r'\s*%\s*TC\s*:.+', line):  # %TC: lines are kept as they are
            kept.append(line)
            continue
        if re.match(r'\s*%', line):  # the line is just a comment
            continue
        found = trailing_comment.search(line)
        if found:
            # keep the character in front of the "%", drop the rest
            kept.append(line[:found.start() + 1] + '\n')
        else:
            kept.append(line)
    with open(tex_path, 'w') as tex_file:
        tex_file.writelines(kept)
def create_citation_set(MAIN_STEM: str):
    r'''
    Collect the citation keys recorded in "<MAIN_STEM>.aux".

    Two kinds of aux lines are read:
    - \citation{key1,key2,...} (bibtex)
    - \abx@aux@cite{0}{key} or \abx@aux@cite{key} (biblatex)

    Returns the keys as a set.
    '''
    AUX_FILE = MAIN_STEM + ".aux"
    bibtex_line = re.compile(r'^\s*\\citation\{.*\}$')
    # The braces are escaped on purpose: an unescaped "{0}" is a regex
    # quantifier (zero repetitions), not the literal text "{0}".
    biblatex_line = re.compile(r'^\s*\\abx@aux@cite(?:\{0\})?\{')
    citation_set = set()
    with open(AUX_FILE, "r") as f:
        for line in f:
            bare_line = line.strip("\n")
            if bibtex_line.match(bare_line):
                keys = line.split('{')[-1].split('}')[0]
                citation_set.update(key.strip() for key in keys.split(',') if key.strip())
            elif biblatex_line.match(bare_line):
                citation_set.add(line.split('{')[-1].split('}')[0].strip())
    return citation_set
def create_parsed_library(BIB_STEM: str, citation_set: set):
    '''
    Write "<BIB_STEM>Parsed.bib" containing only the cited entries of "<BIB_STEM>.bib".

    An entry starts at a line of the form "@type{key," and is copied when "key"
    is in "citation_set". A line holding only "}" closes a copied entry and is
    followed by an empty line in the output.

    BIB_STEM: bibliography path without the ".bib" extension
    citation_set: keys of the entries to keep
    '''
    MAIN_BIB = BIB_STEM + ".bib"
    PARSED_BIB = BIB_STEM + "Parsed.bib"
    with open(MAIN_BIB, "r", encoding="utf8") as f:
        lines = f.readlines()
    with open(PARSED_BIB, "w", encoding="utf8") as f:
        write = False
        for line in lines:
            bare_line = line.strip("\n")
            if re.match(r'^@.+,\s*$', bare_line):
                key = line.split('{')[-1].split(',')[0].strip()
                # Decide afresh at every entry header. Otherwise an uncited entry
                # that follows an entry without a lone closing "}" gets copied too.
                write = key in citation_set
            elif write and re.match(r'^\s*}\s*$', bare_line):
                f.write(line)
                f.write('\n')
                write = False
                continue
            if write:
                f.write(line)
def flatten_or_parse_bibliography_and_remove_ProvidesPackage():
    r'''
    Post-process the flattened tex file in place.

    Reads the module-level settings "flattenfile", "PARSED_BIB", "BIB_PASTED"
    and "include_bibliography_in_main_tex_file".

    - \ProvidesPackage{..} lines (left over from pasted preambles) are removed
    - \bibliography{..} (bibtex) and \addbibresource{..} (biblatex) lines are
      rewritten to point at the parsed bibliography
    - if include_bibliography_in_main_tex_file is set, the parsed bibliography
      is pasted into a filecontents environment directly after the
      \documentclass line (with or without [options]; it has to be on one
      line) and the bibliography commands point at BIB_PASTED instead
    '''
    if include_bibliography_in_main_tex_file:
        PARSED_OR_PASTED_BIB = BIB_PASTED
    else:
        PARSED_OR_PASTED_BIB = PARSED_BIB
    with open(flattenfile, "r") as f:
        lines_flatten = f.readlines()
    lines_parsed_bib = []
    if include_bibliography_in_main_tex_file:
        # the parsed bibliography is only needed when it gets pasted in
        with open(PARSED_BIB, "r", encoding="utf8") as f:
            lines_parsed_bib = f.readlines()
    # NOTE(review): the rewritten \bibliography{...} argument keeps its ".bib"
    # extension - confirm that the BibTeX in use accepts that.
    with open(flattenfile, "w") as f:
        for line in lines_flatten:
            bare_line = line.strip("\n")
            if re.match(r'\s*\\ProvidesPackage\{', bare_line):
                continue
            if include_bibliography_in_main_tex_file and re.match(r'\s*\\documentclass(\[.*\])?\{.*\}', bare_line):
                f.write(line)
                f.write(r"\usepackage{filecontents}\begin{filecontents}{" + BIB_PASTED + "}")
                f.write('\n')
                f.writelines(lines_parsed_bib)
                f.write(r'\end{filecontents}')
                f.write('\n')
            elif re.match(r'^\s*\\bibliography\{.*\}', line):
                f.write(r'\bibliography{' + PARSED_OR_PASTED_BIB + '}')
                f.write('\n')
            elif re.match(r'^\s*\\addbibresource\{.*\}', line):  # biblatex
                f.write(r'\addbibresource{' + PARSED_OR_PASTED_BIB + '}')
                f.write('\n')
            else:
                f.write(line)
def _select_inline_version(line, marker, version_number):
    '''Replace every "marker{first}{second}" in "line" by the content of the chosen argument.'''
    result = ""
    consumed = ""   # everything read since the marker, to restore it if parsing fails
    pending = ""    # brace group currently being read
    groups = []     # completed brace groups of the current macro call
    depth = 0
    in_macro = False
    for letter in line:
        if not in_macro:
            result += letter
            if result.endswith(marker):
                result = result[:-len(marker)]  # keep only the text before the macro
                in_macro = True
            continue
        consumed += letter
        if depth == 0 and not pending and letter.isspace():
            continue  # whitespace in front of an argument is ignored
        pending += letter
        if letter == "{":
            depth += 1
        elif letter == "}":
            depth -= 1
            if depth == 0:  # outer brace closed: one argument is complete
                groups.append(pending)
                pending = ""
                if len(groups) == 2:
                    # strip the outer braces of the chosen argument
                    result += groups[version_number - 1][1:-1]
                    groups = []
                    consumed = ""
                    in_macro = False
    if in_macro:
        # the macro call did not end on this line: leave it untouched
        result += marker + consumed
    return result


def include_only_desired_versions(v1_value: str, v2_value: str, tex_path=None):
    r'''
    Keep only one of two document versions in the flattened tex file (in place).

    The version is chosen by the first line starting with \<v1_value>true
    (version 1) or \<v1_value>false (version 2). Then
    - everything from \begin{X} to \end{X}, X being the version that is NOT
      wanted, is removed together with those two lines
    - every \to<v1_value>or<v2_value>{first}{second} is replaced by the content
      of the argument that belongs to the wanted version (lines starting with
      \newcommand are left alone)

    v1_value, v2_value: names of the two versions (order matters)
    tex_path: file to rewrite; defaults to the module-level "flattenfile"

    Raises RuntimeError if neither \<v1_value>true nor \<v1_value>false is found;
    the file is left untouched in that case.
    '''
    if tex_path is None:
        tex_path = flattenfile
    with open(tex_path, "r") as f:
        lines_flatten = f.readlines()
    # the version names are literal text, not patterns
    v1_name = re.escape(v1_value)
    v2_name = re.escape(v2_value)
    # ------------ determine which version to use
    version_number = None
    value_to_exclude = None
    for line in lines_flatten:
        if re.match(r'\s*\\' + v1_name + 'true', line):
            version_number, value_to_exclude = 1, v2_value
            print(f'--Including version {v1_value}---')
            break
        if re.match(r'\s*\\' + v1_name + 'false', line):
            version_number, value_to_exclude = 2, v1_value
            print(f'--Including version {v2_value}---')
            break
    if version_number is None:
        raise RuntimeError("Did not catch which version to run on")
    excluded_name = re.escape(value_to_exclude)
    begin_excluded = re.compile(r'\s*\\begin\{' + excluded_name + r'\}')
    end_excluded = re.compile(r'\s*\\end\{' + excluded_name + r'\}')
    inline_choice = re.compile(r'.*\\to' + v1_name + 'or' + v2_name + r'\s*\{.*\}\s*\{.*\}')
    marker = '\\to' + v1_value + 'or' + v2_value
    # ------------ exclude text
    write_line = True
    with open(tex_path, "w") as f:
        for line in lines_flatten:
            stripped_line = line.strip("\n")
            if begin_excluded.match(stripped_line):
                write_line = False
                continue
            if end_excluded.match(stripped_line):
                write_line = True
                continue
            if inline_choice.match(stripped_line) and not re.match(r'^\s*\\newcommand', stripped_line):
                line = _select_inline_version(line, marker, version_number)
            if write_line:
                f.write(line)
#----------------------------------------------------------------
#----------------------------------------------------------------
#----------------------------------------------------------------
# Latex dependencies
# % for the copy_float_files function to work, you will need to add...
# \RequirePackage{snapshot}
# % ..before documentclass[<options>]{<document class>}
# % it will create a .dep file of the dependencies
# %For equations:
# %"generate" parameter is the name of the output .tex-file
# \usepackage[active,generate=equations,extract-env={equation,align}]{extract}
# BONUS:
# If you want to use the "include_only_desired_versions"- function, you will have to (note that this is hacky and unrobust):
# 1) setup the comment package: https://www.texlive.info/CTAN/macros/latex/contrib/comment/comment.pdf
# one way to do this is as follows:
# %Initiating logicals (define them in the next chunk)
# \newif\ifA
# \newif\ifB
# % Changing these to true or false changes the document
# \Afalse
# \usepackage{comment}
# % set the comment environments based on the booleans
# \ifA
# \includecomment{A}
# \excludecomment{B}
# \else
# \Afalse % redundant ?
# \excludecomment{A}
# \includecomment{B}
# \fi
# Now your LaTeX document (and the output of include_only_desired_versions) will include only the text between \begin{A}...\end{A} or only the text between \begin{B}...\end{B}, depending on the boolean.
# Note that A and B could be any other letters or strings
# 2) define custom command for similar behavior within lines.
# \newcommand{\toAorB}[2]{\ifA {#1} \else {#2} \fi} % The "A" logical has been defined above
# To use it in text:
# "Let us have a look at the following float \toAorB{tableA}{figureB}" (Version A will include a tableA and version B a figureB)
# Again, A or B could be any other strings.
# This works for multiple \toV1orV2 indices per line and you can mix different versions in the same line (like \toCorD and \toAorB)
# Note that you will have to be consistent with the order
# If this does not work as intended, it is probably due to:
# a) setup not done properly
# b) "{" does not start directly after \toV1orV2 (e.g. there is a space or other text between \toV1orV2 and "{")
# c) There are unmatching braces inside \toV1orV2{}{}
# Note that this code contains minimal error handling
#...................................................................
# Change code below the dotted lines
#...................................................................
# !!!!!!!!!!!!!!!!!!!!!!!!!
# TO DO
# IMPLEMENT CHANGES FOR BIBLATEX
# !!!!!!!!!!!!!!!!!!!!!!!!!!!
# Changing parameters for the flattening process
# Changing parameters for the flattening process
verbose = False  # print what the programme does in detail
remove_comments_boolean = False  # removes comments
include_bibliography_in_main_tex_file = False  # \documentclass will need to be on one line

# target directory
new_folder = 'files_to_publisher'  # if you execute this from the same directory as the tex file, this will be enough
delete_and_create_folder(new_folder)
TARGET_DIR = new_folder + '/'

# Deciding which files to move that the main tex file has used
EXTENSIONS = ['pdf', 'pdf_tex', 'png', 'tex']  # change: extensions will be extracted from the .dep file

# copying non-floats to new directory (and extracting some info along the way)
MAIN_STEM = copy_non_floats(include_regex=r'^\dManuscript.*(pdf|tex)$', create_dep_file=True)  # change: main tex/pdf files
DEP_FILE = MAIN_STEM + ".dep"
TEX_FILE = MAIN_STEM + ".tex"
masterfile = TEX_FILE
flattenfile = new_folder + '/' + MAIN_STEM + "_Flattened.tex"  # path to the flattened file
BIB_STEM = copy_non_floats(include_regex=r'^(\d)*My(\s)*Library(\.bib)$', exclude_regex=r'^((?![Pp]arsed).)*$')  # change:
PARSED_BIB = BIB_STEM + "Parsed.bib"
BIB_PASTED = BIB_STEM + "ParsedAndPasted.bib"
copy_non_floats(include_regex=r'equations.tex')  # change:
copy_non_floats(include_regex=r'^\dCoverLetter.*(pdf)$')  # change:
copy_non_floats(include_regex=r'^\dTitlePage.*(pdf|tex)$')  # change:
copy_non_floats(include_regex=r'^\dPreamble.*(sty)$')  # change:

# creating and copying parsed bibliography (only references you have used)
citation_set = create_citation_set(MAIN_STEM)
create_parsed_library(BIB_STEM, citation_set)
copyfile(PARSED_BIB, new_folder + "/" + PARSED_BIB)

if verbose:
    print('------------copying the following files-----------')
    print('----copying non_floats ---')
    print(list_of_nonfloats)
    print('----copying float files------')
copy_float_files(DEP_FILE, EXTENSIONS, TARGET_DIR)

if verbose:
    print('--------------Start flattening-------------------')
# raw string: same pattern as before, without the invalid "\s" escape of a normal string literal
flatten(masterfile, flattenfile, verbose, preamble_regex=r'(\s)*\\usepackage{.*[Pp]reamble.*}')
if remove_comments_boolean:
    remove_comments()
flatten_or_parse_bibliography_and_remove_ProvidesPackage()
include_only_desired_versions("SHORT", "LONG")  # note that order does matter (but will fetch the right version dynamically)
include_only_desired_versions("EMBED", "END")
#...................................................................
# if the results are not as expected, check that flattenfile and masterfile are as they should be
print(flattenfile + ' produced')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment