Last active
April 29, 2024 10:58
-
-
Save samuelsaari/07cfff92e157fad84cd9d1988e108d95 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TexPublish - Preparing LaTeX Projects for Publication with Ease | |
# URL: https://gist.github.com/samuelsaari/07cfff92e157fad84cd9d1988e108d95 | |
# Author: Miika Mäki https://github.com/samuelsaari | |
# - creates a new directory for publishable files | |
# - copies files matching user defined regular expressions to the new directory | |
# - copies all desired files that are used by the main tex document (like figures and tables) | |
# - creates a parsed bibliography that includes only entries used by the main tex file | |
# - flattens the tex file (pastes the contents of files referenced by \input and \include commands, and of custom preambles, into the main tex file)
# - pastes the parsed bibliography to the flattened tex file if desired | |
# - removes all comments from main tex file if desired | |
# - BONUS: possibility to include/exclude desired versions (unrobust and requires customizing in the tex-file) | |
from doctest import master | |
import os | |
import shutil | |
import glob | |
import re | |
from pathlib import Path | |
import sys | |
import regex | |
import tempfile | |
import filecmp | |
from shutil import copyfile | |
from copy import deepcopy | |
# Work from the directory that holds this script, so that the relative paths
# used further down are resolved next to it.
# abspath() is applied first: when __file__ is a bare relative name such as
# "texpublish.py", dirname() alone returns '' and os.chdir('') raises.
os.chdir(os.path.dirname(os.path.abspath(__file__)))  # working directory to file directory
# deleting existing directory | |
def delete_and_create_folder(new_folder, subfolders=('img', 'img2', 'tab', 'tab2')):
    '''
    Remove ``new_folder`` (if present) and create it again with empty sub-folders.

    Parameters:
        new_folder: path of the directory to recreate.
        subfolders: names of the directories created directly inside
            ``new_folder``; defaults to the four folders the script has
            always created.

    A failed deletion is only reported (the folder usually just does not exist
    yet). A failed creation terminates the program through ``sys.exit``.
    '''
    # deleting existing directory
    try:
        shutil.rmtree(new_folder)
    except OSError:
        # OSError instead of a bare "except": Ctrl-C and SystemExit must not be
        # swallowed here. A missing folder ends up in this branch as well.
        print("Deletion of the directory %s failed" % new_folder)
    else:
        print("Successfully deleted the directory %s" % new_folder)
    # creating new directory
    try:
        os.mkdir(new_folder)
        for subfolder in subfolders:
            os.mkdir(new_folder + '/' + subfolder)
    except OSError:
        sys.exit("Creation of the directory %s failed" % new_folder)
    else:
        print("Successfully created the directory %s " % new_folder)
list_of_nonfloats = []  # initialize list of non_floats


def copy_non_floats(include_regex: str, exclude_regex: str = r'^((?!Copy).)*$',
                    create_dep_file=False, list_of_nonfloats: list = list_of_nonfloats,
                    target_dir=None, verbose=None):
    '''
    Copy files of the current directory whose names match ``include_regex``.

    Note that by default files containing the word "Copy" are excluded.

    Parameters:
        include_regex: a file is considered when ``re.search`` finds this
            pattern in its name.
        exclude_regex: a considered file is copied only when ``re.search``
            ALSO finds this pattern; the default pattern matches names that
            do not contain "Copy".
        create_dep_file: when true (and verbose), report the tex/dep file
            names for copied files ending in "tex".
        list_of_nonfloats: list that receives the name of every copied file;
            defaults to the shared module-level list.
        target_dir: destination directory; ``None`` uses the module-level
            ``TARGET_DIR``.
        verbose: print details; ``None`` uses the module-level ``verbose``
            setting (treated as off when it is not defined).

    Returns:
        The stem (file name without extension) of the last file copied, or
        ``None`` when no file matched. A warning is printed to stderr in the
        latter case.
    '''
    if target_dir is None:
        target_dir = TARGET_DIR
    if verbose is None:
        verbose = globals().get('verbose', False)

    # Start as None so that "nothing matched" is an ordinary, reportable
    # outcome instead of an UnboundLocalError at the return statement.
    STEM = None
    for name in glob.glob("*"):
        if not re.search(include_regex, name):
            continue
        if not re.search(exclude_regex, name):  # e.g. the name contains "Copy"
            continue
        list_of_nonfloats.append(name)
        shutil.copy(name, target_dir)
        STEM = Path(name).stem
        if create_dep_file and verbose and re.search(r'tex$', name):
            print(f'--tex file to use:{name}--')
            print(f'--dep file to use:{STEM}.dep--')

    if STEM is None:
        print(f'WARNING: no file matched {include_regex!r}; nothing copied',
              file=sys.stderr)
    return STEM
def copy_float_files(DEP_FILE, EXTENSIONS, TARGET_DIR, verbose=None):
    '''
    Copy the files listed in a .dep file into the target directory.

    Edited from https://tex.stackexchange.com/a/162763/244928 by Stack Exchange user mriss:

    Only lines containing "*{file}" are considered; the file name is read from
    the brace group that follows. A file is copied when its lower-cased
    extension (without the dot) is in ``EXTENSIONS`` and it is not one of the
    auxiliary files excluded below.

    Parameters:
        DEP_FILE: path of the .dep file to read.
        EXTENSIONS: collection of extensions (without dot) to copy.
        TARGET_DIR: destination prefix; the listed name is appended to it
            as-is, so it is expected to end with a path separator.
        verbose: print each copied file; ``None`` uses the module-level
            ``verbose`` setting (treated as off when it is not defined).
    '''
    if verbose is None:
        verbose = globals().get('verbose', False)

    with open(DEP_FILE, 'r') as f:
        for line in f:
            if '*{file}' not in line:
                continue
            fields = line.split('{')
            if len(fields) < 3:
                # "*{file}" without a following {name} group: nothing to copy
                continue
            source = fields[2].split('}')[0]
            if re.search(r'babel-.*\.tex', source):  # a work around hack
                continue
            # NB! other exclusions might be needed (check .dep file)
            if re.search(r'\.code\.tex', source) or source == "xkeyval.tex":
                continue
            extension = os.path.splitext(source)[1].lower()[1:]
            if extension not in EXTENSIONS:
                continue
            if not os.path.isfile(source):
                # The name cannot be opened relative to the working directory,
                # so there is nothing here to copy; report it and go on
                # instead of aborting the whole run.
                print(f'WARNING: skipping {source}: not found in the working directory',
                      file=sys.stderr)
                continue
            if verbose:
                print(f'{source}')
            destination = TARGET_DIR + source  # add "/floats" later
            destination_dir = os.path.dirname(destination)
            if destination_dir:
                # the listed name may point into a folder that was not pre-created
                os.makedirs(destination_dir, exist_ok=True)
            shutil.copy(source, destination)
#-------------------------------------------------------------------- | |
#flatten function | |
# Original Author: Timo Korthals [email protected] | |
# https://gist.github.com/tik0/7a8beb82e4e01e91a7ff6146dac06055 | |
# which was based on # Template by https://gist.github.com/restrepo/4207109 | |
# Function: | |
# Convert a master latex file | |
# into a single document by including hierarchically | |
# automatically all the LaTeX documents | |
# which are arguments of | |
# \include, \input, or \import | |
# ignoring any \includeonly | |
# also includes custom preambles, as long as the regex is defined accordingly
def flatten(masterfile, flattenfile, verbose, preamble_regex: str):
    r'''
    Write ``masterfile`` to ``flattenfile`` with the included files pasted in.

    A line is replaced by the content of the file it names when it starts
    (after optional whitespace) with \input{, \include{ or \import{, or when
    it matches ``preamble_regex``. Lines ending in ".sum}" are never expanded.
    A line starting with \includeonly{ is commented out. Every other line is
    copied unchanged. Only the lines of ``masterfile`` itself are examined:
    the pasted content is written as-is.

    Parameters:
        masterfile: path of the tex file to read.
        flattenfile: path of the file to write.
        verbose: print the files that are pasted in and the skipped .sum lines.
        preamble_regex: pattern (used with ``re.match``) identifying lines
            that load a custom preamble; the named file is looked up with a
            ".sty" extension.

    Raises:
        OSError: when ``masterfile``, ``flattenfile`` or an included file
            cannot be opened.
    '''
    dependency_pattern = re.compile(r'(\s)*\\(input|include|import)\{')

    with open(masterfile, 'r') as master:
        master_lines = master.readlines()

    with open(flattenfile, 'w') as flattened:
        for line in master_lines:
            # the commands may be indented, so test them on the stripped line
            stripped = line.lstrip()
            is_preamble = re.match(preamble_regex, line) is not None
            is_dependency = is_preamble or dependency_pattern.match(line) is not None
            if re.match(r'.+\.sum}$', line):
                is_dependency = False
                if verbose:
                    print(f"ignoring a match in line including '.sum': {line}")

            if is_dependency:
                # Remove trailing comments which might taint the following processing
                command = line.split('%')[0]
                filename = command.split('{')[-1].split('}')[0]
                # Add the default extension. The second test covers names that
                # merely END in the letters (e.g. "vortex") but have no
                # extension; names with another extension that ends in those
                # letters (e.g. "fig.pdf_tex") are still left alone.
                suffix = 'sty' if is_preamble else 'tex'
                if filename[-3:] != suffix or not os.path.splitext(filename)[1]:
                    filename = filename + '.' + suffix
                # Check for root if command is '\import'
                dirname = './'
                if stripped.startswith(r'\import{'):
                    dirname = dirname + command.split('{')[-2].split('}')[0] + '/'
                filename = dirname + filename
                if verbose:
                    print('Import: ' + filename)
                with open(filename, 'r') as included:
                    flattened.write(included.read())
                flattened.write('\n')
            elif stripped.startswith(r'\includeonly{'):
                flattened.write(line.replace(r'\includeonly{', r'%\includeonly{'))
            else:
                flattened.write(line)
    return
# Remove all comments (also originally by Timo Korthals) | |
def remove_comments(path=None):
    r'''
    Strip comments from a tex file in place.

    Rules, applied per line in this order:
      * \begin{comment} ... \end{comment} blocks are dropped, markers included;
      * lines starting with "%TC:" (whitespace allowed around the parts) are
        kept unchanged;
      * lines that consist of a comment only are dropped;
      * a trailing comment is cut off; a "%" preceded by a backslash or a
        hyphen is not treated as the start of a comment;
      * every other line is kept unchanged.

    Parameters:
        path: file to rewrite; ``None`` uses the module-level ``flattenfile``.
    '''
    if path is None:
        path = flattenfile
    print('---Removing comments---')
    with open(path, 'r') as filetex:
        texlist = filetex.readlines()

    inside_comment_block = False
    with open(path, 'w') as finaltex:
        for line in texlist:
            # The comment environment is handled first, so that nothing inside
            # it can reach the output through one of the later rules and so
            # that a trailing "%" on a marker line cannot hide the marker.
            if re.match(r'(\s)*\\begin{comment}.*', line):
                inside_comment_block = True
                continue
            if re.match(r'(\s)*\\end{comment}.*', line):
                inside_comment_block = False
                continue
            if inside_comment_block:
                continue
            # match lines starting with %TC: and keep them whole, also when indented
            if re.match(r'^(\s)*%(\s)*TC(\s)*:.+', line):
                finaltex.write(line)
                continue
            # match all lines which are just comments
            if re.match(r'(\s)*%', line):
                continue
            # match all comments after commands, but not '\%'
            trailing = re.search(r'[^\\-]%', line)
            if trailing is not None:
                # keep everything up to and including the character before "%"
                finaltex.write(line[0:trailing.start(0) + 1] + '\n')
                continue
            finaltex.write(line)
def create_citation_set(MAIN_STEM: str):
    r'''
    Collect the citation keys recorded in ``MAIN_STEM + ".aux"``.

    Two kinds of lines are read:
      * ``\citation{key1,key2}`` (bibtex): every comma-separated key;
      * ``\abx@aux@cite{0}{key}`` (biblatex): the key in the last brace group;
        the leading numeric group may hold any number or be absent.

    Parameters:
        MAIN_STEM: path of the aux file without its ".aux" extension.

    Returns:
        A set with the keys found (empty when there are none).
    '''
    AUX_FILE = MAIN_STEM + ".aux"
    bibtex_line = re.compile(r'^(\s)*\\citation\{.*\}$')
    # The braces are escaped on purpose: an unescaped "{0}" is read by the
    # regex engine as "repeat the preceding character zero times".
    biblatex_line = re.compile(r'^(\s)*\\abx@aux@cite(\{\d+\})?\{')

    citation_set = set()
    # utf8 like the bibliography files; undecodable bytes are replaced rather
    # than aborting, since only the citation lines matter here
    with open(AUX_FILE, "r", encoding="utf8", errors="replace") as f:
        for line in f:
            text = line.strip("\n")
            if bibtex_line.match(text) or biblatex_line.match(text):
                last_group = text.split('{')[-1].split('}')[0]
                for citation in last_group.split(','):
                    citation = citation.strip()
                    if citation:
                        citation_set.add(citation)
    return citation_set
def create_parsed_library(BIB_STEM: str, citation_set: set):
    '''
    Write ``BIB_STEM + "Parsed.bib"`` with only the cited entries of ``BIB_STEM + ".bib"``.

    An entry starts at a line that begins with "@" and ends with "," (for
    example "@article{key,"); the key is the text between the last "{" and
    the following ",". The lines of an entry are copied when its key is in
    ``citation_set``. A line holding only "}" closes a copied entry and is
    followed by an empty line in the output.

    Parameters:
        BIB_STEM: path of the bibliography without its ".bib" extension.
        citation_set: keys of the entries to keep.
    '''
    MAIN_BIB = BIB_STEM + ".bib"
    PARSED_BIB = BIB_STEM + "Parsed.bib"
    with open(MAIN_BIB, "r", encoding="utf8") as f:
        lines = f.readlines()
    with open(PARSED_BIB, "w", encoding="utf8") as f:
        write = False
        for line in lines:
            text = line.rstrip()  # tolerate trailing blanks / carriage returns
            if re.match(r'^@.+,$', text):
                line_citation = text.split('{')[-1].split(',')[0].strip()
                # Decide anew for every entry. Otherwise an uncited entry
                # would be copied whenever the cited entry before it has no
                # closing brace on a line of its own.
                write = line_citation in citation_set
            elif write and re.match(r'^(\s)*}(\s)*$', text):
                f.write(line)
                f.write('\n')
                write = False
                continue
            if write:
                f.write(line)
def flatten_or_parse_bibliography_and_remove_ProvidesPackage(
        flatten_path=None, parsed_bib=None, pasted_bib=None, embed_bibliography=None):
    r'''
    Point the flattened tex file at the parsed bibliography, in place.

    Line by line:
      * \ProvidesPackage{... lines are removed;
      * when the bibliography is to be embedded, a filecontents environment
        holding the parsed bibliography is written right after the
        \documentclass line (with or without options; it has to be on one line);
      * \bibliography{...} and \addbibresource{...} lines are replaced by one
        naming the pasted file (when embedding) or the parsed file (otherwise);
      * every other line is kept.

    Parameters (each ``None`` falls back to the module-level setting named):
        flatten_path: tex file to rewrite (``flattenfile``).
        parsed_bib: bibliography file whose content is read (``PARSED_BIB``).
        pasted_bib: file name given to the embedded bibliography (``BIB_PASTED``).
        embed_bibliography: whether to embed the bibliography
            (``include_bibliography_in_main_tex_file``).
    '''
    if flatten_path is None:
        flatten_path = flattenfile
    if parsed_bib is None:
        parsed_bib = PARSED_BIB
    if pasted_bib is None:
        pasted_bib = BIB_PASTED
    if embed_bibliography is None:
        embed_bibliography = include_bibliography_in_main_tex_file

    referenced_bib = pasted_bib if embed_bibliography else parsed_bib

    with open(flatten_path, "r") as f:
        lines_flatten = f.readlines()
    with open(parsed_bib, "r", encoding="utf8") as f:
        lines_parsed_bib = f.readlines()

    with open(flatten_path, "w") as f:
        for line in lines_flatten:
            stripped = line.strip("\n")
            if re.match(r'(\s)*\\ProvidesPackage{.*', stripped):
                continue
            # the options group is optional, so a plain \documentclass{article} counts too
            if embed_bibliography and re.match(r'(\s)*\\documentclass(\[.*\])?{.*}.*', stripped):
                f.write(line)
                f.write(r"\usepackage{filecontents}\begin{filecontents}{" + pasted_bib + "}")
                f.write('\n')
                f.writelines(lines_parsed_bib)
                if lines_parsed_bib and not lines_parsed_bib[-1].endswith('\n'):
                    f.write('\n')  # keep \end{filecontents} on its own line
                f.write(r'\end{filecontents}')
                f.write('\n')
            elif re.match(r'^(\s)*(\\)bibliography{.*}.*', line):
                f.write(r'\bibliography{' + referenced_bib + '}')
                f.write('\n')
            # biblatex
            elif re.match(r'^(\s)*(\\)addbibresource{.*}.*', line):
                f.write(r'\addbibresource{' + referenced_bib + '}')
                f.write('\n')
            else:
                f.write(line)
def _choose_inline_version(line, macro, version_number):
    '''Replace every ``macro{first}{second}`` in ``line`` by its first or second argument.'''
    result = ""
    pending = ""      # text read since the macro / the previous argument ended
    arguments = []    # completed arguments of the current macro, braces included
    depth = 0         # nesting depth of braces inside the current argument
    inside_macro = False
    for letter in line:
        if not inside_macro:
            result += letter
            if result.endswith(macro):
                # drop the macro name itself; its arguments are read next
                result = result[:-len(macro)]
                inside_macro = True
            continue
        pending += letter
        if letter == "{":
            depth += 1
        elif letter == "}":
            depth -= 1
            if depth == 0:  # the outer brace closed: one argument is complete
                arguments.append(pending)
                pending = ""
                if len(arguments) == 2:
                    chosen = arguments[version_number - 1]
                    # strip the outer braces (and anything before the opening one)
                    result += chosen[chosen.index("{") + 1:-1]
                    arguments = []
                    inside_macro = False
    if inside_macro:
        # The arguments did not close on this line: put the text back
        # untouched instead of silently dropping it.
        result += macro + "".join(arguments) + pending
    return result


def include_only_desired_versions(v1_value: str, v2_value: str, flatten_path=None):
    r'''
    Keep only one of two document versions in the flattened tex file, in place.

    The version is chosen by the first line that starts (after optional
    whitespace) with \<v1_value>true (version 1) or \<v1_value>false
    (version 2). Then:
      * everything from \begin{<other>} to \end{<other>} is removed, markers
        included, where <other> is the name of the version not chosen;
      * on lines containing \to<v1_value>or<v2_value>{...}{...} (except
        \newcommand lines) each such macro is replaced by its first or second
        argument, according to the chosen version.

    Parameters:
        v1_value: name of the first version (e.g. "A").
        v2_value: name of the second version (e.g. "B").
        flatten_path: tex file to rewrite; ``None`` uses the module-level
            ``flattenfile``.

    Raises:
        Exception: when no line selects a version; the file is left untouched.
    '''
    if flatten_path is None:
        flatten_path = flattenfile
    with open(flatten_path, "r") as f:
        lines_flatten = f.readlines()

    # the names are inserted into regular expressions: escape them so that
    # characters with a special meaning there are taken literally
    v1_name = re.escape(v1_value)
    v2_name = re.escape(v2_value)

    # ------------ determine which version to use
    version_number = None
    for line in lines_flatten:
        if re.match(r'(\s)*\\' + v1_name + r'true.*', line):
            version_number = 1
            break
        if re.match(r'(\s)*\\' + v1_name + r'false.*', line):
            version_number = 2
            break
    if version_number is None:
        raise Exception("Did not catch which version to run on")

    if version_number == 1:
        value_to_include, excluded_name = v1_value, v2_name
    else:
        value_to_include, excluded_name = v2_value, v1_name
    print(f'--Including version {value_to_include}---')

    begin_excluded = re.compile(r'(\s)*\\begin\{' + excluded_name + r'\}.*')
    end_excluded = re.compile(r'(\s)*\\end\{' + excluded_name + r'\}.*')
    inline_macro = re.compile(r'.*\\to' + v1_name + 'or' + v2_name + r'\{.*\}\{.*\}.*')
    toV1orV2_string = '\\to' + v1_value + 'or' + v2_value  # the macro as it appears in the text

    # exclude text
    write_line_boolean = True
    with open(flatten_path, "w") as f:
        for line in lines_flatten:
            stripped_line = line.strip("\n")
            # ----------exclude between \begin{X} and \end{X}-------------------
            if begin_excluded.match(stripped_line):
                write_line_boolean = False
                continue
            if end_excluded.match(stripped_line):
                write_line_boolean = True
                continue
            # -------- Choose only contents of either \toV1orV2{This comes to V1}{This to V2}---------
            if inline_macro.match(stripped_line) and not re.match(r'^(\s)*\\newcommand', stripped_line):
                line = _choose_inline_version(line, toV1orV2_string, version_number)
            if write_line_boolean:
                f.write(line)
#---------------------------------------------------------------- | |
#---------------------------------------------------------------- | |
#---------------------------------------------------------------- | |
# Latex dependencies | |
# % for the copy_float_files function to work, you will need to add... | |
# \RequirePackage{snapshot} | |
# % ..before documentclass[<options>]{<document class>} | |
# % it will create a .dep file of the dependencies | |
# %For equations: | |
# %"generate" parameter is the name of the output .tex-file | |
# \usepackage[active,generate=equations,extract-env={equation,align}]{extract} | |
# BONUS: | |
# If you want to use the "include_only_desired_versions"- function, you will have to (note that this is hacky and unrobust): | |
# 1) setup the comment package: https://www.texlive.info/CTAN/macros/latex/contrib/comment/comment.pdf | |
# one way to do this is as follows: | |
# %Initiating logicals (define them in the next chunk) | |
# \newif\ifA | |
# \newif\ifB | |
# % Changing these to true or false changes the document | |
# \Afalse | |
# \usepackage{comment} | |
# % set the comment environments based on the booleans | |
# \ifA | |
# \includecomment{A} | |
# \excludecomment{B} | |
# \else | |
# \Afalse % redundant ? | |
# \excludecomment{A} | |
# \includecomment{B} | |
# \fi | |
# Now your LaTeX document (and the output of include_only_desired_versions) will include only one of the two versions: either the text between \begin{A} and \end{A}, or the text between \begin{B} and \end{B}.
# Note that A and B could be any other letters or strings | |
# 2) define custom command for similar behavior within lines. | |
# \newcommand{\toAorB}[2]{\ifA {#1} \else {#2} \fi} % The "A" logical has been defined above | |
# To use it in text: | |
# "Let us have a look at the following float \toAorB{tableA}{figureB}" (Version A will include a tableA and version B a figureB) | |
# Again, A or B could be any other strings. | |
# This works for multiple \toV1orV2 indices per line and you can mix different versions in the same line (like \toCorD and \toAorB) | |
# Note that you will have to be consistent with the order | |
# If this does not work as intended, it is probably due to: | |
# a) setup not done properly | |
# b) "{" does not start directly after \toV1orV2 (e.g. when whitespace separates them)
# c) There are unmatching braces inside \toV1orV2{}{} | |
# Note that this code contains minimal error handling | |
#................................................................... | |
# Change code below the dotted lines | |
#................................................................... | |
# !!!!!!!!!!!!!!!!!!!!!!!!! | |
# TO DO | |
# IMPLEMENT CHANGES FOR BIBLATEX | |
# !!!!!!!!!!!!!!!!!!!!!!!!!!! | |
# Changing parameters for the flattening process
verbose = False  # print what the programme does in detail
remove_comments_boolean = False  # removes comments
include_bibliography_in_main_tex_file = False  # \documentclass will need to be on one line

# target directory
new_folder = 'files_to_publisher'  # if you execute this from the same directory as the tex file, this will be enough
delete_and_create_folder(new_folder)
TARGET_DIR = new_folder + '/'

# Deciding which files to move that the main tex file has used
EXTENSIONS = ['pdf', 'pdf_tex', 'png', 'tex']  # change: only files with these extensions are taken from the .dep file

# copying non-floats to new directory (and extracting some info along the way)
MAIN_STEM = copy_non_floats(include_regex=r'^\dManuscript.*(pdf|tex)$', create_dep_file=True)  # change: main tex/pdf files
if MAIN_STEM is None:
    # everything below is derived from the main file name, so stop with a clear message
    sys.exit("No main tex/pdf file matched the include_regex; nothing to publish")
DEP_FILE = MAIN_STEM + ".dep"
TEX_FILE = MAIN_STEM + ".tex"
masterfile = TEX_FILE
flattenfile = new_folder + '/' + MAIN_STEM + "_Flattened.tex"  # path to the flattened file

BIB_STEM = copy_non_floats(include_regex=r'^(\d)*My(\s)*Library(\.bib)$', exclude_regex=r'^((?![Pp]arsed).)*$')  # change:
if BIB_STEM is None:
    sys.exit("No bibliography file matched the include_regex; cannot create the parsed bibliography")
PARSED_BIB = BIB_STEM + "Parsed.bib"
BIB_PASTED = BIB_STEM + "ParsedAndPasted.bib"

copy_non_floats(include_regex=r'equations.tex')  # change:
copy_non_floats(include_regex=r'^\dCoverLetter.*(pdf)$')  # change:
copy_non_floats(include_regex=r'^\dTitlePage.*(pdf|tex)$')  # change:
copy_non_floats(include_regex=r'^\dPreamble.*(sty)$')  # change:

# creating and copying parsed bibliography (only references you have used)
citation_set = create_citation_set(MAIN_STEM)
#print(citation_set)
create_parsed_library(BIB_STEM, citation_set)
copyfile(PARSED_BIB, new_folder + "/" + PARSED_BIB)

if verbose:
    print('------------copying the following files-----------')
    print('----copying non_floats ---')
    print(list_of_nonfloats)
    print('----copying float files------')
copy_float_files(DEP_FILE, EXTENSIONS, TARGET_DIR)

if verbose: print('--------------Start flattening-------------------')
# raw string: same pattern as before, without relying on "\s" surviving in a normal string literal
flatten(masterfile, flattenfile, verbose, preamble_regex=r'(\s)*\\usepackage{.*[Pp]reamble.*}')
if remove_comments_boolean: remove_comments()
flatten_or_parse_bibliography_and_remove_ProvidesPackage()
# NOTE: each call below raises when the flattened file has no \<first name>true / \<first name>false line
include_only_desired_versions("SHORT", "LONG")  # note that order does matter (but will fetch the right version dynamically)
include_only_desired_versions("EMBED", "END")
#...................................................................
# print(f'if results are not as expected, check that {flattenfile} and {masterfile} are as they should be')
print(flattenfile + ' produced')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment