"""Prepare a compiled LaTeX project folder for arXiv submission.

Starting from the main ``.tex`` file, every file reachable through
``\\input{...}`` is stripped of comments and written to an ``arxiv``
sub-folder.  Figures referenced by ``\\includegraphics`` are copied next to
the cleaned sources under a flattened name (``/`` replaced by ``-``), and
auxiliary files (``.bbl``, ``.cls``, ``.bst``, ``.sty``) found at the top
level of the project folder are copied as well.
"""
import argparse
import logging
import os
import re
import shutil

logging.basicConfig(format='%(levelname)s: %(message)s')

# A ``comment`` environment (non-greedy, so text between two separate
# environments survives) or an unescaped ``%`` up to and including the end of
# its line (or the end of the file when the last line has no newline).
_comment = re.compile(
    r'\\begin\{comment\}[\s\S]*?\\end\{comment\}|(?<!\\)%.*(?:\n|\Z)')
# Runs of blank / whitespace-only lines, collapsed to a single blank line.
_empty_line = re.compile(r'\n\s*\n')
# group(1): optional ``[...]`` options (may be empty); group(2): figure path.
# ``[^\]]`` / ``[^}]`` keep the match from running into later groups that
# happen to sit on the same line (e.g. a following ``\caption{...}``).
_graphics = re.compile(r'\\includegraphics(\[[^\]]*\]|)\{([^}]*)\}')
# group(1): the argument of ``\input{...}``.
_tex = re.compile(r'\\input\{([^}]*)\}')

# Extensions of auxiliary files copied verbatim by ``main``.
_AUX_EXTENSIONS = ('.bbl', '.cls', '.bst', '.sty')


def _flatten_figure_ref(match):
    """Rebuild an ``\\includegraphics`` command with a flattened figure path.

    Only the path inside the braces is rewritten; the optional ``[...]``
    argument is reproduced untouched.
    """
    options, figure = match.group(1), match.group(2)
    return '\\includegraphics' + options + '{' + figure.replace('/', '-') + '}'


def clean_file(folder_path, file, visited):
    """Clean one tex file into ``<folder_path>/arxiv`` and recurse on inputs.

    Args:
        folder_path: Absolute path of the LaTeX project folder.
        file: Path of the tex file, relative to ``folder_path``.
        visited: Set of relative tex paths already processed; updated in
            place so that each file is cleaned once.

    Raises:
        OSError: If the tex file, a referenced figure or a referenced
            ``\\input`` file cannot be read or copied.
    """
    arxiv_folder = os.path.join(folder_path, 'arxiv')

    with open(os.path.join(folder_path, file), 'r') as f:
        content = f.read()

    # remove comments
    content = _comment.sub('', content)
    content = _empty_line.sub('\n\n', content)

    # copy figures (each distinct path once), flattening sub-folders into
    # the file name so that everything sits directly in the arxiv folder
    figures = sorted({figure for _, figure in _graphics.findall(content)})
    for figure in figures:
        shutil.copy(os.path.join(folder_path, figure),
                    os.path.join(arxiv_folder, figure.replace('/', '-')))

    # rename the reference to figures to correct one.
    content = _graphics.sub(_flatten_figure_ref, content)

    # mark before recursing so that circular \input chains terminate
    visited.add(file)
    for ref_tex in _tex.findall(content):
        if not ref_tex.endswith('.tex'):
            ref_tex = '{}.tex'.format(ref_tex)
        if ref_tex not in visited:
            clean_file(folder_path, ref_tex, visited)

    target = os.path.join(arxiv_folder, file)
    # \input paths keep their sub-folders, so the folder may not exist yet
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, 'w') as write_f:
        write_f.write(content)


def main(folder, main_tex):
    """Build a fresh ``arxiv`` folder inside ``folder``.

    Any existing ``arxiv`` folder is deleted first.  The main tex file and
    everything it inputs are cleaned, auxiliary files at the top level of
    ``folder`` are copied, and warnings are logged for top-level tex files
    that were never referenced and for a missing ``.bbl`` file.

    Args:
        folder: Path of the precompiled LaTeX project folder.
        main_tex: Name of the main tex file, relative to ``folder``.
    """
    folder_path = os.path.abspath(folder)
    arxiv_folder = os.path.join(folder_path, 'arxiv')
    if os.path.exists(arxiv_folder):
        shutil.rmtree(arxiv_folder)
    os.mkdir(arxiv_folder)

    visited = set()
    clean_file(folder_path, main_tex, visited)

    for file in os.listdir(folder_path):
        source = os.path.join(folder_path, file)
        # skip sub-folders (including the arxiv folder itself)
        if not os.path.isfile(source):
            continue
        # report not used tex files
        if file.endswith('.tex') and file not in visited:
            logging.warning('%s not used, ignored.', file)
        if file.endswith(_AUX_EXTENSIONS):
            # copy auxiliary files
            shutil.copy(source, os.path.join(arxiv_folder, file))

    # check if bbl file is present
    if not any(name.endswith('.bbl') for name in os.listdir(arxiv_folder)):
        logging.warning('no .bbl file is copied, forgot to run `bibtex`? '
                        'Ignore this if you do not use `bibtex`')


if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser(description=__doc__)
    arg_parser.add_argument('folder', metavar='FOLDER', type=str, nargs=1,
                            help='The precompiled latex folder.')
    arg_parser.add_argument('-m', '--main', type=str, required=False,
                            default='main.tex', help='The main tex file.')
    results = arg_parser.parse_args()
    main(results.folder[0], results.main)