import argparse
import os
import re
import logging
import shutil

# Configure the root logger so each record is printed as "LEVEL: message".
logging.basicConfig(format='%(levelname)s: %(message)s')

# Either a ``comment`` environment or an unescaped ``%`` comment up to and
# including its line break.  The environment body is matched lazily so text
# sitting between two separate environments is kept, and the line break is
# optional so a comment on an unterminated last line is removed as well.
_comment = re.compile(r'\\begin\{comment\}[\s\S]*?\\end\{comment\}|(?<!\\)%.*\n?')
# Two line breaks separated only by whitespace (i.e. one or more blank lines).
_empty_line = re.compile(r'\n\s*\n')
# ``\includegraphics[options]{path}``: group 1 is the ``[...]`` part (empty
# string when absent), group 2 is the path.  Both groups stop at their own
# closing delimiter so a later ``}`` on the same line is not swallowed.
_graphics = re.compile(r'\\includegraphics(\[[^\]]*\]|)\{([^}]*)\}')
# ``\input{path}``: group 1 is the path, stopping at the first ``}``.
_tex = re.compile(r'\\input\{([^}]*)\}')


def _flat_figure_name(figure):
    """Return ``figure`` with every ``/`` replaced by ``-``."""
    return figure.replace('/', '-')


def _rewrite_graphics(match):
    """Rebuild a ``_graphics`` match with only its path flattened."""
    options, figure = match.groups()
    return '\\includegraphics{}{{{}}}'.format(options, _flat_figure_name(figure))


def clean_file(folder_path, file, visited):
    r"""Write a cleaned copy of ``file`` and of every file it inputs.

    The file is read from ``folder_path``, comments are stripped, runs of
    blank lines are collapsed to a single blank line, every figure named in
    an ``\includegraphics`` command is copied into ``<folder_path>/arxiv``
    under a flattened name (``/`` replaced by ``-``) and the command is
    rewritten to use that name.  Each ``\input`` target that has not been
    visited yet is then processed recursively, and finally the cleaned text
    is written to ``<folder_path>/arxiv/<file>``.

    Args:
        folder_path: Folder containing the latex sources.
        file: Path of the tex file, relative to ``folder_path``.
        visited: Set of relative tex paths already processed; updated in
            place with ``file`` and every file reached through ``\input``.

    Errors raised while opening, copying or writing files (for example a
    referenced figure that does not exist) are not caught here.
    """
    arxiv_folder = os.path.join(folder_path, 'arxiv')

    # Read everything up front so the source handle is closed before recursing.
    with open(os.path.join(folder_path, file), 'r') as f:
        content = f.read()

    # remove comments
    content = _comment.sub('', content)
    content = _empty_line.sub('\n\n', content)

    # copy figures
    for _, figure in _graphics.findall(content):
        shutil.copy(os.path.join(folder_path, figure),
                    os.path.join(arxiv_folder, _flat_figure_name(figure)))
    # rename the reference to figures to correct one (path only, not options).
    content = _graphics.sub(_rewrite_graphics, content)

    # Mark this file before recursing so circular \input chains terminate.
    visited.add(file)
    for ref_tex in _tex.findall(content):
        if not ref_tex.endswith('.tex'):
            ref_tex = '{}.tex'.format(ref_tex)
        if ref_tex not in visited:
            clean_file(folder_path, ref_tex, visited)

    # Tex files keep their relative path, so a file inside a subfolder needs
    # the matching subfolder to exist in the output tree.
    target = os.path.join(arxiv_folder, file)
    target_dir = os.path.dirname(target)
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    with open(target, 'w') as write_f:
        write_f.write(content)


def main(folder, main_tex):
    """Build ``<folder>/arxiv`` from the latex project in ``folder``.

    Any existing ``arxiv`` subfolder is deleted and recreated.  The main tex
    file and everything it reaches are cleaned via ``clean_file``; auxiliary
    files (``.bbl``, ``.cls``, ``.bst``, ``.sty``) found directly in
    ``folder`` are then copied alongside.  A warning is logged for every
    top-level ``.tex`` file that was not reached, and when the output folder
    ends up without a ``.bbl`` file.

    Args:
        folder: Path of the latex project folder.
        main_tex: Name of the main tex file, relative to ``folder``.
    """
    folder_path = os.path.abspath(folder)
    arxiv_folder = os.path.join(folder_path, 'arxiv')
    # Always start from an empty output folder.
    if os.path.exists(arxiv_folder):
        shutil.rmtree(arxiv_folder)
    os.mkdir(arxiv_folder)

    visited = set()
    clean_file(folder_path, main_tex, visited)

    for file in os.listdir(folder_path):
        # Only regular files are of interest; this skips the freshly created
        # ``arxiv`` folder and any other subdirectory.
        if not os.path.isfile(os.path.join(folder_path, file)):
            continue
        # report not used tex files
        if file.endswith('.tex') and file not in visited:
            logging.warning('{} not used, ignored.'.format(file))
        # copy auxiliary files
        if file.endswith(('.bbl', '.cls', '.bst', '.sty')):
            shutil.copy(os.path.join(folder_path, file), os.path.join(arxiv_folder, file))

    # check if bbl file is present
    if not any(name.endswith('.bbl') for name in os.listdir(arxiv_folder)):
        logging.warning('no .bbl file is copied, forgot to run `bibtex`? Ignore this if you do not use `bibtex`')


if __name__ == '__main__':
    # Command-line entry point: FOLDER is required, the main tex file name is
    # optional.  The module has no docstring, so fall back to an explicit
    # description instead of showing none in ``--help``.
    arg_parser = argparse.ArgumentParser(
        description=__doc__ or 'Collect a cleaned copy of a latex project into FOLDER/arxiv.')
    # A plain positional already consumes exactly one value and stores it as
    # a string, so no ``nargs=1`` list needs to be unwrapped afterwards.
    arg_parser.add_argument('folder', metavar='FOLDER',
                            help='The precompiled latex folder.')
    arg_parser.add_argument('-m', '--main', required=False, default='main.tex',
                            help='The main tex file.')

    results = arg_parser.parse_args()
    main(results.folder, results.main)