Created
May 7, 2021 16:36
-
-
Save zarzen/4c30ed748a662da97104d6c394213ef3 to your computer and use it in GitHub Desktop.
Strip LaTeX code for Grammarly check
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import argparse | |
def get_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse falls back to ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace``; ``.file`` holds the value of
        ``--file``.

    Raises:
        SystemExit: If ``--file`` is missing or the arguments are malformed
            (argparse prints a usage message before exiting).
    """
    arg_parser = argparse.ArgumentParser()
    # Required: without it the path would be None and only fail later,
    # when the file is opened, with an unhelpful TypeError.
    arg_parser.add_argument('--file', required=True,
                            help='path of the LaTeX file to strip')
    return arg_parser.parse_args(argv)
def remove_comments(lines):
    """Return the lines that do not begin with a ``%`` character.

    Only lines whose very first character is ``%`` are dropped; a ``%``
    appearing later in a line (or after leading whitespace) is left alone.
    """
    return [line for line in lines if not line.startswith('%')]
def remove_block(lines, begin_pattern, end_pattern):
    """Drop delimited blocks of lines.

    A block opens at a line that starts with ``begin_pattern`` and closes at
    the next line that starts with ``end_pattern``; both delimiter lines and
    everything between them are removed. A block that is never closed
    swallows the rest of the input.

    Args:
        lines: Sequence of strings to filter.
        begin_pattern: Literal prefix marking the first line of a block.
        end_pattern: Literal prefix marking the last line of a block.

    Returns:
        A new list with the lines outside of any block, in order.
    """
    kept = []
    inside = False
    for line in lines:
        if inside:
            # Still skipping: the closing line itself is dropped as well.
            if line.startswith(end_pattern):
                inside = False
            continue
        if line.startswith(begin_pattern):
            # The opening line is always dropped; keep skipping unless this
            # same line already satisfies the end pattern.
            inside = not line.startswith(end_pattern)
            continue
        kept.append(line)
    return kept
def remove_fig(lines):
    """Strip ``figure`` and ``figure*`` environments from the lines.

    Each environment is removed with ``remove_block``, first the plain
    variant and then the starred one.
    """
    delimiters = (
        ('\\begin{figure}', '\\end{figure}'),
        ("\\begin{figure*}", '\\end{figure*}'),
    )
    for opening, closing in delimiters:
        lines = remove_block(lines, opening, closing)
    return lines
def remove_table(lines):
    """Strip ``table`` and ``table*`` environments from the lines.

    Each environment is removed with ``remove_block``, first the plain
    variant and then the starred one.
    """
    delimiters = (
        ('\\begin{table}', '\\end{table}'),
        ('\\begin{table*}', '\\end{table*}'),
    )
    for opening, closing in delimiters:
        lines = remove_block(lines, opening, closing)
    return lines
def remove_itemize(lines):
    """Blank out the begin/end lines of ``itemize`` environments.

    A line whose stripped text starts with one of the itemize delimiters is
    replaced by an empty string; every other line is passed through
    unchanged. The list length is preserved.
    """
    delimiters = (
        '\\begin{itemize}',
        '\\begin{itemize*}',
        '\\end{itemize}',
        '\\end{itemize*}',
    )
    return ['' if line.strip().startswith(delimiters) else line
            for line in lines]
def strip_whitespaces(lines):
    """Return a new list with leading/trailing whitespace removed from each line."""
    return [line.strip() for line in lines]
def replace_ref_withXXX(lines):
    """Replace every ``\\ref{fig:...}`` in each line with the text ``XX``.

    Only references whose label starts with ``fig:`` are touched.
    """
    fig_ref = re.compile(r"\\ref{fig:.*?}")
    return [fig_ref.sub("XX", line) for line in lines]
def join_text_block(lines):
    """Merge consecutive non-blank lines into single-line paragraphs.

    A paragraph is a maximal run of lines that are not blank (blank meaning
    empty after ``str.strip()``). Each paragraph is joined with single spaces
    and emitted followed by one empty string as a separator. Leading blank
    lines and repeated blank lines produce no output of their own.

    The final paragraph is emitted even when the input does not end with a
    blank line, and the blank line that terminates a paragraph is not joined
    into it (so paragraphs carry no trailing space).

    Args:
        lines: Sequence of strings, typically already stripped.

    Returns:
        A list alternating joined paragraph text and ``''`` separators.
    """
    processed = []
    paragraph = []
    for line in lines:
        if line.strip():
            paragraph.append(line)
        elif paragraph:
            # A blank line closes the paragraph collected so far.
            processed.append(" ".join(paragraph))
            processed.append('')
            paragraph = []
    if paragraph:
        # Flush a trailing paragraph that was not followed by a blank line;
        # otherwise the last paragraph of the input would be lost.
        processed.append(" ".join(paragraph))
        processed.append('')
    return processed
def gen_replace_sysname(sysname):
    """Build a filter that substitutes ``\\sysname`` with ``sysname``.

    Args:
        sysname: Replacement passed to ``re.sub`` for each ``\\sysname``
            occurrence.

    Returns:
        A function taking a list of lines and returning a new list with the
        substitution applied to every line.
    """
    macro = re.compile(r"\\sysname")

    def _fn(lines):
        return [macro.sub(sysname, line) for line in lines]

    return _fn
def gen_remove_decorator(pattern):
    """Build a filter that unwraps ``\\<pattern>{...}`` to its inner text.

    Args:
        pattern: Command name (without the backslash); it is inserted into
            the regular expression as-is.

    Returns:
        A function taking a list of lines and returning a new list in which
        each ``\\<pattern>{text}`` is replaced by ``text``. The match is
        non-greedy and ends at the first closing brace.
    """
    pattern_str = "\\\\" + pattern + "{(.*?)}"

    def _fn(lines):
        return [re.sub(pattern_str, r"\1", line) for line in lines]

    return _fn
def remove_citation(lines):
    """Delete ``\\cite{...}`` commands from every line.

    Two passes are made per line: the first removes citations preceded by a
    tie (``~\\cite{...}``) together with the tie, the second removes any
    remaining ``\\cite{...}``.
    """
    def _strip(line):
        without_tied = re.sub(r"~\\cite{.*?}", "", line)
        return re.sub(r'\\cite{.*?}', "", without_tied)

    return [_strip(line) for line in lines]
def gen_replace_fn(match_str, replace_str):
    """Build a filter that applies one ``re.sub`` to every line.

    Args:
        match_str: Regular expression to search for.
        replace_str: Replacement template handed to ``re.sub``.

    Returns:
        A function taking a list of lines and returning a new list with the
        substitution applied to each line.
    """
    def _fn(lines):
        return [re.sub(match_str, replace_str, line) for line in lines]

    return _fn
def main():
    """Read the LaTeX file given by ``--file`` and print its stripped text.

    The file is read as UTF-8, passed through a fixed sequence of filters
    (each one maps a list of lines to a new list of lines), and the final
    lines are printed to standard output, one per ``print`` call.
    """
    args = get_args()
    # Order matters: whitespace is stripped first because the comment and
    # block filters test line prefixes with startswith(); paragraphs are
    # joined only after whole lines/blocks have been removed; the generic
    # "$...$" unwrapping runs last so the more specific "$\times$" rule
    # gets to match first.
    filters = [
        strip_whitespaces,
        remove_itemize,
        remove_comments,
        remove_fig,
        remove_table,
        join_text_block,
        replace_ref_withXXX,
        gen_replace_sysname("XXXX"),
        gen_remove_decorator("parabf"),
        gen_remove_decorator("zhen"),
        gen_remove_decorator("textbf"),
        gen_remove_decorator("emph"),
        gen_remove_decorator("texttt"),
        gen_remove_decorator("paraf"),
        remove_citation,
        gen_replace_fn(r"\$\\times\$", " times"),
        gen_replace_fn(r"\\%", "%"),
        gen_replace_fn(r"\$(.*?)\$", r"\1"),
    ]
    # Explicit encoding: do not depend on the platform's default locale
    # encoding when the source contains non-ASCII characters.
    with open(args.file, encoding="utf-8") as ifile:
        lines = ifile.readlines()

    after_filters = lines
    for f in filters:
        after_filters = f(after_filters)

    for x in after_filters:
        print(x)
# Run the filter pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment