Skip to content

Instantly share code, notes, and snippets.

@zarzen
Created May 7, 2021 16:36
Show Gist options
  • Save zarzen/4c30ed748a662da97104d6c394213ef3 to your computer and use it in GitHub Desktop.
Save zarzen/4c30ed748a662da97104d6c394213ef3 to your computer and use it in GitHub Desktop.
Strip LaTeX code for a Grammarly check
import re
import argparse
def get_args():
    """Parse the command-line arguments of the script.

    Returns:
        argparse.Namespace: namespace whose ``file`` attribute holds the path
        passed with ``--file``.

    Raises:
        SystemExit: raised by argparse when ``--file`` is missing or the
            arguments are otherwise invalid.
    """
    arg_parser = argparse.ArgumentParser(
        description='Strip LaTeX markup from a file and print the plain text.')
    # Required: without a path the caller would end up calling open(None),
    # which fails with an unhelpful TypeError instead of a usage message.
    arg_parser.add_argument('--file', required=True,
                            help='path of the LaTeX source file to process')
    return arg_parser.parse_args()
def remove_comments(lines):
    """Drop lines that consist solely of a LaTeX comment.

    A line counts as a comment when its first non-whitespace character is
    ``%``. Leading whitespace is ignored so that indented comment lines are
    removed as well. Trailing comments on a line that also carries text are
    left untouched.

    Args:
        lines: iterable of strings, one per source line.

    Returns:
        list: the non-comment lines, unchanged and in their original order.
    """
    return [line for line in lines if not line.lstrip().startswith('%')]
def remove_block(lines, begin_pattern, end_pattern):
    """Remove every delimited block of lines from ``lines``.

    A block opens at a line that starts with ``begin_pattern`` and runs up to
    and including the next line that starts with ``end_pattern``. If no
    closing line follows, everything up to the end of the input is dropped.

    Args:
        lines: sequence of strings, one per source line.
        begin_pattern: literal prefix marking the first line of a block.
        end_pattern: literal prefix marking the last line of a block.

    Returns:
        list: the lines that lie outside every block, in original order.
    """
    kept = []
    inside = False
    for line in lines:
        if inside:
            # Still within a block: discard, and leave the block once the
            # closing line has been consumed.
            if line.startswith(end_pattern):
                inside = False
            continue
        if line.startswith(begin_pattern):
            # The opening line itself is discarded; the block stays open
            # unless this very line also matches the end pattern.
            inside = not line.startswith(end_pattern)
            continue
        kept.append(line)
    return kept
def remove_fig(lines):
    """Strip ``figure`` and ``figure*`` environments from ``lines``.

    Args:
        lines: sequence of strings, one per source line.

    Returns:
        list: the lines with both kinds of figure environment removed.
    """
    delimiters = (
        ('\\begin{figure}', '\\end{figure}'),
        ("\\begin{figure*}", '\\end{figure*}'),
    )
    for opening, closing in delimiters:
        lines = remove_block(lines, opening, closing)
    return lines
def remove_table(lines):
    """Strip ``table`` and ``table*`` environments from ``lines``.

    Args:
        lines: sequence of strings, one per source line.

    Returns:
        list: the lines with both kinds of table environment removed.
    """
    delimiters = (
        ('\\begin{table}', '\\end{table}'),
        ('\\begin{table*}', '\\end{table*}'),
    )
    for opening, closing in delimiters:
        lines = remove_block(lines, opening, closing)
    return lines
def remove_itemize(lines):
    """Blank out the begin/end markers of ``itemize`` environments.

    Each line whose stripped text starts with an itemize (or ``itemize*``)
    begin/end command is replaced by an empty string; all other lines,
    including the items themselves, are passed through unchanged.

    Args:
        lines: iterable of strings, one per source line.

    Returns:
        list: one entry per input line.
    """
    markers = (
        '\\begin{itemize}',
        '\\begin{itemize*}',
        '\\end{itemize}',
        '\\end{itemize*}',
    )
    return ['' if line.strip().startswith(markers) else line
            for line in lines]
def strip_whitespaces(lines):
    """Return the lines with leading and trailing whitespace removed.

    Args:
        lines: iterable of strings.

    Returns:
        list: one stripped string per input line.
    """
    return [line.strip() for line in lines]
def replace_ref_withXXX(lines):
    """Replace every figure reference with the placeholder ``XX``.

    Only references whose label starts with ``fig:`` are rewritten; other
    ``\\ref`` commands are left as they are.

    Args:
        lines: iterable of strings.

    Returns:
        list: one rewritten string per input line.
    """
    fig_ref = re.compile(r"\\ref{fig:.*?}")
    return [fig_ref.sub("XX", line) for line in lines]
def join_text_block(lines):
    """Merge consecutive non-blank lines into single-line paragraphs.

    Runs of non-blank lines are joined with one space each. Every paragraph
    is followed by an empty string so that paragraphs stay separated when
    printed. Blank lines between paragraphs collapse into that single
    separator, and blank lines before the first paragraph are ignored.

    The last paragraph is emitted even when the input does not end with a
    blank line, and the blank terminator line is not part of the joined text
    (so no trailing space is produced).

    Args:
        lines: iterable of strings, one per source line.

    Returns:
        list: alternating paragraph strings and ``''`` separators.
    """
    processed = []
    paragraph = []

    def flush():
        # Emit the pending paragraph, if any, followed by its separator.
        if paragraph:
            processed.append(" ".join(paragraph))
            processed.append('')
            paragraph.clear()

    for line in lines:
        if line.strip():
            paragraph.append(line)
        else:
            flush()
    # Input that ends without a trailing blank line still has a pending
    # paragraph at this point.
    flush()
    return processed
def gen_replace_sysname(sysname):
    """Build a filter that substitutes the ``\\sysname`` macro.

    Args:
        sysname: text to insert in place of every ``\\sysname`` occurrence.
            It is inserted literally, so it may safely contain backslashes
            or other characters that are special in regex replacements.

    Returns:
        callable: function taking an iterable of lines and returning a list
        of lines with the macro replaced.
    """
    def _fn(lines):
        # Plain string replacement: the macro is a fixed literal, and this
        # keeps ``sysname`` from being parsed as a regex replacement template.
        return [line.replace("\\sysname", sysname) for line in lines]
    return _fn
def gen_remove_decorator(pattern):
    """Build a filter that unwraps a single-argument LaTeX command.

    The returned filter rewrites ``\\<pattern>{text}`` to ``text``. The match
    is non-greedy and stops at the first closing brace, so arguments that
    themselves contain braces are only partially unwrapped.

    Args:
        pattern: command name without the leading backslash, e.g.
            ``"textbf"``. It is matched literally, so names containing regex
            metacharacters (such as a starred command) work as expected.

    Returns:
        callable: function taking an iterable of lines and returning a list
        of lines with the command wrapper removed.
    """
    # Escape the name so it is matched as text, and compile once so the
    # pattern is not rebuilt for every line.
    decorator_re = re.compile(r"\\" + re.escape(pattern) + r"\{(.*?)\}")

    def _fn(lines):
        return [decorator_re.sub(r"\1", line) for line in lines]
    return _fn
def remove_citation(lines):
    """Delete ``\\cite{...}`` commands from every line.

    Two passes run per line: the first removes citations together with a
    directly preceding tie (``~``), the second removes any citation that is
    left.

    Args:
        lines: iterable of strings.

    Returns:
        list: one rewritten string per input line.
    """
    def _strip(line):
        without_tied = re.sub(r"~\\cite{.*?}", "", line)
        return re.sub(r'\\cite{.*?}', "", without_tied)

    return [_strip(line) for line in lines]
def gen_replace_fn(match_str, replace_str):
    """Build a filter that applies one regex substitution to every line.

    Args:
        match_str: regular expression to search for.
        replace_str: replacement template handed to ``re.sub``; group
            references such as ``\\1`` are honoured.

    Returns:
        callable: function taking an iterable of lines and returning a list
        of substituted lines.
    """
    def _fn(lines):
        return [re.sub(match_str, replace_str, line) for line in lines]
    return _fn
def main():
    """Run the LaTeX-stripping pipeline on the file given on the command line.

    Reads the file named by ``--file``, passes its lines through every filter
    in order, and prints each resulting line to standard output.
    """
    args = get_args()
    # Applied strictly in this order; whitespace is stripped first, so the
    # later prefix-based filters see each command at the start of its line.
    pipeline = [
        strip_whitespaces,
        remove_itemize,
        remove_comments,
        remove_fig,
        remove_table,
        join_text_block,
        replace_ref_withXXX,
        gen_replace_sysname("XXXX"),
        gen_remove_decorator("parabf"),
        gen_remove_decorator("zhen"),
        gen_remove_decorator("textbf"),
        gen_remove_decorator("emph"),
        gen_remove_decorator("texttt"),
        gen_remove_decorator("paraf"),
        remove_citation,
        gen_replace_fn(r"\$\\times\$", " times"),
        gen_replace_fn(r"\\%", "%"),
        gen_replace_fn(r"\$(.*?)\$", r"\1"),
    ]

    with open(args.file) as source:
        text_lines = source.readlines()

    for transform in pipeline:
        text_lines = transform(text_lines)

    for out_line in text_lines:
        print(out_line)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment