Last active
February 15, 2020 12:45
-
-
Save JoaoFelipe/a24279a3276cd6220a32a2fc66095f47 to your computer and use it in GitHub Desktop.
Converts LaTeX into plain text for Grammarly.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import argparse | |
from time import sleep | |
from string import ascii_letters | |
import keyboard # using module keyboard | |
import pyperclip | |
import bibtexparser | |
from bibtexparser.bparser import BibTexParser | |
from bibtexparser.customization import convert_to_unicode | |
# When true, a console warning is printed for every LaTeX command that has
# no handler method on the parser (unless the command is listed below).
WARNING = True

# Commands for which the "method not found" warning is never printed.
IGNORE_WARNING = [
    "documentclass",
    "usepackage",
    "addbibresource",
    "jvol",
    "jnum",
    "paper",
    "jmonth",
    "jname",
    "pubyear",
    "newtheorem",
    "setcounter",
    "newcommand",
    "DTLsetseparator",
    "DTLloaddb",
    "ttfamily",
    "lstset",
    "makeatletter",
    "def",
    "lst",
    "global",
    "advance",
    "ifnum",
    "c",
    "z",
    "fi",
    "numexpr",
    "newcounter",
    "stepcounter",
    "arabic",
    "sptitle",
    "editor",
    "markboth",
    "protect",
    "let",
    "newline",
    "arraybackslash",
    "lsthk",
    "expandafter",
    "makeatother",
    "maketitle",
    "begin",
    "end",
    "label",
    "caption",
    "centering",
    "includegraphics",
    "subfloat",
    "vspace",
    "newcolumntype",
    "toprule",
    "hspace",
    "midrule",
    "bottomrule",
    "lstinputlisting",
    "scriptsize",
    "printbibliography",
    "ldots",
]

# Running count of "method not found" warnings printed so far.
NOT_FOUND = 0

# Path of the BibTeX file consulted when expanding citations; None means
# citations are rendered without a bibliography lookup.
BIBLIOGRAPHY = None
def read(filename, var=None):
    """Load ``key = value`` pairs from a text file.

    Blank lines are skipped. Every other line must contain the separator
    ``" = "``; the text before the first separator is the key and the rest
    of the line is the value, so values may themselves contain ``" = "``.

    Args:
        filename: Path of the file to read.
        var: Optional key to look up.

    Returns:
        The whole mapping when ``var`` is None, otherwise the value stored
        under ``var``.

    Raises:
        ValueError: If a non-blank line does not contain ``" = "``.
        KeyError: If ``var`` is given but not present in the file.
    """
    data = {}
    with open(filename, "r") as fil:
        for line in fil:
            line = line.strip()
            if not line:
                continue
            # Split on the first separator only; a plain split() would fail
            # to unpack when the value contains " = " as well.
            key, value = line.split(" = ", 1)
            data[key] = value
    if var is None:
        return data
    return data[var]
class LatexParser(object):
    """Character-driven converter from LaTeX markup to plain text.

    Calling an instance with a LaTeX string returns the plain-text version.
    The input is scanned one character at a time. A backslash starts a
    command; its ``[...]`` and ``{...}`` groups are collected, converted
    recursively, and handed to the method whose name equals the command
    name (for example ``\\emph{x}`` calls ``self.emph(["x"], [])``).
    Commands without such a method produce no output and, depending on the
    module-level ``WARNING`` / ``IGNORE_WARNING`` settings, a console
    warning.

    Handler methods take ``(args, dargs)``: the converted ``{...}`` groups
    and the converted ``[...]`` groups, each in order of appearance.
    """

    def __init__(self):
        self._result = []      # output fragments, joined by __call__
        self._command = None   # letters of the command being read, or None
        self._param = None     # groups of the current command, or None
        self._arg_par = 0      # open "{" depth inside the current {...} group
        self._darg_par = 0     # open "[" depth inside the current [...] group
        self._citations = {}   # citation key -> rendered label
        self._cur_cite = 0     # number handed to the latest new citation
        self._refs = {}        # reference label -> rendered number
        self._cur_ref = 0      # number handed to the latest new reference

    def __call__(self, text):
        """Convert the LaTeX string ``text`` and return the plain text."""
        # Drop unescaped % comments; "$" also covers a comment on the last
        # line that is not followed by a newline.
        text = re.sub(r"(?<!\\)%.*(?:\n|$)", "", text)
        # Blank lines separate paragraphs; every other run of whitespace
        # (including "~") collapses to a single space.
        text = text.replace("\n\n", "<<<keepn>>>").replace("~", " ").replace("\n", " ")
        text = re.sub(r"\s+", " ", text)
        text = text.replace("``", "“").replace("''", "”")
        text = text.replace("`", "‘").replace("'", "’")
        text = text.split("<<<keepn>>>")
        text = "\n\n".join(x.strip() for x in text)

        self._result = []
        self._command = None
        self._param = None
        for letter in text:
            if self._command is None:
                self._visit_text(letter)
            elif self._param[-1][0] is None:
                self._visit_command(letter)
            elif self._param[-1][0] == "[":
                self._visit_darg(letter)
            elif self._param[-1][0] == "{":
                self._visit_arg(letter)
        if self._command is not None:
            # Input ended while a command was still pending: flush it.
            self._visit_command("")
        return "".join(self._result)

    def _visit_text(self, letter):
        """Handle one character outside of any command."""
        if letter == "\\":
            self._command = []
            self._param = [[None]]
        else:
            self._result.append(letter)

    def _visit_command(self, letter):
        """Handle one character while reading a command name or between groups.

        ``letter`` is the empty string when the input has ended.
        """
        name_is_empty = len(self._command) == 0
        if letter == "\\" and name_is_empty:
            # "\\\\" is emitted as a single backslash.
            self._command = None
            self._param = None
            self._result.append(letter)
        elif letter in ["%", ",", "_"] and name_is_empty:
            # Escaped character: emit it literally.
            self._command = None
            self._param = None
            self._visit_text(letter)
        elif letter == "[":
            self._darg_par = 1
            self._param.append(["["])
        elif letter == "{":
            self._arg_par = 1
            self._param.append(["{"])
        elif letter == "" or len(self._param) > 1 or letter not in ascii_letters:
            # The explicit "" test matters: '"" in ascii_letters' is True, so
            # without it a trailing command without groups was never emitted.
            self._finish_command()
            self._visit_text(letter)
        else:
            self._command.append(letter)

    def _finish_command(self):
        """Convert the collected groups, run the handler and reset the state."""
        global NOT_FOUND
        command = "".join(self._command)
        args = []
        dargs = []
        last = len(self._param) - 1
        for index, par in enumerate(self._param):
            if par[0] is None:
                continue
            # A closed group is always followed by another entry and ends
            # with its closing bracket. Only a group still open at end of
            # input can be last, and it has no closing bracket to strip.
            inner = par[1:-1] if index < last else par[1:]
            # Same class as self so subclass handlers also apply to nested text.
            converted = self.__class__()("".join(inner))
            if par[0] == "{":
                args.append(converted)
            else:
                dargs.append(converted)
        if hasattr(self, command):
            try:
                output = getattr(self, command)(args, dargs)
            except IndexError:
                # The handler indexed an argument that was not supplied
                # (e.g. "\section*{...}" ends the name before any group).
                output = ""
                if WARNING:
                    print("Warning: \\{} is missing a required argument".format(command))
            self._result.append(output)
        elif WARNING and command not in IGNORE_WARNING:
            NOT_FOUND += 1
            print("Warning {}: method for \\{}[{}]{{{}}} not found".format(NOT_FOUND, command, len(dargs), len(args)))
        self._command = None
        self._param = None

    def _visit_darg(self, letter):
        """Handle one character inside a ``[...]`` group."""
        self._param[-1].append(letter)
        if letter == "]":
            self._darg_par -= 1
            if self._darg_par <= 0:
                # Group closed: go back to the "between groups" state.
                self._param.append([None])
        elif letter == "[":
            self._darg_par += 1

    def _visit_arg(self, letter):
        """Handle one character inside a ``{...}`` group."""
        self._param[-1].append(letter)
        if letter == "}":
            self._arg_par -= 1
            if self._arg_par <= 0:
                # Group closed: go back to the "between groups" state.
                self._param.append([None])
        elif letter == "{":
            self._arg_par += 1

    def _return_last(self, args, dargs):
        """Render a command as its last ``{...}`` argument."""
        return args[-1]

    def _return_first(self, args, dargs):
        """Render a command as its first ``{...}`` argument."""
        return args[0]

    # Commands rendered as one of their arguments.
    chapterinitial = _return_last
    section = _return_last
    subsection = _return_last
    subsubsection = _return_last
    caption = _return_last
    scriptvarext = _return_first
    questvarext = _return_first
    emph = _return_last
    textit = _return_last
    textbf = _return_last
    textc = _return_last
    multicolumn = _return_last
    title = _return_last
    author = _return_last
    affil = _return_last

    def _load_bibliography(self):
        """Parse the file named by ``BIBLIOGRAPHY`` and return its entries."""
        parser = BibTexParser()
        parser.customization = convert_to_unicode
        # The context manager closes the file once parsing is done.
        with open(BIBLIOGRAPHY) as handle:
            return bibtexparser.load(handle, parser=parser).entries

    def cite(self, args, dargs):
        """Render ``\\cite{a,b}`` as ``[<label>, <label>]``.

        Each new key gets the next citation number. Without a bibliography
        the label is just that number. With one it is
        ``number:(author, title)`` for a key found in the file (compared
        case-insensitively) and ``number:(key)`` otherwise. Repeated keys
        reuse their label.
        """
        labels = []
        entries = None  # parsed at most once per call, and only if needed
        for key in args[0].split(","):
            key = key.strip()
            if key not in self._citations:
                self._cur_cite += 1
                if BIBLIOGRAPHY is not None:
                    if entries is None:
                        entries = self._load_bibliography()
                    label = "{}:({})".format(self._cur_cite, key)
                    for entry in entries:
                        if entry.get('ID').strip().lower() == key.lower():
                            label = "{}:({}, {})".format(self._cur_cite, entry.get('author', ''), entry.get('title', ''))
                    self._citations[key] = label
                else:
                    self._citations[key] = str(self._cur_cite)
            labels.append(self._citations[key])
        return "[{}]".format(", ".join(labels))

    def textcite(self, args, dargs):
        """Render ``\\textcite`` as the capitalized first five characters of
        the key text, followed by ``et al.`` and the bracketed citation."""
        return args[0][:5].capitalize() + " et al. " + self.cite(args, [])

    def item(self, args, dargs):
        """Render ``\\item`` as a dash on a new line."""
        return "\n-"

    def ref(self, args, dargs):
        """Render ``\\ref{label}`` as a number assigned in order of first use."""
        ref = args[0].strip()
        if ref not in self._refs:
            self._cur_ref += 1
            self._refs[ref] = str(self._cur_ref)
        return self._refs[ref]

    def scriptvar(self, args, dargs):
        """Render ``\\scriptvar{name}`` as the value of ``name`` in script_var.txt."""
        return read("script_var.txt", args[0])

    def questvar(self, args, dargs):
        """Render ``\\questvar{name}`` as the value of ``name`` in quest_var.txt."""
        return read("quest_var.txt", args[0])

    def nth(self, args, dargs):
        """Render ``\\nth{N}`` as ``N`` with its English ordinal suffix."""
        number = args[0]
        # A "1" in the tens position (11th, 12th, 113th) always takes "th".
        if len(number) > 1 and number[-2] == "1":
            return number + "th"
        if number.endswith("1"):
            return number + "st"
        if number.endswith("2"):
            return number + "nd"
        if number.endswith("3"):
            return number + "rd"
        return number + "th"

    def eg(self, args, dargs):
        """Render ``\\eg``."""
        return "e.g.,"

    def ie(self, args, dargs):
        """Render ``\\ie``."""
        return "i.e.,"

    def footnote(self, args, dargs):
        """Render a footnote inline, wrapped in parentheses."""
        return "({})".format(args[0])

    def scriptanalysisdef(self, args, dargs):
        """Render the first two arguments as two paragraphs."""
        return "{}\n\n{}".format(args[0], args[1])
class LatexClipboard(object):
    """Hotkey callbacks that paste clipboard LaTeX as converted plain text.

    Two buffers are available: the regular system clipboard (``copy`` /
    ``paste``) and a private one kept in ``self.ctrl`` (``ctrl_copy`` /
    ``ctrl_paste``) that leaves the system clipboard content untouched.

    Args:
        parser: Class (or zero-argument callable) producing a converter
            that is called with the LaTeX text and returns plain text.
        delay: Seconds to wait after sending a copy/paste keystroke before
            the clipboard is read or restored.
    """

    def __init__(self, parser=LatexParser, delay=0.5):
        self.ctrl = ""
        self.parser_cls = parser
        self.delay = delay

    def copy(self):
        """Send Ctrl+C to the focused application."""
        keyboard.send("ctrl+c")
        sleep(self.delay)

    def ctrl_copy(self):
        """Copy the selection into ``self.ctrl``, preserving the clipboard."""
        current = pyperclip.paste()
        try:
            keyboard.send("ctrl+c")
            sleep(self.delay)
            self.ctrl = pyperclip.paste()
        finally:
            # Put the previous clipboard content back even if the copy failed.
            pyperclip.copy(current)

    def ctrl_paste(self):
        """Convert and paste the text stored by ``ctrl_copy``."""
        self.latex_paste(self.ctrl)

    def custom_paste(self, text):
        """Paste ``text`` via Ctrl+V, then restore the previous clipboard."""
        current = pyperclip.paste()
        try:
            pyperclip.copy(text)
            keyboard.send("ctrl+v")
            sleep(self.delay)
        finally:
            # Put the previous clipboard content back even if the paste failed.
            pyperclip.copy(current)

    def latex_paste(self, text):
        """Convert ``text`` with a fresh parser instance and paste the result."""
        converter = self.parser_cls()
        self.custom_paste(converter(text))

    def paste(self):
        """Convert and paste the current clipboard content."""
        self.latex_paste(pyperclip.paste())
if __name__ == '__main__':
    # Command-line options: one hotkey per action plus the optional
    # bibliography file.
    parser = argparse.ArgumentParser()
    option_table = (
        ("-c", "--copy", "f8"),
        ("-v", "--paste", "f9"),
        ("-d", "--ctrl-copy", "ctrl+f8"),
        ("-f", "--ctrl-paste", "ctrl+f9"),
        ("-b", "--bibliography", None),
    )
    for short_flag, long_flag, fallback in option_table:
        parser.add_argument(short_flag, long_flag, default=fallback)
    args = parser.parse_args()

    obj = LatexClipboard()
    # Module-level setting read by the citation handler.
    BIBLIOGRAPHY = args.bibliography

    # Register the hotkeys, then block so the callbacks keep firing.
    bindings = (
        (args.copy, obj.copy),
        (args.ctrl_copy, obj.ctrl_copy),
        (args.paste, obj.paste),
        (args.ctrl_paste, obj.ctrl_paste),
    )
    for hotkey, callback in bindings:
        keyboard.add_hotkey(hotkey, callback)
    keyboard.wait()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment