Last active
November 16, 2024 13:13
-
-
Save ViniciusFM/2523fe7fd9dea4125ee784b2205f2e38 to your computer and use it in GitHub Desktop.
This code converts RTF content from your clipboard into simple LaTeX syntax.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
pyrtf2latex.py
==============
This script was created to copy rich text from an office suite
application and adapt it for transferring text formatting, including
bold, italic, subscript, and superscript content. Additionally, it
highlights any text enclosed in parentheses (you may not need this;
feel free to modify it). You are welcome to adapt this script for
your own needs, but please note that it is designed to work
exclusively on Linux distributions, as it relies on the xclip tool. In
this implementation, I opted to convert RTF (Rich Text Format) to HTML
first to facilitate parsing. Parsing HTML proved to be more
straightforward for me, as I didn't have sufficient time to delve
deeply into the complexities of RTF syntax. Feel free to use, modify
and redistribute the script as you see fit!
'''
import re | |
import pyperclip | |
import subprocess | |
from pynput import keyboard | |
# Base xclip invocation targeting the X11 "clipboard" selection.
CMD1 = ['xclip', '-selection', 'clipboard']
# Same invocation, extended to print (-o) the selection as the text/html target.
CMD2 = CMD1 + ['-o', '-t', 'text/html']
def get_clipboard_as_html() -> 'str | None':
    '''
    Read the clipboard selection as HTML by shelling out to xclip.

    CMD1 is executed first with its output discarded, then CMD2 is executed
    and its standard output (decoded as UTF-8) is returned.

    Returns:
        The text printed by CMD2, or None when xclip exits with a non-zero
        status or cannot be started (e.g. it is not installed).
    '''
    try:
        print('copying from clipboard')
        # NOTE(review): CMD1 carries no '-o' flag, which per the xclip manual
        # is input mode (read stdin into the selection). Kept to preserve the
        # existing behavior -- confirm whether this call is actually needed.
        subprocess.run(CMD1, stdout=subprocess.DEVNULL)
        result = subprocess.run(
            CMD2,
            capture_output=True,
            text=True,
            check=True,  # non-zero exit -> CalledProcessError, handled below
            encoding='utf-8',
        )
        return result.stdout
    except subprocess.CalledProcessError as e:
        print(f'Err: something went wrong during copy: {str(e)}')
    except OSError as e:
        # Raised when the xclip executable is missing or cannot be executed;
        # report it the same way instead of crashing the caller.
        print(f'Err: could not run xclip: {str(e)}')
    return None
def html_parser(content: str) -> str:
    '''
    Convert a small subset of HTML markup into LaTeX commands.

    The substitutions run in this order:
      * <b>/<strong>   -> \\textbf{...}
      * <i>/<em>       -> \\textit{...}
      * <sub>          -> \\textsubscript{...}
      * <sup>          -> \\textsuperscript{...}
      * <p> and </p>   -> newline
      * (text)         -> (\\hl{text})
      * any other tag  -> removed

    Only attribute-less tags are recognised by the formatting rules; a tag
    carrying attributes (e.g. <b class="x">) falls through to the final
    "remove every remaining tag" step and its formatting is lost.

    Args:
        content: HTML text to convert.

    Returns:
        The converted text with all remaining tags stripped.
    '''
    print('parsing')
    flags = re.DOTALL | re.IGNORECASE
    # bold
    content = re.sub(r'<\s*(b|strong)\s*>(.*?)<\s*/\s*(b|strong)\s*>', r'\\textbf{\2}', content, flags=flags)
    # italic
    content = re.sub(r'<\s*(i|em)\s*>(.*?)<\s*/\s*(i|em)\s*>', r'\\textit{\2}', content, flags=flags)
    # subscript
    content = re.sub(r'<\s*sub\s*>(.*?)<\s*/\s*sub\s*>', r'\\textsubscript{\1}', content, flags=flags)
    # superscript (previously emitted \textsubscript by mistake)
    content = re.sub(r'<\s*sup\s*>(.*?)<\s*/\s*sup\s*>', r'\\textsuperscript{\1}', content, flags=flags)
    # paragraphs: both the opening and the closing tag become a newline
    content = re.sub(r'<\s*p\s*>', r'\n', content, flags=re.IGNORECASE)
    content = re.sub(r'<\s*/\s*p\s*>', r'\n', content, flags=re.IGNORECASE)
    # highlighting parenthesised text is a personal preference of the author;
    # comment out the line below to disable it
    content = re.sub(r'\((.*?)\)', r'(\\hl{\1})', content)
    # ignoring the rest: drop every tag that is still present
    content = re.sub(r'<[^>]+>', '', content)
    return content
def on_press(key):
    '''
    Keyboard listener callback: on F9, convert the clipboard HTML to LaTeX.

    When the hotkey is pressed, the clipboard is read through
    get_clipboard_as_html(), newlines are replaced by spaces, the result is
    passed to html_parser() and the output is copied back to the clipboard.
    If nothing could be read, an error marker is copied instead.

    Args:
        key: the key object handed over by the pynput listener.
    '''
    # change here the hotkey for parsing
    # Compare against the enum member directly; keys that are not F9
    # (including plain character keys) simply compare unequal.
    if key != keyboard.Key.f9:
        return
    cboard = get_clipboard_as_html()
    if cboard:
        pyperclip.copy(html_parser(cboard.replace('\n', ' ')))
        print('ready to paste')
    else:
        print('Something went wrong')
        pyperclip.copy('<<Error on copying check log>>')
def main():
    '''
    Start the keyboard listener and block until it terminates.

    The listener is stored in the module-level name `listener` so that the
    script entry point can stop it.
    '''
    global listener
    listener = keyboard.Listener(on_press=on_press)
    listener.start()
    # Wait for the listener thread to finish.
    listener.join()
if __name__ == '__main__':
    # Script entry point: show the usage banner, then run until Ctrl+C.
    try:
        print(
            'You need to perform 3 steps:'
            '\n 1) Copy content.'
            '\n 2) Press F9 to parse from RTF to LaTeX.'
            '\n 3) Paste it.'
            '\n<This will keep running until you press Ctrl+C, to stop>\n'
        )
        main()
    except KeyboardInterrupt:
        # Ctrl+C: stop the listener created by main() before leaving.
        listener.stop()
        print('\nLeaving...')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Requirements:
Install xclip tool
Create a file named
requirements.txt
with the content below:pip install -r requirements.txt
, preferably from a virtual environment to avoid messing up your global settings