ViniciusFM · November 16, 2024 13:13 · ViniciusFM · Nov 15, 2024
diff --git a/pyrtf2latex.py b/pyrtf2latex.py
 '''
 pyrtf2latex.py
 ==============

    This  script  was created to copy   rich  text  from  an  office suite
    application and adapt it  for transferring text  formatting, including
    bold,  italic,  subscript,   and superscript content. Additionally, it
    highlights any text enclosed in  parentheses (but maybe you won't need
    this, feel free to modify). You are welcome  to adapt this  script for
    your   own   needs,  but  please  note  that  it  is  designed to work
    exclusively on Linux distributions, as it relies on the xclip tool. In
    this implementation, I opted to convert RTF (Rich Text Format) to HTML
    first   to  facilitate  parsing.   Parsing  HTML  proved to    be more
    straightforward for me, as I  didn't   have  sufficient time  to delve
    deeply  into the complexities of RTF syntax. Feel free  to use, modify
    and redistribute the script as you see fit!
 '''
 import re
 import pyperclip
 import subprocess
 from pynput import keyboard

 # xclip command line addressing the CLIPBOARD selection. It carries no -o flag,
 # so unlike CMD2 it is not a "print the selection" request.
 CMD1=['xclip', '-selection', 'clipboard']
 # xclip command line that prints (-o) the CLIPBOARD selection, requesting the
 # text/html target (-t) so the formatting markup is kept.
 CMD2=['xclip', '-selection', 'clipboard', '-o', '-t', 'text/html']

 def get_clipboard_as_html() -> 'str | None':
     '''
     Read the current clipboard content as HTML by shelling out to xclip.

     Runs CMD1 (output discarded) and then CMD2, whose stdout is decoded as
     UTF-8 and returned.

     Returns:
         The text printed by CMD2, or None when xclip could not be started
         (for example because it is not installed) or when CMD2 exited with
         a non-zero status.
     '''
     print('copying from clipboard')
     try:
         subprocess.run(CMD1, stdout=subprocess.DEVNULL)
         result = subprocess.run(
             CMD2,
             capture_output=True,
             text=True,
             check=True,
             encoding='utf-8',
         )
     except (subprocess.CalledProcessError, FileNotFoundError) as e:
         # FileNotFoundError is what subprocess raises when the xclip binary is
         # missing; treating it like any other failure keeps the documented
         # "None on error" contract instead of crashing the caller.
         print(f'Err: something went wrong during copy: {str(e)}')
         return None
     return result.stdout

 def html_parser(content:str) -> str:
     '''
     Translate the HTML formatting tags this script understands into LaTeX.

     Bold (b/strong), italic (i/em), subscript (sub) and superscript (sup)
     elements are rewritten as the textbf, textit, textsubscript and
     textsuperscript commands. Opening and closing p tags each become a line
     break, text inside parentheses is wrapped in an hl command, and every
     remaining tag is dropped. Only tags written without attributes are
     recognised by the formatting rules; any other tag is simply removed.

     Args:
         content: HTML markup to convert.

     Returns:
         The converted text.
     '''
     print('parsing')
     inline_flags = re.DOTALL | re.IGNORECASE
     # bold
     content = re.sub(r'<\s*(b|strong)\s*>(.*?)<\s*/\s*(b|strong)\s*>', r'\\textbf{\2}', content, flags=inline_flags)
     # italic
     content = re.sub(r'<\s*(i|em)\s*>(.*?)<\s*/\s*(i|em)\s*>', r'\\textit{\2}', content, flags=inline_flags)
     # subscript
     content = re.sub(r'<\s*sub\s*>(.*?)<\s*/\s*sub\s*>', r'\\textsubscript{\1}', content, flags=inline_flags)
     # superscript (must map to \textsuperscript, not \textsubscript)
     content = re.sub(r'<\s*sup\s*>(.*?)<\s*/\s*sup\s*>', r'\\textsuperscript{\1}', content, flags=inline_flags)
     # paragraphs
     content = re.sub(r'<\s*p\s*>', r'\n', content, flags=re.IGNORECASE) # opening paragraph
     content = re.sub(r'<\s*/\s*p\s*>', r'\n', content, flags=re.IGNORECASE) # closing paragraph
     # highlight parenthesised text (a personal preference of the author);
     # comment out the line below to disable it
     content = re.sub(r'\((.*?)\)', r'(\\hl{\1})', content)
     # ignoring the rest
     content = re.sub(r'<[^>]+>', '', content)
     return content

 def on_press(key):
     '''
     Keyboard listener callback: convert the clipboard when the hotkey is hit.

     On F9 the clipboard is read as HTML, newlines are replaced by spaces,
     the markup is converted by html_parser and the result is copied back to
     the clipboard. If no HTML could be read, an error marker string is
     copied instead. Any other key is ignored.

     Args:
         key: the key object handed over by the pynput listener.
     '''
     # change here the hotkey for parsing
     if key != keyboard.Key.f9:
         return
     cboard = get_clipboard_as_html()
     if cboard:
         pyperclip.copy(html_parser(cboard.replace('\n', ' ')))
         print('ready to paste')
     else:
         print('Something went wrong')
         pyperclip.copy('<<Error on copying check log>>')

 def main():
     '''
     Create the keyboard listener, start it and wait until it finishes.

     The listener is stored in the module-level name ``listener`` so that the
     script entry point can stop it.
     '''
     global listener
     listener = keyboard.Listener(on_press=on_press)
     listener.start()
     listener.join()

 if __name__ == '__main__':
     try:
         # Adjacent string literals are concatenated by the compiler, so the
         # source indentation no longer leaks into the printed message (which
         # happened with backslash continuations inside a single literal).
         print(
             'You need to perform 3 steps:'
             '\n  1) Copy content.'
             '\n  2) Press F9 to parse from RTF to LaTeX.'
             '\n  3) Paste it.'
             '\n<This will keep running until you press Ctrl+C, to stop>\n'
         )
         main()
     except KeyboardInterrupt:
         # main() publishes the listener as a module global; it does not exist
         # yet if Ctrl+C arrives before main() has assigned it.
         if 'listener' in globals():
             listener.stop()
         print('\nLeaving...')
	'''
	pyrtf2latex.py
	==============

	This script was created to copy rich text from an office suite
	application and adapt it for transferring text formatting, including
	bold, italic, subscript, and superscript content. Additionally, it
	highlights any text enclosed in parentheses (but maybe you won't need
	this, feel free to modify). You are welcome to adapt this script for
	your own needs, but please note that it is designed to work
	exclusively on Linux distributions, as it relies on the xclip tool. In
	this implementation, I opted to convert RTF (Rich Text Format) to HTML
	first to facilitate parsing. Parsing HTML proved to be more
	straightforward for me, as I didn't have sufficient time to delve
	deeply into the complexities of RTF syntax. Feel free to use, modify
	and redistribute the script as you see fit!
	'''
	import re
	import pyperclip
	import subprocess
	from pynput import keyboard

	# xclip command line addressing the CLIPBOARD selection. It carries no -o flag,
	# so unlike CMD2 it is not a "print the selection" request.
	CMD1=['xclip', '-selection', 'clipboard']
	# xclip command line that prints (-o) the CLIPBOARD selection, requesting the
	# text/html target (-t) so the formatting markup is kept.
	CMD2=['xclip', '-selection', 'clipboard', '-o', '-t', 'text/html']

	def get_clipboard_as_html() -> 'str | None':
	    '''
	    Read the current clipboard content as HTML by shelling out to xclip.

	    Runs CMD1 (output discarded) and then CMD2, whose stdout is decoded as
	    UTF-8 and returned.

	    Returns:
	        The text printed by CMD2, or None when xclip could not be started
	        (for example because it is not installed) or when CMD2 exited with
	        a non-zero status.
	    '''
	    print('copying from clipboard')
	    try:
	        subprocess.run(CMD1, stdout=subprocess.DEVNULL)
	        result = subprocess.run(
	            CMD2,
	            capture_output=True,
	            text=True,
	            check=True,
	            encoding='utf-8',
	        )
	    except (subprocess.CalledProcessError, FileNotFoundError) as e:
	        # FileNotFoundError is what subprocess raises when the xclip binary is
	        # missing; treating it like any other failure keeps the documented
	        # "None on error" contract instead of crashing the caller.
	        print(f'Err: something went wrong during copy: {str(e)}')
	        return None
	    return result.stdout

	def html_parser(content:str) -> str:
	    '''
	    Translate the HTML formatting tags this script understands into LaTeX.

	    Bold (b/strong), italic (i/em), subscript (sub) and superscript (sup)
	    elements are rewritten as the textbf, textit, textsubscript and
	    textsuperscript commands. Opening and closing p tags each become a line
	    break, text inside parentheses is wrapped in an hl command, and every
	    remaining tag is dropped. Only tags written without attributes are
	    recognised by the formatting rules; any other tag is simply removed.

	    Args:
	        content: HTML markup to convert.

	    Returns:
	        The converted text.
	    '''
	    print('parsing')
	    # The patterns below restore the "*" quantifiers and the unescaped "|"
	    # alternations/flag unions that were lost in this copy of the script.
	    inline_flags = re.DOTALL | re.IGNORECASE
	    # bold
	    content = re.sub(r'<\s*(b|strong)\s*>(.*?)<\s*/\s*(b|strong)\s*>', r'\\textbf{\2}', content, flags=inline_flags)
	    # italic
	    content = re.sub(r'<\s*(i|em)\s*>(.*?)<\s*/\s*(i|em)\s*>', r'\\textit{\2}', content, flags=inline_flags)
	    # subscript
	    content = re.sub(r'<\s*sub\s*>(.*?)<\s*/\s*sub\s*>', r'\\textsubscript{\1}', content, flags=inline_flags)
	    # superscript (must map to \textsuperscript, not \textsubscript)
	    content = re.sub(r'<\s*sup\s*>(.*?)<\s*/\s*sup\s*>', r'\\textsuperscript{\1}', content, flags=inline_flags)
	    # paragraphs
	    content = re.sub(r'<\s*p\s*>', r'\n', content, flags=re.IGNORECASE) # opening paragraph
	    content = re.sub(r'<\s*/\s*p\s*>', r'\n', content, flags=re.IGNORECASE) # closing paragraph
	    # highlight parenthesised text (a personal preference of the author);
	    # comment out the line below to disable it
	    content = re.sub(r'\((.*?)\)', r'(\\hl{\1})', content)
	    # ignoring the rest
	    content = re.sub(r'<[^>]+>', '', content)
	    return content

	def on_press(key):
	    '''
	    Keyboard listener callback: convert the clipboard when the hotkey is hit.

	    On F9 the clipboard is read as HTML, newlines are replaced by spaces,
	    the markup is converted by html_parser and the result is copied back to
	    the clipboard. If no HTML could be read, an error marker string is
	    copied instead. Any other key is ignored.

	    Args:
	        key: the key object handed over by the pynput listener.
	    '''
	    # change here the hotkey for parsing
	    if key != keyboard.Key.f9:
	        return
	    cboard = get_clipboard_as_html()
	    if cboard:
	        pyperclip.copy(html_parser(cboard.replace('\n', ' ')))
	        print('ready to paste')
	    else:
	        print('Something went wrong')
	        pyperclip.copy('<<Error on copying check log>>')

	def main():
	    '''
	    Create the keyboard listener, start it and wait until it finishes.

	    The listener is stored in the module-level name ``listener`` so that the
	    script entry point can stop it.
	    '''
	    global listener
	    listener = keyboard.Listener(on_press=on_press)
	    listener.start()
	    listener.join()

	if __name__ == '__main__':
	    try:
	        # Adjacent string literals are concatenated by the compiler, so the
	        # whitespace that starts each source line no longer leaks into the
	        # printed message (which happened with backslash continuations
	        # inside a single literal).
	        print(
	            'You need to perform 3 steps:'
	            '\n 1) Copy content.'
	            '\n 2) Press F9 to parse from RTF to LaTeX.'
	            '\n 3) Paste it.'
	            '\n<This will keep running until you press Ctrl+C, to stop>\n'
	        )
	        main()
	    except KeyboardInterrupt:
	        # main() publishes the listener as a module global; it does not exist
	        # yet if Ctrl+C arrives before main() has assigned it.
	        if 'listener' in globals():
	            listener.stop()
	        print('\nLeaving...')