Created
February 14, 2025 22:56
-
-
Save monsieurpablo/b21be55000d5fe8f32ce2b4c6cbe8996 to your computer and use it in GitHub Desktop.
markdown2unicode
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Bold: Mathematical Sans-Serif Bold
def char_to_bold(c):
    """Return the Mathematical Sans-Serif Bold form of a single character.

    ASCII letters and digits are shifted into the matching sans-serif bold
    ranges of the Mathematical Alphanumeric Symbols block. Any other
    character is returned unchanged.

    Args:
        c: A one-character string.

    Returns:
        The styled character, or ``c`` itself when no mapping applies.
    """
    if 'a' <= c <= 'z':
        return chr(0x1D5EE + (ord(c) - ord('a')))
    if 'A' <= c <= 'Z':
        return chr(0x1D5D4 + (ord(c) - ord('A')))
    if '0' <= c <= '9':
        # U+1D7EC is SANS-SERIF BOLD DIGIT ZERO; U+1D7CE would be the serif
        # bold digits, which do not match the sans-serif letters above.
        return chr(0x1D7EC + (ord(c) - ord('0')))
    return c
# Italic: Mathematical Italic
def char_to_italic(c):
    """Return the Mathematical Italic form of a single character.

    ASCII letters are shifted into the Mathematical Italic ranges. Digits
    and every other character are returned unchanged.

    Args:
        c: A one-character string.

    Returns:
        The styled character, or ``c`` itself when no mapping applies.
    """
    if 'a' <= c <= 'z':
        if c == 'h':
            # U+1D455 (where italic "h" would fall) is a reserved code point;
            # the italic small h is encoded as U+210E PLANCK CONSTANT.
            return '\u210E'
        return chr(0x1D44E + (ord(c) - ord('a')))
    if 'A' <= c <= 'Z':
        return chr(0x1D434 + (ord(c) - ord('A')))
    return c
# Bold-Italic: Mathematical Sans-Serif Bold Italic
def char_to_bold_italic(c):
    """Return the Mathematical Sans-Serif Bold Italic form of a character.

    ASCII letters are shifted into the sans-serif bold italic ranges.
    Digits and every other character are returned unchanged.

    Args:
        c: A one-character string.

    Returns:
        The styled character, or ``c`` itself when no mapping applies.
    """
    # NOTE: this function used to be defined twice with identical bodies;
    # a single definition is kept.
    if 'a' <= c <= 'z':
        return chr(0x1D656 + (ord(c) - ord('a')))
    if 'A' <= c <= 'Z':
        return chr(0x1D63C + (ord(c) - ord('A')))
    return c
# Underline: Add combining underline (U+0332)
def underline_repl(match):
    """Build the replacement text for an underline regex match.

    Takes the first captured group of ``match`` and appends the combining
    low line (U+0332) after each of its characters.
    """
    pieces = []
    for ch in match.group(1):
        pieces.append(ch + '\u0332')
    return ''.join(pieces)
def process_bold_section(content):
    """Style the inner text of a ``**bold**`` span.

    Italic runs nested inside the bold text (``_..._`` or ``*...*``) are
    converted with ``char_to_bold_italic`` and their delimiters dropped;
    everything that remains is then passed through ``char_to_bold``.

    Args:
        content: The text found between the ``**`` delimiters.

    Returns:
        The styled text.
    """
    # Convert italic (_..._ or *...*) within bold to bold-italic.
    # The lookarounds reject doubled delimiters: previously "__" (and "**")
    # matched as an italic span with empty content and was silently deleted,
    # which destroyed underline markers nested inside bold text.
    nested_italic = (
        r'(?<!\*)\*(?!\*)(?=\S)(.+?)(?<=\S)(?<!\*)\*(?!\*)'
        r'|(?<!_)_(?!_)(?=\S)(.+?)(?<=\S)(?<!_)_(?!_)'
    )
    content = re.sub(
        nested_italic,
        # Exactly one of the two groups is set, depending on the delimiter.
        lambda m: ''.join(
            char_to_bold_italic(c) for c in (m.group(1) or m.group(2))
        ),
        content,
        flags=re.DOTALL,
    )
    # Convert remaining text to bold; already-styled characters fall outside
    # the ASCII ranges char_to_bold checks and pass through unchanged.
    return ''.join(char_to_bold(c) for c in content)
def convert_markdown_to_unicode(text):
    """Replace a subset of Markdown emphasis with styled Unicode characters.

    Passes run in this order, each dropping the delimiters it matches:

    1. ``***text***`` -> bold-italic (``char_to_bold_italic``)
    2. ``**text**``   -> bold, with nested italic (``process_bold_section``)
    3. ``*text*`` / ``_text_`` -> italic (``char_to_italic``)
    4. ``__text__``   -> underline (``underline_repl``)

    In every pass the content must start and end with a non-whitespace
    character; ``.`` also matches newlines (``re.DOTALL``).

    Args:
        text: The Markdown source string.

    Returns:
        The converted string.
    """
    # Process bold-italic (***...***)
    text = re.sub(r'\*\*\*(?=\S)(.*?)(?<=\S)\*\*\*',
                  lambda m: ''.join(char_to_bold_italic(c) for c in m.group(1)),
                  text, flags=re.DOTALL)
    # Process bold (**...**) with nested italic
    text = re.sub(r'\*\*(?=\S)(.*?)(?<=\S)\*\*',
                  lambda m: process_bold_section(m.group(1)),
                  text, flags=re.DOTALL)
    # Process remaining italic (_..._ or *...*).
    # Doubled delimiters are rejected by the lookarounds. Previously each
    # "__" matched here as an italic span with empty content and was deleted,
    # so the underline pass below never found anything to underline.
    italic = (
        r'(?<!\*)\*(?!\*)(?=\S)(.+?)(?<=\S)(?<!\*)\*(?!\*)'
        r'|(?<!_)_(?!_)(?=\S)(.+?)(?<=\S)(?<!_)_(?!_)'
    )
    text = re.sub(italic,
                  # Exactly one group is set, depending on the delimiter.
                  lambda m: ''.join(char_to_italic(c)
                                    for c in (m.group(1) or m.group(2))),
                  text, flags=re.DOTALL)
    # Process underline (__...__)
    text = re.sub(r'__(?=\S)(.*?)(?<=\S)__', underline_repl, text,
                  flags=re.DOTALL)
    return text
if __name__ == '__main__':
    # Demo: convert a sample containing bold (with nested italic), italic
    # and underline markup, then print the result.
    sample = "**Is _This_ the Future of AEC?** *Regular italic* __Underline__"
    print(convert_markdown_to_unicode(sample))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment