Skip to content

Instantly share code, notes, and snippets.

@monsieurpablo
Created February 14, 2025 22:56
Show Gist options
  • Save monsieurpablo/b21be55000d5fe8f32ce2b4c6cbe8996 to your computer and use it in GitHub Desktop.
Save monsieurpablo/b21be55000d5fe8f32ce2b4c6cbe8996 to your computer and use it in GitHub Desktop.
markdown2unicode
import re
# Bold: Mathematical Sans-Serif Bold
def char_to_bold(c):
    """Map one ASCII letter or digit to Mathematical Sans-Serif Bold.

    Args:
        c: A single-character string.

    Returns:
        The styled character for ``a-z``, ``A-Z`` and ``0-9``; any other
        character is returned unchanged.
    """
    if 'a' <= c <= 'z':
        # U+1D5EE: MATHEMATICAL SANS-SERIF BOLD SMALL A
        return chr(0x1D5EE + (ord(c) - ord('a')))
    if 'A' <= c <= 'Z':
        # U+1D5D4: MATHEMATICAL SANS-SERIF BOLD CAPITAL A
        return chr(0x1D5D4 + (ord(c) - ord('A')))
    if '0' <= c <= '9':
        # U+1D7EC: MATHEMATICAL SANS-SERIF BOLD DIGIT ZERO.  (U+1D7CE is the
        # serif bold zero, which would not match the sans-serif letters.)
        return chr(0x1D7EC + (ord(c) - ord('0')))
    return c
# Italic: Mathematical Italic
def char_to_italic(c):
    """Map one ASCII letter to its Mathematical Italic form.

    Args:
        c: A single-character string.

    Returns:
        The styled character for ``a-z`` and ``A-Z``; any other character
        (including digits) is returned unchanged.
    """
    if c == 'h':
        # U+1D455, where the italic small h would fall, is a reserved
        # (unassigned) code point; Unicode encodes that letter as
        # U+210E PLANCK CONSTANT instead.
        return '\u210E'
    if 'a' <= c <= 'z':
        # U+1D44E: MATHEMATICAL ITALIC SMALL A
        return chr(0x1D44E + (ord(c) - ord('a')))
    if 'A' <= c <= 'Z':
        # U+1D434: MATHEMATICAL ITALIC CAPITAL A
        return chr(0x1D434 + (ord(c) - ord('A')))
    return c
# Bold-Italic: Mathematical Sans-Serif Bold Italic
def char_to_bold_italic(c):
    """Map one ASCII letter to Mathematical Sans-Serif Bold Italic.

    Args:
        c: A single-character string.

    Returns:
        The styled character for ``a-z`` and ``A-Z``; any other character
        (including digits) is returned unchanged.
    """
    if 'a' <= c <= 'z':
        # U+1D656: MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL A
        return chr(0x1D656 + (ord(c) - ord('a')))
    if 'A' <= c <= 'Z':
        # U+1D63C: MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL A
        return chr(0x1D63C + (ord(c) - ord('A')))
    return c
# Underline: Add combining underline (U+0332)
def underline_repl(match):
    """Regex replacement callback that underlines the first captured group.

    Every character of ``match.group(1)`` is followed by the combining low
    line (U+0332), and the resulting string is returned.
    """
    combining_low_line = '\u0332'
    return ''.join(ch + combining_low_line for ch in match.group(1))
# Single-delimiter italic spans: _text_ or *text*.
# A delimiter that touches another copy of itself is rejected, and the span
# must be non-empty.  Without that, "__" (and a stray "**") is matched as an
# empty italic span and deleted, so __underline__ markup never reaches the
# underline pass.
_ITALIC_RE = re.compile(
    r'(?<!_)_(?![_\s])(.+?)(?<![_\s])_(?!_)'
    r'|(?<!\*)\*(?![*\s])(.+?)(?<![*\s])\*(?!\*)',
    re.DOTALL,
)


def _italic_body(match):
    """Return the text between the delimiters of an ``_ITALIC_RE`` match."""
    # Exactly one alternative matched, and ``.+?`` guarantees it is non-empty.
    return match.group(1) or match.group(2)


def process_bold_section(content):
    """Style the inside of a ``**...**`` span.

    Nested italic spans (``_..._`` or ``*...*``) are converted with
    ``char_to_bold_italic``; every remaining character is then passed through
    ``char_to_bold``.  ``__`` pairs are left in place so a later underline
    pass can still see them.

    Args:
        content: The text between the ``**`` delimiters.

    Returns:
        The styled text, without the nested italic delimiters.
    """
    content = _ITALIC_RE.sub(
        lambda m: ''.join(char_to_bold_italic(c) for c in _italic_body(m)),
        content,
    )
    # Characters already converted above are not ASCII letters or digits, so
    # char_to_bold returns them unchanged.
    return ''.join(char_to_bold(c) for c in content)


def convert_markdown_to_unicode(text):
    """Replace markdown emphasis markup with Unicode styled characters.

    Passes run in this order, each on the output of the previous one:

    1. ``***text***`` -> bold-italic
    2. ``**text**``   -> bold (nested italic becomes bold-italic)
    3. ``_text_`` / ``*text*`` -> italic
    4. ``__text__``   -> each character followed by U+0332

    Args:
        text: Markdown source text.

    Returns:
        The text with the recognised markup replaced.
    """
    # Longest delimiter first, so "***" is not consumed as "**" plus "*".
    text = re.sub(
        r'\*\*\*(?=\S)(.*?)(?<=\S)\*\*\*',
        lambda m: ''.join(char_to_bold_italic(c) for c in m.group(1)),
        text,
        flags=re.DOTALL,
    )
    text = re.sub(
        r'\*\*(?=\S)(.*?)(?<=\S)\*\*',
        lambda m: process_bold_section(m.group(1)),
        text,
        flags=re.DOTALL,
    )
    # The italic pattern skips doubled delimiters, so "__...__" survives this
    # pass and is handled by the underline pass below.
    text = _ITALIC_RE.sub(
        lambda m: ''.join(char_to_italic(c) for c in _italic_body(m)),
        text,
    )
    text = re.sub(
        r'__(?=\S)(.*?)(?<=\S)__', underline_repl, text, flags=re.DOTALL
    )
    return text
if __name__ == '__main__':
    # Demo: convert a sample line that mixes bold, nested italic, plain
    # italic and underline markup, then print the result.
    sample = "**Is _This_ the Future of AEC?** *Regular italic* __Underline__"
    print(convert_markdown_to_unicode(sample))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment