Created
September 11, 2025 11:55
-
-
Save mi-skam/4c340dcc35ffb9210b5fe69f9a3c930a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
# A closure to generate regex substitution functions
def conv_generator(html_pattern, md_replacement, flags=0):
    """Generate a function that substitutes html_pattern with md_replacement.

    Args:
        html_pattern: Regular expression string describing the HTML
            fragment to replace.
        md_replacement: Replacement handed to the compiled pattern's
            ``sub`` method; it may contain group back-references.
        flags: Optional ``re`` flags used when compiling ``html_pattern``
            (for example ``re.IGNORECASE``). Defaults to ``0``, i.e. no
            flags.

    Returns:
        A one-argument function that takes a text string and returns it
        with every match of ``html_pattern`` replaced by
        ``md_replacement``.
    """
    # Compile once, outside the closure, so each call to the returned
    # function reuses the same compiled pattern object.
    compiled = re.compile(html_pattern, flags)

    def sub_func(text):
        return compiled.sub(md_replacement, text)

    return sub_func
# One converter per supported construct.  The tag patterns use `.` without
# re.DOTALL, so a tag pair is only matched when it sits on a single line;
# `newline` replaces every run of whitespace (line breaks included) with a
# single space.
emphasis = conv_generator(r'<em>(.*?)</em>', r'*\1*')
paragraph = conv_generator(r'<p>(.*?)</p>', r'\1\n\n')
href = conv_generator(r'<a href="(.*?)">(.*?)</a>', r'[\2](\1)')
newline = conv_generator(r'\s+', ' ')
def html2markdown(html):
    """Take in html text as input and return markdown.

    The module-level converters are applied in a fixed order: ``newline``
    (collapse whitespace runs to one space), ``emphasis``, ``paragraph``
    and finally ``href``.

    Args:
        html: HTML text to convert.

    Returns:
        The converted text with leading and trailing whitespace stripped.
    """
    result = html
    # Whitespace is collapsed first so that tags split across several
    # lines end up on one line before the tag patterns are applied.
    for convert in (newline, emphasis, paragraph, href):
        result = convert(result)
    # remove any trailing newlines added by paragraphs
    return result.strip()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment