Skip to content

Instantly share code, notes, and snippets.

@mi-skam
Created September 11, 2025 11:55
Show Gist options
  • Save mi-skam/4c340dcc35ffb9210b5fe69f9a3c930a to your computer and use it in GitHub Desktop.
Save mi-skam/4c340dcc35ffb9210b5fe69f9a3c930a to your computer and use it in GitHub Desktop.
import re
# Factory for regex-based substitution functions (closures over a compiled pattern).
def conv_generator(html_pattern, md_replacement, *, flags=0):
    """Build a function that replaces every match of a regex in a string.

    Args:
        html_pattern: Regular expression to search for.
        md_replacement: Replacement handed to ``Pattern.sub``; it may use
            group backreferences to reuse captured text.
        flags: Optional ``re`` flags (for example ``re.IGNORECASE`` or
            ``re.DOTALL``) applied when compiling ``html_pattern``.
            Defaults to ``0``, i.e. the pattern is compiled as written.

    Returns:
        A one-argument function ``sub_func(text)`` that returns ``text`` with
        all non-overlapping matches of the pattern replaced.

    Raises:
        re.error: If ``html_pattern`` is not a valid regular expression.
    """
    # Compile once, up front, so every call of the returned closure reuses
    # the same pattern object.
    compiled = re.compile(html_pattern, flags)

    def sub_func(text):
        return compiled.sub(md_replacement, text)

    return sub_func
# Module-level converters, listed in the order html2markdown applies them.

# Despite its name, this collapses every run of whitespace (not only
# newlines) into a single space.
newline = conv_generator(html_pattern=r'\s+', md_replacement=' ')

# <em>text</em>  ->  *text*
emphasis = conv_generator(html_pattern=r'<em>(.*?)</em>', md_replacement=r'*\1*')

# <p>text</p>  ->  text followed by a blank line
paragraph = conv_generator(html_pattern=r'<p>(.*?)</p>', md_replacement=r'\1\n\n')

# <a href="url">label</a>  ->  [label](url)
href = conv_generator(
    html_pattern=r'<a href="(.*?)">(.*?)</a>',
    md_replacement=r'[\2](\1)',
)
def html2markdown(html):
    """Convert a small subset of HTML to Markdown.

    Whitespace runs are collapsed to single spaces first; then ``<em>``,
    ``<p>`` and ``<a href="...">`` elements are rewritten by the module-level
    converters ``emphasis``, ``paragraph`` and ``href``.

    Args:
        html: HTML text to convert.

    Returns:
        The Markdown text, without leading or trailing whitespace and without
        stray spaces next to the blank lines that separate paragraphs.
    """
    # Collapse whitespace before anything else: the tag patterns use ``.``,
    # which does not match a newline, so an element spanning several lines
    # would otherwise be left unconverted.
    result = newline(html)
    for convert in (emphasis, paragraph, href):
        result = convert(result)
    # Whitespace that sat between (or just inside) <p> elements survives the
    # collapse as a single space beside the generated blank line, e.g.
    # "<p>a</p>\n<p>b</p>" would become "a\n\n b". Drop those spaces so each
    # paragraph starts and ends cleanly.
    result = re.sub(r' *\n\n *', '\n\n', result)
    # Remove the blank line left behind by the final paragraph.
    return result.strip()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment