Created
June 19, 2020 18:31
-
-
Save waylan/ba297e1c46dc5a16cac6347387bc1452 to your computer and use it in GitHub Desktop.
Markdown to Markdown using Mistune (experimental and incomplete)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from mistune import create_markdown | |
from mistune.renderers import BaseRenderer | |
import re | |
# Markdown punctuation that gets a backslash put in front of it when plain
# text is rendered. The negative lookbehind skips a character that already
# directly follows a backslash.
ESCAPE_CHAR = re.compile(
    r'(?<!\\)([\\`*_()\[\]#+-])'
)

# A `*` bullet (group 1) and the run of spaces after it (group 2), anchored
# at the start of any line of a multi-line string.
UL_BULLET = re.compile(
    r'(?<=^)(\*)( +)',
    flags=re.MULTILINE,
)
def indent(text, level, tab_length=4):
    """Return ``text`` with every line shifted right by ``level`` tab stops.

    A tab stop is ``tab_length`` spaces. The text is split on newline
    characters only, and empty lines receive the padding as well.
    """
    padding = ' ' * tab_length * level
    shifted = [padding + row for row in text.split('\n')]
    return '\n'.join(shifted)
class MdRenderer(BaseRenderer):
    """Mistune renderer that writes parsed Markdown back out as Markdown.

    Every method returns the Markdown source for one inline or block element
    as a string. The ``text`` arguments are strings that are embedded in the
    output as given.
    """

    # NOTE(review): presumably read by mistune's renderer machinery -- confirm
    # against the installed mistune version.
    NAME = 'md'
    IS_TREE = False

    def text(self, text):
        """Return ``text`` with Markdown punctuation backslash-escaped."""
        # TODO: escaping is probably more aggressive than it needs to be.
        return ESCAPE_CHAR.sub(r'\\\1', text)

    def link(self, link, text=None, title=None):
        """Render a link as an autolink ``<...>`` or as ``[text](url "title")``.

        The autolink form is used only when there is no title and ``text`` is
        the URL itself or the address part of a ``mailto:`` URL. When ``text``
        is None the URL is used as the link text.
        """
        if text is None:
            # Default the label first so the checks below never see None.
            text = link
        elif title is None and (
            link == text
            or ('@' in text and link.startswith('mailto:') and link[7:] == text)
        ):
            # Autolink. It cannot carry a title, so a titled link always takes
            # the inline form below instead of losing the title.
            return f'<{text}>'
        title = f' "{title}"' if title is not None else ''
        return f'[{text}]({link}{title})'

    def image(self, src, alt="", title=None):
        """Render an image as ``![alt](src "title")``; the title is optional."""
        title = f' "{title}"' if title is not None else ''
        return f'![{alt}]({src}{title})'

    def emphasis(self, text):
        """Wrap ``text`` in single asterisks."""
        return f'*{text}*'

    def strong(self, text):
        """Wrap ``text`` in double asterisks."""
        return f'**{text}**'

    def codespan(self, text):
        """Render inline code, choosing a fence that cannot clash with ``text``.

        Code without backticks gets a single-backtick fence. Otherwise the
        fence is one backtick longer than the longest backtick run in the
        code and is separated from the code by a space on each side.
        """
        backtick_runs = re.findall(r'`+', text)
        if not backtick_runs:
            return f'`{text}`'
        fence = '`' * (max(map(len, backtick_runs)) + 1)
        return f'{fence} {text} {fence}'

    def linebreak(self):
        """Return a hard line break (a space followed by a newline)."""
        return ' \n'

    def inline_html(self, html):
        """Return inline HTML unchanged."""
        return html

    def paragraph(self, text):
        """Return ``text`` followed by a blank line."""
        return f'{text}\n\n'

    def heading(self, text, level):
        """Render an ATX heading with ``level`` ``#`` characters."""
        return f'{"#"*level} {text}\n\n'

    def newline(self):
        """Return a single newline."""
        return '\n'

    def thematic_break(self):
        """Render a thematic break as ``- - -``."""
        return '- - -\n\n'

    def block_text(self, text):
        """Return ``text`` unchanged."""
        return text

    def block_code(self, code, info=None):
        """Render a fenced code block; ``info`` becomes the fence info string.

        Trailing newlines are removed from ``code`` so the closing fence
        directly follows the last line of code.
        """
        info = info or ''
        code = code.rstrip('\n')
        return f'```{info}\n{code}\n```\n\n'

    def block_quote(self, text):
        """Prefix each line of the stripped ``text`` with ``> ``.

        The result ends with a single newline.
        """
        return '\n'.join([f'> {line}' for line in text.strip().splitlines()]) + '\n'

    def block_html(self, html):
        """Return block HTML followed by a blank line."""
        return f'{html}\n\n'

    def block_error(self, html):
        """Wrap ``html`` in a ``<div class="error">`` element."""
        # TODO: this is non-standard. Maybe ignore?
        return f'<div class="error">{html}</div>\n\n'

    def list(self, text, ordered, level, start=None):
        """Finish a list whose items were rendered by ``list_item``.

        Items arrive with ``*`` bullets. For an ordered list each bullet at
        the start of a line is replaced by an incrementing ``N. `` marker
        that begins at ``start`` (1 when ``start`` is None) and is padded to
        at least four characters. A level 1 list has its leading newlines
        removed and a blank line appended.
        """
        if ordered:
            number = 1 if start is None else int(start)

            def numbered(match):
                # Swap one `* ` bullet for the next number in the sequence.
                nonlocal number
                marker = f'{number}. '
                number += 1
                return f'{marker:<4}'

            text = UL_BULLET.sub(numbered, text)
        if level == 1:
            # Adjust blank lines for level 1 lists
            # TODO: fix some edge cases with nested lists
            text = text.lstrip('\n')
            text += '\n\n'
        return text

    def list_item(self, text, level):
        """Render one list item with a ``*`` bullet and a leading newline.

        Every line after the first is indented by one level so that it stays
        inside the item.
        """
        if '\n' in text:
            # Indent all lines after the first line.
            firstline, therest = text.split('\n', 1)
            text = '\n'.join([firstline, indent(therest, 1)])
        # The linebreak goes at the front for nested items
        return f'\n* {text}'
# Callable that parses Markdown source and renders it back to Markdown
# through MdRenderer.
md2md = create_markdown(escape=False, renderer=MdRenderer())


if __name__ == '__main__':
    import sys

    # Command line use: convert FILE, or standard input when the argument is
    # `-`, and print the result.
    if len(sys.argv) > 1:
        if sys.argv[1] == '-':
            src = sys.stdin.read()
        else:
            # Decode explicitly as UTF-8 rather than with the locale's default
            # encoding, which differs between platforms.
            with open(sys.argv[1], encoding='utf-8') as f:
                src = f.read()
        print(md2md(src))
    else:
        print(f'usage: {sys.argv[0]} FILE|-')
Hi @waylan! In SJTU-CSE/awesome-cs#31, I used your script to automatically validate whether the items in a Markdown list are sorted in dictionary order, and to pretty-print the validator's result. Thank you for this awesome work!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
lepture/mistune#208 may be relevant to this.
Also, note that this uses f-strings (see PEP 498), which are only supported in Python 3.6+.