Skip to content

Instantly share code, notes, and snippets.

@wqweto
Last active January 17, 2018 15:05
Show Gist options
  • Save wqweto/43796a5d06ad9d7953bcc1825e39a806 to your computer and use it in GitHub Desktop.
Save wqweto/43796a5d06ad9d7953bcc1825e39a806 to your computer and use it in GitHub Desktop.
Battle of the slowest - markdown parser 2.0
--
-- lm_main.lua - a simple markdown to html converter with some twists
--
-- original battle at http://forums.bgdev.org/index.php?showtopic=49993
--
local lpeg = require("lpeg")
local re = require("lpeg.re")
-- LPeg substitution capture that rewrites the three HTML-significant
-- characters to their entities and copies every other character through.
local esc_pattern = lpeg.Cs((
    lpeg.P("&") / "&amp;"
  + lpeg.P("<") / "&lt;"
  + lpeg.P(">") / "&gt;"
  + lpeg.P(1)
)^0)

--- Escape `<`, `>` and `&` in a string for use as HTML text.
-- Strings containing none of those characters are returned as-is, without
-- running the LPeg pattern.
-- @param text string to escape
-- @return the escaped string
local function html_escape(text)
  if not string.find(text, "[<>&]") then
    return text
  end
  return esc_pattern:match(text)
end
-- Heading entries appended by the block grammar's `heading` callback
-- (each is { tag = "head", level = n, id = index, [1] = heading text }).
local toc = {}
-- Link definitions stored by the block grammar's `link_def` callback:
-- links[id] = href.
local links = {}
-- Block-level grammar in lpeg.re syntax (compiled in main with block_defs).
--   document   : table capture of one or more blocks, then optional empty
--                lines, then end of input.
--   block      : skips leading empty lines, then tries heading, link_def,
--                paragraph in that order.
--   heading    : 1 to 6 '#' characters (captured), not followed by another
--                '#', one %ws character, then the rest of the line
--                (captured); both captures go to the `heading` callback.
--   link_def   : '[id]' with an optional ':', one %ws character, then the
--                rest of the line; id and rest-of-line go to `link_def`.
--   paragraph  : captured run of consecutive non-empty lines, stopping before
--                an empty line, a link definition or a heading.
--   empty_line : a whitespace-only line ending in a newline, or trailing %ws
--                characters at end of input.
local block_pattern = [=[
document <- {| block+ |} <empty_line>* !.
block <- <empty_line>* (<heading> / <link_def> / <paragraph>)
heading <- ({ '#' '#'^-5 } !'#' %ws { (!%nl .)* } %nl?) -> heading
link_def <- ('[' { (!']' .)* } ']' ':'? %ws { (!%nl .)* } %nl?) -> link_def
paragraph <- { (%nl? !<empty_line> !<link_def> !<heading> (!%nl .)+)+ }
empty_line <- ((!%nl %s)* %nl) / (%ws+ !.)
]=]
-- Definitions referenced by block_pattern: the %ws character class and the
-- two capture callbacks.
local block_defs = {
  -- horizontal whitespace: space or tab
  ws = lpeg.S(" \t"),

  -- Record a heading in the table of contents and return the entry so it
  -- also appears in the parsed block list.
  -- level: the run of '#' characters; text: the heading text.
  heading = function(level, text)
    local entry = { tag = "head", level = #level, id = #toc + 1, text }
    toc[entry.id] = entry
    return entry
  end,

  -- Remember a link definition; returns nothing, so it produces no block.
  link_def = function(id, href)
    links[id] = href
  end,
}
-- Inline grammar in lpeg.re syntax (compiled in main with inline_defs; the
-- call that applies it in main is currently commented out).
--   toplevel : substitution capture over the whole subject, alternating
--              whitespace and <text> items, with '.' as a fallback.
--   text     : bold, italic, link, or a run of non-space characters passed
--              through html_escape; the run stops before a '*' or '_' that
--              is followed by whitespace or end of input.
--   bold     : '*' not followed by whitespace, one or more text items, and a
--              closing '*' that must be followed by whitespace/end of input;
--              the substituted content goes to the `bold` callback.
--   italic   : same shape as bold, using '_' and the `italic` callback.
--   link     : '[text]' where the match-time check `is_link` accepts the
--              text; the text goes to the `link` callback.
--   ws       : a whitespace character or end of input.
local inline_pattern = [=[
toplevel <- {~ (%s* <text> / <text>? %s+ / .)* ~}
text <- <bold> / <italic> / <link> / ((!([*_] ws) %S)+ -> html_escape)
bold <- ('*' !%s {~ (%s* <text>)+ ~} '*' &ws) -> bold
italic <- ('_' !%s {~ (%s* <text>)+ ~} '_' &ws) -> italic
link <- ('[' { (!']' .)+ => is_link } ']') -> link
ws <- %s / !.
]=]
-- Callbacks referenced by inline_pattern.
local inline_defs = {
  -- Wrap already-processed inline content in <b>...</b>.
  bold = function(text) return "<b>" .. text .. "</b>" end,

  -- Wrap already-processed inline content in <i>...</i>.
  italic = function(text) return "<i>" .. text .. "</i>" end,

  html_escape = html_escape,

  -- Match-time check: only treat [text] as a link when a definition with
  -- that id was collected.
  is_link = function(src, pos, text) return links[text] ~= nil end,

  -- Render a link for a known id. The href comes from the input document, so
  -- it is escaped before being placed in the single-quoted attribute:
  -- html_escape handles & < >, and the quote character is escaped separately
  -- because html_escape leaves it alone.
  link = function(text)
    local href = html_escape(links[text]):gsub("'", "&#39;")
    return string.format("<a href='%s'>%s</a>", href, html_escape(text))
  end,
}
--- Build an LPeg pattern for a delimited span.
-- The pattern matches `openp`, then one or more characters that do not start
-- the closing delimiter (captured), then the closing delimiter.
-- @param openp opening delimiter (anything lpeg.P accepts)
-- @param repl  optional replacement applied with the `/` capture operator;
--              when nil/false the plain pattern with its capture is returned
-- @param endp  optional closing delimiter; defaults to the opening one
-- @return the LPeg pattern
local function subst(openp, repl, endp)
  local opener = lpeg.P(openp)
  local closer = opener
  if endp then
    closer = lpeg.P(endp)
  end

  local body = lpeg.C((1 - closer)^1)
  local delimited = opener * body * closer

  if not repl then
    return delimited
  end
  return delimited / repl
end
-- Matches "[id]". When `id` has a collected link definition the span becomes
-- an anchor; otherwise the original bracketed text is reproduced unchanged.
-- The href comes from the input document, so it is escaped before being
-- placed in the single-quoted attribute: html_escape handles & < >, and the
-- quote character is escaped separately because html_escape leaves it alone.
-- The link text is inserted as-is (main passes already-escaped paragraph text).
local link_pattern = subst("[", nil, "]") / function(text)
  local href = links[text]
  if not href then
    return "[" .. text .. "]"
  end
  href = html_escape(href):gsub("'", "&#39;")
  return string.format("<a href='%s'>%s</a>", href, text)
end
-- Substitution capture over one or more of, tried in this order at each
-- position: *text* -> <b>text</b>, _text_ -> <i>text</i>, [id] via
-- link_pattern, or any single character copied through unchanged.
-- The content between delimiters is the plain capture made by subst, so it is
-- inserted verbatim rather than being processed again for nested markup.
-- Because of the trailing ^1 the pattern does not match an empty subject.
local fast_inline_pattern = lpeg.Cs((subst("*", "<b>%1</b>") + subst("_", "<i>%1</i>") + link_pattern + 1)^1)
--- Program entry point: read markdown, write an HTML page consisting of a
-- nested table of contents followed by the headings and paragraphs, and log
-- the elapsed CPU time after each phase to stderr.
-- @param arg command-line argument table: arg[0] is used as the program name
--   in the usage text, arg[1] is the optional input path (stdin when absent),
--   arg[2] is the optional output path (stdout when absent)
local function main(arg)
  local start_clock = os.clock()
  local usage = string.format([[
usage: %s [input.md] [output.html]
where input and output defaults to console
]], arg[0])
  local block_grammar = re.compile(block_pattern, block_defs)
  -- Only needed by the commented-out full-grammar path in the block loop
  -- below; the fast LPeg pattern is what is actually applied.
  local inline_grammar = re.compile(inline_pattern, inline_defs)
  -- Succeeds when the subject contains at least one character that needs
  -- inline processing or escaping.
  local inline_find = lpeg.S("*_<>&[")
  inline_find = (1 - inline_find)^0 * inline_find

  -- read input
  -- Plain "r" is text mode; Lua 5.2+ rejects "rt" as an invalid mode string.
  local inp = #arg < 1 and io.stdin or io.open(arg[1], "r")
  if inp == nil then
    io.stderr:write(usage .. string.format("ERROR: %s not found\n", arg[1]))
    os.exit(1)
  end
  local markdown = inp:read("*all")
  if inp ~= io.stdin then
    inp:close()
  end
  io.stderr:write(string.format("%.3f: after read input\n", os.clock() - start_clock))

  -- parse markdown in blocks (headings & paragraphs) while collecting link defs
  -- The grammar needs at least one block, so empty or blank-only input makes
  -- match return nil; treat that as a document without blocks.
  local blocks = block_grammar:match(markdown) or {}
  io.stderr:write(string.format("%.3f: after parse blocks\n", os.clock() - start_clock))

  -- dump html prolog
  -- Plain "w" is text mode; Lua 5.2+ rejects "wt" as an invalid mode string.
  local out = #arg < 2 and io.stdout or io.open(arg[2], "w")
  if out == nil then
    io.stderr:write(usage .. string.format("ERROR: cannot create %s\n", arg[2]))
    os.exit(2)
  end
  out:write("<html><head></head><body>\n")

  -- dump table-of-contents as nested ol's
  local level = 0
  for _, v in ipairs(toc) do
    -- open or close lists one step at a time until the nesting depth
    -- equals this heading's level
    while level ~= v.level do
      if level < v.level then
        out:write(string.rep(" ", level) .. "<ol>" .. "\n")
        level = level + 1
      else
        out:write(string.rep(" ", level - 1) .. "</ol>" .. "\n")
        level = level - 1
      end
    end
    out:write(string.format("%s<li><a id='_%d' href='#%d'>%s</a></li>\n",
      string.rep(" ", v.level), v.id, v.id, html_escape(v[1])))
  end
  -- close whatever lists are still open
  while level > 0 do
    out:write(string.rep(" ", level - 1) .. "</ol>\n")
    level = level - 1
  end
  io.stderr:write(string.format("%.3f: after dump TOC\n", os.clock() - start_clock))

  -- dump blocks and replace inline elements (bold, italic, links)
  for _, v in ipairs(blocks) do
    if type(v) == "table" then
      -- heading entry produced by the block grammar
      if v.tag == "head" then
        out:write(string.format("<h%d id='%d'>%s <a href='#_%d'>[^]</a></h%d>\n",
          v.level, v.id, html_escape(v[1]), v.id, v.level))
      end
    else
      -- paragraph text
      if inline_find:match(v) then
        --out:write("<p>"..inline_grammar:match(v).."</p>\n")
        out:write("<p>"..fast_inline_pattern:match(html_escape(v)).."</p>\n")
      else
        out:write("<p>"..html_escape(v).."</p>\n")
      end
    end
  end
  io.stderr:write(string.format("%.3f: after dump blocks\n", os.clock() - start_clock))

  -- dump html epilog
  out:write("</body></html>")
  -- Close a file we opened ourselves; stdout is left open.
  if out ~= io.stdout then
    out:close()
  end
  io.stderr:write(string.format("%.3f: done\n", os.clock() - start_clock))
end

main(arg)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment