Last active
April 4, 2017 05:54
-
-
Save santoshrajan/6781970 to your computer and use it in GitHub Desktop.
Markdown Parser in 1k of JavaScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Script bootstrap: read the Markdown document named by the first
// command-line argument into `src` as a UTF-8 string.
var fs = require("fs")
var inputPath = process.argv[2]
if (!inputPath) {
    // Without this check readFileSync(undefined) fails with an opaque
    // argument-type error; report how to invoke the script instead.
    console.error("Usage: node " + process.argv[1] + " <markdown-file>")
    process.exit(1)
}
var src = fs.readFileSync(inputPath, 'utf8')
// createParser and createBodyParser create parsers. A parser takes a string
// and, on success, returns an array of two elements: the object representation
// of the consumed portion and the remainder of the string. On failure it returns null.
// markdownParser is the top-level grammar. It is a body parser of type
// "markdown" whose children are tried in the order listed below; the first
// child that matches consumes its part of the input and the list is tried
// again on the remainder. It returns [ast, remainder], or null when nothing
// at the start of the input matches.
var markdownParser = createBodyParser("markdown",
    createParser('newline', /^(\n+)/),
    // Each heading pattern requires a space right after its run of '#', so
    // "## x" cannot be mistaken for an h1.
    createParser('h1', /^# ([^\n]+)/),
    createParser('h2', /^## ([^\n]+)/),
    createParser('h3', /^### ([^\n]+)/),
    createParser('h4', /^#### ([^\n]+)/),
    createParser('h5', /^##### ([^\n]+)/),
    createParser('h6', /^###### ([^\n]+)/),
    // Block-level lines end at a newline OR at the end of the input, so the
    // last line of a file without a trailing newline is still recognised.
    createBodyParser('codeblock',
        createParser('code', /^ {4}([^\n]+)(?:\n|$)/)
    ),
    createBodyParser('listblock',
        createParser('list', /^\* ([^\n]+)(?:\n|$)/)
    ),
    // Ordered items are "<digits>. text"; the dot is required so that a
    // paragraph that merely starts with a number is not treated as a list.
    createBodyParser('orderedListblock',
        createParser('orderedList', /^[0-9]+\. ([^\n]+)(?:\n|$)/)
    ),
    createBodyParser("paragraph",
        // Plain text up to the next character that may start inline markup.
        createParser('inlinetext', /^([^\n\[\!\*]+)/),
        linkParser,
        imageParser,
        // 'strong' must be tried before 'emphasis': the single-star pattern
        // also matches "**bold**" (capturing "*bold" and leaving a stray "*").
        createParser('strong', /^\*\*(.+?)\*\*/),
        createParser('emphasis', /^\*(.+?)\*/),
        // Fallback: anything left on the line that is not valid markup.
        createParser('text', /^([^\n]+)/)
    )
)
// createParser takes a parser type and a regex pattern and returns the
// corresponding parser function. The returned parser takes a string and
// returns [{type: type, content: <first capture group>}, remainder] when the
// pattern matches, or null when it does not. The remainder is the input with
// the matched text removed. If this parser does not suit your requirement,
// write your own parser.
function createParser(type, pattern) {
    return function(src) {
        var match = src.match(pattern)
        if (!match) {
            return null
        }
        // Cut the matched text out using the match we already have instead of
        // running the regex a second time through replace().
        var matchEnd = match.index + match[0].length
        var remainder = src.slice(0, match.index) + src.slice(matchEnd)
        return [{type: type, content: match[1]}, remainder]
    }
}
// createParser is not suitable for parsing links and images because they
// carry two captured values, so they get hand-written parsers.
// linkParser matches "[text](href)" at the start of src and returns
// [{type: "link", content: text, href: href}, remainder], or null when src
// does not start with a link.
function linkParser(src) {
    var match = src.match(/^\[(.+?)\]\((.+?)\)/)
    if (!match) {
        return null
    }
    // The pattern is anchored at the start, so the remainder is simply
    // everything after the matched text.
    return [{type: "link", content: match[1], href: match[2]}, src.slice(match[0].length)]
}
// imageParser matches "![alt](href)" at the start of src and returns
// [{type: "image", alt: alt, href: href}, remainder], or null when src does
// not start with an image. The alt text may be empty ("![](href)").
function imageParser(src) {
    // (.*?) rather than (.+?) for the alt text so that an image without alt
    // text is still recognised as an image.
    var match = src.match(/^\!\[(.*?)\]\((.+?)\)/)
    if (!match) {
        return null
    }
    // The pattern is anchored at the start, so the remainder is simply
    // everything after the matched text.
    return [{type: "image", alt: match[1], href: match[2]}, src.slice(match[0].length)]
}
// createBodyParser takes as its first argument the "type" of the parser. The
// rest of the arguments are the parsers that make up the body parser.
// The returned parser repeatedly tries the child parsers, in the order given,
// against the start of the input; the first one that succeeds contributes its
// node to the body and the search restarts on the remainder. It returns
// [{type: type, body: [nodes...]}, remainder], or null when no child matched
// at all. It throws an Error if a child reports success without consuming
// any input, because that could never terminate.
function createBodyParser() {
    var parsers = Array.prototype.slice.call(arguments),
        type = parsers.shift()
    return function bodyParser(src) {
        // The body is local to each call, so the parser keeps no state
        // between (or during nested) invocations.
        var body = [],
            rest = src,
            matched = true,
            result, i
        // Iterate rather than recurse once per node: long inputs would
        // otherwise need one stack frame for every node parsed.
        while (matched) {
            matched = false
            for (i = 0; i < parsers.length; i++) {
                result = parsers[i](rest)
                if (result) {
                    if (result[1] === rest) {
                        throw new Error("createBodyParser(" + type + "): child parser " + i + " matched without consuming input")
                    }
                    body.push(result[0])
                    rest = result[1]
                    matched = true
                    // Restart from the first child on the remainder.
                    break
                }
            }
        }
        if (body.length === 0) {
            return null
        }
        return [{type: type, body: body}, rest]
    }
}
// Parse the input and print the resulting AST as JSON indented by 4 spaces.
// markdownParser returns null when nothing could be parsed (for example an
// empty file); print an empty "markdown" node in that case instead of
// failing on null[0].
var parsed = markdownParser(src)
var ast = parsed ? parsed[0] : {type: "markdown", body: []}
console.log(JSON.stringify(ast, null, 4))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment