Skip to content

Instantly share code, notes, and snippets.

@scriptype
Last active April 22, 2017 15:22
Show Gist options
  • Save scriptype/a96279c03e55c5da5ab57f1e2622b890 to your computer and use it in GitHub Desktop.
Save scriptype/a96279c03e55c5da5ab57f1e2622b890 to your computer and use it in GitHub Desktop.
Flawed HTML Parser
(function(input) {
/**
 * Tiny regex-based HTML fragment parser.
 *
 * Known limitations (inherent to the regex approach):
 *  - only the first element of a fragment (and, recursively, the first
 *    child element of each element) is parsed; siblings are ignored
 *  - the closing tag is matched greedily, so for same-named siblings such as
 *    `<p>a</p><p>b</p>` the innerHTML spans up to the LAST `</p>`
 *  - `.` does not match line breaks, so markup should be on a single line
 *  - attribute values must not contain `>`
 */
class HTMLFragment {
  /**
   * @param {string} htmlText Raw HTML markup.
   */
  constructor(htmlText) {
    this.raw = htmlText
    // removeWhitespace is expected to be defined in the enclosing scope
    this.parsed = HTMLFragment.parse(removeWhitespace(htmlText))
  }

  /**
   * Parses the first element found in `htmlText`.
   *
   * @param {string} htmlText Markup to parse.
   * @returns {?{tagName: string, attributes: Object, innerHTML: string, firstChild: ?Object}}
   *   Description of the first element, or `null` when the text contains no
   *   `<tag ...>...</tag>` pair (or is not a string).
   */
  static parse(htmlText) {
    if (typeof htmlText !== 'string') {
      return null
    }

    // The attribute group must start with whitespace and may not contain `>`,
    // so it can neither swallow the end of the opening tag nor part of the
    // element body. `(.*)` (instead of `(.+)`) also accepts empty elements.
    const elementPattern = /<(\w+)(\s[^>]*)?>(.*)<\/\1>/
    const match = htmlText.match(elementPattern)
    if (!match) {
      return null
    }

    // The attribute group is undefined for bare tags such as `<p>`.
    const [, tagName, attributesText = '', innerHTML] = match

    return {
      tagName,
      attributes: HTMLFragment.parseAttributes(attributesText),
      innerHTML,
      firstChild: HTMLFragment.parse(innerHTML)
    }
  }

  /**
   * Helper for `parse`: turns the text between the tag name and `>` into a
   * name -> value map.
   *
   * Values may be double-quoted, single-quoted or unquoted. Attributes
   * without a value (or with an empty value) map to `true`. When a name is
   * repeated, the last occurrence wins.
   *
   * @param {string} attributesText e.g. ` class="a b" id='x' hidden`
   * @returns {Object<string, (string|boolean)>}
   */
  static parseAttributes(attributesText) {
    // Capture groups: 1 = name, 2 = "double quoted", 3 = 'single quoted',
    // 4 = unquoted. Capturing the value directly (rather than splitting on
    // `=`) keeps values that themselves contain `=` intact.
    const attributePattern = /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>]+)))?/g
    const attributes = {}
    let match

    // The name group consumes at least one character, so every iteration
    // advances lastIndex and the loop always terminates.
    while ((match = attributePattern.exec(attributesText)) !== null) {
      const [, name, doubleQuoted, singleQuoted, unquoted] = match
      const value = [doubleQuoted, singleQuoted, unquoted].find(v => v !== undefined)
      attributes[name] = value || true
    }

    return attributes
  }
}
/**
 * Collapses multi-line markup onto a single line.
 *
 * Whitespace directly after a tag, and line breaks directly before a tag,
 * are dropped. Any other line break separates two tokens (words or
 * attributes) and is therefore replaced by a single space instead of being
 * deleted, so `Lorem\nipsum` does not become `Loremipsum`.
 *
 * @param {string} html Markup, possibly spanning several lines.
 * @returns {string} Single-line, trimmed markup.
 */
function removeWhitespace(html) {
  return html
    .replace(/\r\n?/g, '\n') // normalise CRLF / CR line endings
    .replace(/>\s+/g, '>') // whitespace following a tag
    .replace(/\s*\n\s*</g, '<') // line break preceding a tag
    .replace(/\s*\n\s*/g, ' ') // remaining line breaks separate tokens
    .trim()
}
return new HTMLFragment(input)
})(`
<div class="container pull-left" id="hello">
<h1 id="main-header" class="title" aria-role="title">Oh my header!</h1>
<p>
Lorem ipsum dolor sit amet <a href="http://enes.in">Enes</a>. So, good bye!
</p>
</div>
`)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment