Last active
October 4, 2016 00:02
-
-
Save voischev/767c2822b6fe12f1ac608aba2b1cc888 to your computer and use it in GitHub Desktop.
PostHTMLTree.js ideas [WIP]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
format PostHTMLTree | |
@see https://dev.w3.org/html5/html-author/ | |
*/ | |
/* | |
declarations | |
@see https://dev.w3.org/html5/html-author/#doctype-declaration | |
<!DOCTYPE | |
HTML | |
PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd"> | |
*/ | |
{ | |
type: 'declaration', | |
name: 'doctype', | |
raw: '<!DOCTYPE\n HTML\n PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
// <!-- comment --> | |
{ | |
type: 'declaration', | |
name: 'comment', | |
raw: '<!-- comment -->', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
// <![CDATA[x<y]]> | |
{ | |
type: 'declaration', | |
name: 'cdata', | |
raw: '<![CDATA[x<y]]>', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
// <?php $php = 1 ?> | |
{ | |
type: 'declaration', | |
name: 'php', | |
raw: '<?php $php = 1 ?>', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
/* | |
elements | |
normal, void, raw text, RCDATA and foreign elements | |
@see https://dev.w3.org/html5/html-author/#elements | |
@see https://dev.w3.org/html5/html-author/#tags | |
@see https://dev.w3.org/html5/html-author/#void | |
@see https://dev.w3.org/html5/html-author/#raw-text-elements | |
@see https://dev.w3.org/html5/html-author/#rcdata-elements | |
@see https://dev.w3.org/html5/html-author/#foreign-elements & http://www.w3.org/TR/html5/syntax#foreign-elements | |
@see https://dev.w3.org/html5/html-author/#normal-elements | |
*/ | |
/* | |
tag | |
@see https://dev.w3.org/html5/html-author/#tags | |
example: <p>The quick brown fox jumps over the lazy dog.</p> | |
*/ | |
{ | |
type: 'element', | |
term: 'normal', | |
name: 'p', | |
syntax: 'normal', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
}, | |
content: [...] | |
} | |
// self-closing tag | |
// <p>The quick brown fox<br/> | |
// jumps over the lazy dog.</p> | |
{ | |
type: 'element', | |
term: 'void', | |
name: 'br', | |
syntax: 'self-closing', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
// <span name="value"/> | |
{ | |
type: 'element', | |
term: 'void', | |
name: 'span', | |
syntax: 'self-closing', | |
attrs: ..., | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
/* | |
void elements | |
@see https://dev.w3.org/html5/html-author/#void | |
example: <hr> | |
*/ | |
// <hr> | |
{ | |
type: 'element', | |
term: 'void', | |
name: 'hr', | |
syntax: 'normal', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
// <hr/> | |
{ | |
type: 'element', | |
term: 'void', | |
name: 'hr', | |
syntax: 'self-closing', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
/* | |
raw-text elements | |
@see https://dev.w3.org/html5/html-author/#raw-text-elements | |
example: <script>var a = 'a';</script> | |
*/ | |
{ | |
type: 'element', | |
name: 'script', | |
spec: { | |
term: 'raw-text', | |
syntax: 'normal' | |
}, | |
data: 'var a = \'a\';', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
/* | |
RCDATA elements | |
@see https://dev.w3.org/html5/html-author/#rcdata-elements | |
example: | |
<textarea> | |
This can contain character references like &, < and >, | |
but such characters can also be written directly as &, < and >. | |
Strings that look like <!-- comments --> or other elements <span> | |
are treated as plain text, instead of markup. | |
</textarea> | |
*/ | |
{ | |
type: 'element', | |
name: 'textarea', | |
spec: { | |
term: 'rcdata', | |
syntax: 'normal' | |
}, | |
data: 'This can contain character references like &, < and >,\n but such characters can also be written directly as &, < and >.\n Strings that look like <!-- comments --> or other elements <span>\n are treated as plain text, instead of markup.', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
/* | |
foreign-elements | |
@see https://dev.w3.org/html5/html-author/#foreign-elements & http://www.w3.org/TR/html5/syntax#foreign-elements | |
example: | |
<p> | |
<svg> | |
<metadata> | |
<!-- this is invalid --> | |
<cdr:license xmlns:cdr="http://www.example.com/cdr/metadata" name="MIT"/> | |
</metadata> | |
</svg> | |
</p> | |
*/ | |
{ | |
type: 'element', | |
spec: { | |
term: 'foreign', | |
syntax: 'self-closing' | |
}, | |
name: 'cdr:license', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
/* | |
normal elements | |
@see https://dev.w3.org/html5/html-author/#normal-elements | |
example: <div><br/></div> | |
*/ | |
{ | |
type: 'element', | |
spec: { | |
term: 'normal', | |
syntax: 'normal' | |
}, | |
name: 'div', | |
content: [ | |
{ | |
type: 'element', | |
spec: { | |
term: 'void', | |
syntax: 'self-closing' | |
}, | |
name: 'br', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
], | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} | |
/* | |
text | |
example: Привет | |
Мир! | |
*/ | |
{ | |
type: 'text', | |
data: 'Привет\n Мир!', | |
position: { // for example | |
start: { line: 1, column: 1, offset: 0 }, | |
end: { line: 1, column: 16, offset: 15 } | |
} | |
} |
We could try one of these
or vanilla :). Should the parser be able to handle spec-related fixes like parse5 does? I think it's better to be forgiving (SVG, XML) like htmlparser2, while being as spec-compliant as possible (or to provide two different parsing modes if that is impossible to implement). Do you have other resources on parser architecture in mind or available?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Any updates on this ? :)