Created
July 26, 2011 23:23
-
-
Save arextar/1108344 to your computer and use it in GitHub Desktop.
Simple html fragment parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * parse(str) -> DocumentFragment
 *
 * Simple HTML fragment parser. Turns a markup string into a DocumentFragment
 * built with the global `document` (createDocumentFragment / createElement /
 * createTextNode / setAttribute).
 *
 * Supported syntax:
 *   - character data (copied verbatim into text nodes; entities such as
 *     "&amp;" are NOT decoded),
 *   - start tags with attributes: name='v', name="v", name=v, or a bare name
 *     (a bare name is set to its own name, e.g. disabled="disabled"),
 *   - self-closing tags ("<x/>") and HTML void elements ("<br>", "<img>", ...),
 *     neither of which becomes the parent of what follows,
 *   - closing tags, which move up one level. The closing tag's name is not
 *     compared with the open element; a surplus closing tag at the top level
 *     is ignored.
 * Anything else that starts with "<" (comments, doctype, malformed or
 * unterminated tags) is kept as literal text instead of being dropped.
 *
 * Results are cached per input string; every call returns a fresh deep clone,
 * so callers may freely mutate what they receive.
 *
 * @param {string} str Markup to parse. null/undefined are treated as "".
 * @returns {DocumentFragment} A new fragment holding the parsed nodes.
 */
var parse = (function (r_text, r_open, r_attr, r_close, r_end, r_void, cache) {

  // Reads one complete start tag from the beginning of `src`.
  // Returns {name, attrs, unary, size} or null when `src` does not begin with
  // a well-formed start tag. Each step is anchored and consumes input, so the
  // scan is linear (no nested-quantifier backtracking).
  function readTag(src) {
    var open = r_open.exec(src);
    var attrs = [];
    var used;
    var hit;
    if (!open) {
      return null;
    }
    used = open[0].length;
    while ((hit = r_attr.exec(src.slice(used)))) {
      attrs.push(hit);
      used += hit[0].length;
    }
    hit = r_close.exec(src.slice(used));
    if (!hit) {
      return null;
    }
    return {
      name: open[1],
      attrs: attrs,
      unary: hit[1] === "/",
      size: used + hit[0].length
    };
  }

  // Value for one r_attr match: the name itself for a bare attribute,
  // otherwise whichever quoting form matched ("" for an empty value).
  function attrValue(hit) {
    if (hit[0].indexOf("=") < 0) {
      return hit[1];
    }
    return hit[2] || hit[3] || hit[4] || "";
  }

  // Builds the fragment for `src` using `doc` as the node factory.
  function build(doc, src) {
    var frag = doc.createDocumentFragment();
    var par = frag;   // element currently receiving children
    var text = "";    // pending character data, flushed as one text node
    var hit;
    var tag;
    var elem;
    var i;

    function flush() {
      if (text) {
        par.appendChild(doc.createTextNode(text));
        text = "";
      }
    }

    while (src) {
      if ((hit = r_text.exec(src))) {
        text += hit[0];
        src = src.slice(hit[0].length);
      } else if ((hit = r_end.exec(src))) {
        flush();
        // A void element never became the parent, so its (invalid) closing
        // tag must not pop a level either.
        if (!r_void.test(hit[1])) {
          par = par.parentNode || frag;
        }
        src = src.slice(hit[0].length);
      } else if ((tag = readTag(src))) {
        flush();
        elem = doc.createElement(tag.name);
        par.appendChild(elem);
        for (i = 0; i < tag.attrs.length; i++) {
          elem.setAttribute(tag.attrs[i][1], attrValue(tag.attrs[i]));
        }
        // Only descend into elements that can have content.
        if (!tag.unary && !r_void.test(tag.name)) {
          par = elem;
        }
        src = src.slice(tag.size);
      } else {
        // A "<" that starts nothing we understand: keep it as text and move
        // on, which guarantees progress on every iteration.
        text += "<";
        src = src.slice(1);
      }
    }
    flush();
    return frag;
  }

  return function (str) {
    var src = str == null ? "" : String(str);
    // Prefix the key so inputs like "constructor" or "__proto__" can never
    // collide with properties inherited from Object.prototype.
    var key = "$" + src;
    if (!cache[key]) {
      // `document` is looked up at call time so this script can be loaded
      // before a DOM is available.
      cache[key] = build(document, src);
    }
    return cache[key].cloneNode(true);
  };
})(
  // r_text: a run of character data up to (not including) the next "<"
  /^[^<]+/,
  // r_open: "<" followed by a tag name
  /^<(\w[\w:-]*)/,
  // r_attr: one attribute; [1]=name, [2]='single', [3]="double", [4]=unquoted
  /^\s*(\w[\w:-]*)(?:\s*=\s*(?:'([^']*)'|"([^"]*)"|([^\s"'=<>`]+)))?/,
  // r_close: end of a start tag; [1] is "/" for a self-closing tag
  /^\s*(\/?)>/,
  // r_end: a closing tag; [1]=name
  /^<\/(\w[\w:-]*)\s*>/,
  // r_void: HTML elements that never have content or a closing tag
  /^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i,
  // cache: "$" + source string -> parsed master fragment
  {}
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment