Last active
February 15, 2018 05:24
-
-
Save MadeByMike/1c4ca3ecbae07ef46b9e to your computer and use it in GitHub Desktop.
Highlighting a sentence in HTML without destroying the markup around it is exceptionally hard!!!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*!
 * Highlight words in html - v0.0.1 - 2015-04-29
 * http://madebymike.com.au
 * Copyright (c) 2015 Mike Riethmuller; Licensed MIT
 */
/*
 * It turns out that annotating a phrase within HTML is exceptionally hard. I've done it so you don't have to.
 * The function accepts a phrase (string), HTML (string), and optionally a class name (string).
 * It returns the same HTML with <mark> elements surrounding the phrase, or false if the phrase cannot be found.
 * The phrase can be interrupted by inline elements such as <strong> or <em>, or can be split across block elements.
 * Line breaks, images and empty elements are ignored. All that matters is that the words are in order.
 */
/**
 * Wraps the first occurrence of a phrase inside an HTML string in <mark> elements
 * without disturbing the surrounding markup.
 *
 * The phrase is matched (case-sensitively) against the concatenated text of all
 * non-blank text nodes, in document order. It may therefore sit inside a single
 * text node or span several nodes separated by inline or block elements; every
 * text node that contributes to the match gets its own <mark>. Whitespace-only
 * text nodes are ignored entirely, so they neither help nor hinder a match.
 *
 * An empty phrase matches trivially and the (re-serialised) HTML is returned
 * with no <mark> elements added.
 *
 * @param {string} phrase - Literal text to highlight. Not treated as a pattern.
 * @param {string} source - HTML markup to search.
 * @param {string} [classname] - Class applied to every <mark>; defaults to "".
 * @returns {string|false} The HTML with the phrase marked, or false when the
 *   phrase does not occur in the text.
 */
function highlightWordsInHTML(phrase, source, classname) {
  var container = document.createElement("div");
  // NOTE(review): assigning innerHTML parses `source` as live markup, so inline
  // event handlers (e.g. <img onerror>) may run. Only pass trusted or sanitised HTML.
  container.innerHTML = source;

  // Step 1: collect every non-blank text node together with the offset at
  // which its text starts in the flattened document text.
  var segments = [];
  var offset = 0;
  var node = container.firstChild;
  while (node) {
    if (node.nodeType === 3 && node.nodeValue.trim() !== "") {
      segments.push({ node: node, start: offset, text: node.nodeValue });
      offset += node.nodeValue.length;
    }
    if (node.firstChild) {
      node = node.firstChild;
    } else {
      // Climb until an ancestor has a next sibling, stopping at the container.
      while (node !== container && !node.nextSibling) {
        node = node.parentNode;
      }
      node = node === container ? null : node.nextSibling;
    }
  }

  // Step 2: locate the phrase in the flattened text. A plain indexOf avoids
  // building a RegExp from user text, so characters such as "(" or "?" are safe.
  var flattened = segments
    .map(function (segment) {
      return segment.text;
    })
    .join("");
  var matchStart = flattened.indexOf(phrase);
  if (matchStart === -1) {
    console.log('phrase could not be found');
    return false;
  }
  var matchEnd = matchStart + phrase.length;

  // Step 3: for every text node overlapping [matchStart, matchEnd), split it
  // into "before", marked and "after" pieces and swap them in for the node.
  segments.forEach(function (segment) {
    var text = segment.text;
    var from = Math.max(matchStart, segment.start) - segment.start;
    var to = Math.min(matchEnd, segment.start + text.length) - segment.start;
    if (from >= to) {
      return; // This text node is outside the matched range.
    }

    var parent = segment.node.parentNode;
    var mark = document.createElement("mark");
    mark.className = classname || "";
    mark.appendChild(document.createTextNode(text.slice(from, to)));

    if (from > 0) {
      parent.insertBefore(document.createTextNode(text.slice(0, from)), segment.node);
    }
    parent.insertBefore(mark, segment.node);
    if (to < text.length) {
      parent.insertBefore(document.createTextNode(text.slice(to)), segment.node);
    }
    parent.removeChild(segment.node);
  });

  return container.innerHTML;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment