Created
September 11, 2017 21:37
-
-
Save rezen/2c11a1ac76f5922476b5d2ebf0ce6283 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
/**
 * https://github.com/SalesforceEng/secure-filters
 *
 * @description
 * Strings are frequently dirty and need to
 * have some HTML removed. Oftentimes you may
 * want to control the filtering a bit more, for
 * example removing all elements except anchors.
 * Maybe you want to remove all tags except
 * anchors that have [data-keep].
 *
 * This module supports removing HTML with
 * options for finer-grained control.
 *
 */
/**
 * Regex-based HTML stripper with per-element customisation.
 *
 * By default every tag is removed and its text content is kept, except for
 * the elements listed in `removeInner` (initially `style` and `script`),
 * which are removed together with everything between their open and close
 * tags. Custom replace rules can override what a given tag is replaced with.
 *
 * NOTE: this is a single-pass, regex-based, best-effort filter, not an HTML
 * parser. Output destined for an HTML context should still go through
 * `escape()`.
 *
 * @constructor
 * @return {TrimHtml}
 */
function TrimHtml() {
  // Tolerate `TrimHtml()` without `new`: in strict mode `this` would be
  // undefined and every `this.x = ...` below would throw.
  if (!(this instanceof TrimHtml)) {
    return new TrimHtml();
  }

  /**
   * Lookahead asserting that an element name has ended, so that `script`
   * does not match the prefix of `<scripted>`.
   * @type {String}
   */
  var NAME_END = '(?![a-z0-9-])';

  /**
   * Remainder of a tag after its name: any attributes, then the closing `>`.
   * @type {String}
   */
  var TAG_REST = '[^<>]*>';

  /**
   * Escape regex metacharacters so a name can be embedded in `new RegExp`.
   * @param  {String} text
   * @return {String}
   */
  function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /**
   * Element names are compared case-insensitively, stored lower-cased.
   * @param  {String} elementName
   * @return {String}
   */
  function normalizeName(elementName) {
    return String(elementName).toLowerCase();
  }

  /**
   * Coerce input to a string; `null` / `undefined` become the empty string.
   * @param  {*} value
   * @return {String}
   */
  function toText(value) {
    return value === null || value === undefined ? '' : String(value);
  }

  /**
   * Regex factories used to find element tags. Each call returns a fresh
   * RegExp so no `lastIndex` state is shared between (possibly nested) calls.
   * @type {Object}
   */
  var regex = {
    // Any open or close tag; capture group 1 is the element name.
    tags : function() {
      return /<\/?([a-z][a-z0-9-]*)[^<>]*>/gi;
    },
    // An open tag of `element`, the shortest run of content (newlines
    // included) and the matching close tag.
    tagAndContent : function(element) {
      var name = escapeRegExp(element);
      return new RegExp(
        '<' + name + NAME_END + TAG_REST +
        '[\\s\\S]*?' +
        '<\\/' + name + NAME_END + TAG_REST,
        'gi'
      );
    },
    // A lone open or close tag of `element` (e.g. an unclosed `<script>`).
    strayTag : function(element) {
      return new RegExp('<\\/?' + escapeRegExp(element) + NAME_END + TAG_REST, 'gi');
    }
  };

  /**
   * Map of lower-cased element name -> custom replace function.
   * Created without a prototype so that tag names such as `constructor`
   * do not resolve to inherited Object properties.
   * @type {Object}
   */
  var replaceRules = Object.create(null);

  /**
   * Tags that we also want to remove the inner content for
   * @type {Array}
   */
  var removeInner = [
    'style',
    'script'
  ];

  // Control characters (tab, LF and CR are deliberately excluded).
  var HTML_CONTROL = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]/g;

  /**
   * Decide what a single matched tag is replaced with.
   * Invoked by String.prototype.replace as (match, group1, ...).
   * @param  {String} tag  The full matched tag, e.g. `<a href="x">`
   * @param  {String} el   The captured element name
   * @return {String}
   */
  function replaceTag(tag, el) {
    var name = el.toLowerCase();

    // Elements whose content must go too are left in place here so the
    // second pass can find the open/close pair and drop both with content.
    if (removeInner.indexOf(name) !== -1) {
      return tag;
    }

    // A custom rule decides the replacement for this tag.
    var rule = replaceRules[name];
    if (rule !== undefined) {
      return rule(tag);
    }

    // Default: drop the tag, keep the surrounding text.
    return '';
  }

  /**
   * Escape the characters that are significant in HTML text and
   * double-quoted attribute values: `&`, `"`, `<` and `>`.
   * @param  {String} string
   * @return {String}
   */
  this.escape = function(string) {
    // `&` must be handled first so the entities produced below are not
    // themselves re-escaped.
    return toText(string)
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  };

  /**
   * Strip the html tags from a given string
   * @param  {String} string
   * @return {String}
   */
  this.trim = function(string) {
    var text = toText(string).replace(HTML_CONTROL, ' ');

    // Remove tags but not content
    var trimmed = text.replace(regex.tags(), replaceTag);

    // Remove tags AND content, then any unpaired leftovers of those tags
    for (var i = 0; i < removeInner.length; i++) {
      trimmed = trimmed
        .replace(regex.tagAndContent(removeInner[i]), '')
        .replace(regex.strayTag(removeInner[i]), '');
    }

    return trimmed;
  };

  /**
   * Add an element to the list of elements
   * that we remove the element and content
   * @param {String} elementName
   */
  this.addRemoveInner = function(elementName) {
    var name = normalizeName(elementName);
    if (removeInner.indexOf(name) === -1) {
      removeInner.push(name);
    }
  };

  /**
   * Add a custom replace rule. The function receives the full matched tag
   * (open or close) and returns its replacement. Passing `null` or
   * `undefined` removes any existing rule for the element.
   * @param {String}   elementName
   * @param {Function} func
   * @throws {TypeError} if `func` is neither a function nor null/undefined
   */
  this.addReplaceRule = function(elementName, func) {
    var name = normalizeName(elementName);

    if (func === null || func === undefined) {
      delete replaceRules[name];
      return;
    }
    if (typeof func !== 'function') {
      throw new TypeError('Replace rule for "' + name + '" must be a function');
    }
    replaceRules[name] = func;
  };

  return this;
}
module.exports = TrimHtml; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment