✓ Bar
✓ A &lt;p&gt;Baz!&lt;/p&gt; B
✓ &lt;p&gt;Foo&lt;/p&gt; &lt;script&gt; a < b; &lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt;
✓ &lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript'&gt; a < b || c > d; &lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt;
✓ D &lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript'&gt;$('body').append('<' + 'script> a < b; <' + '/script>');&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;
✓ D &lt;p&gt;Foo&lt;/p&gt; &lt;script&gt;<![CDATA[ $('body').append('<script> a < b; </script>'); ]]>&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;
✓ D &lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript' &gt;<![CDATA[ $('body').append('<script> a < b; </script>'); ]]>&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;
✓ &lt;script type='text/javascript' &gt; a < b || c > d; &lt;/script&gt;
✓ &lt;script &gt; a < b || c > d; &lt;/script&gt;
Last active
December 29, 2015 14:49
-
-
Save ElliotChong/7686596 to your computer and use it in GitHub Desktop.
HTML sanitization script that escapes angle brackets but ignores those within <script> tags. Tested in V8. The forEach calls will need to be swapped out for it to work in legacy JS runtimes.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Raw HTML inputs for sanitize(), one per test case. They cover plain text,
// markup without scripts, scripts with and without attributes, scripts whose
// body contains bracket characters, and CDATA-wrapped script bodies.
var tests = [
    "Bar",
    "A <p>Baz!</p> B",
    "<p>Foo</p> <script> a < b; </script> <span>Blam!</span>",
    "<p>Foo</p> <script type='text/javascript'> a < b || c > d; </script> <span>Blam!</span>",
    "D <p>Foo</p> <script type='text/javascript'>$('body').append('<' + 'script> a < b; <' + '/script>');</script> <span>Blam!</span> F <b>O</b>",
    "D <p>Foo</p> <script><![CDATA[ $('body').append('<script> a < b; </script>'); ]]></script> <span>Blam!</span> F <b>O</b>",
    "D <p>Foo</p> <script type='text/javascript' ><![CDATA[ $('body').append('<script> a < b; </script>'); ]]></script> <span>Blam!</span> F <b>O</b>",
    "<script type='text/javascript' > a < b || c > d; </script>",
    "<script > a < b || c > d; </script>"
];
// Expected sanitize() output for each test input, index for index.
// Every bracket outside a script body is escaped, including the brackets of
// the <script> and </script> tags themselves; script bodies are left verbatim.
var expected = [
    "Bar",
    "A &lt;p&gt;Baz!&lt;/p&gt; B",
    "&lt;p&gt;Foo&lt;/p&gt; &lt;script&gt; a < b; &lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt;",
    "&lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript'&gt; a < b || c > d; &lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt;",
    "D &lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript'&gt;$('body').append('<' + 'script> a < b; <' + '/script>');&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;",
    "D &lt;p&gt;Foo&lt;/p&gt; &lt;script&gt;<![CDATA[ $('body').append('<script> a < b; </script>'); ]]>&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;",
    "D &lt;p&gt;Foo&lt;/p&gt; &lt;script type='text/javascript' &gt;<![CDATA[ $('body').append('<script> a < b; </script>'); ]]>&lt;/script&gt; &lt;span&gt;Blam!&lt;/span&gt; F &lt;b&gt;O&lt;/b&gt;",
    "&lt;script type='text/javascript' &gt; a < b || c > d; &lt;/script&gt;",
    "&lt;script &gt; a < b || c > d; &lt;/script&gt;"
];

// Caching the RegExps to avoid instantiating on each sanitize() call.
//
// Limitations that follow directly from these patterns:
//  - `.` does not match line terminators, so a script element whose opening
//    and closing tags sit on different lines is not recognised and the whole
//    input is escaped instead.
//  - `.*` is greedy, so on a single line the script span runs from the first
//    opening tag to the LAST closing tag. That is what keeps a literal
//    "</script>" inside a script body intact, but it also means markup lying
//    between two separate script elements is treated as script body.
//  - Matching is case-sensitive ("<SCRIPT>" is not recognised).
var sanitizeRegExps = {
    // A script element whose body holds at least one '<' or '>'.
    containsScriptWithBracket: /(<\s*script.*>.*(<|>)+.*<\s*\/\s*script\s*>)/,
    // An opening or a closing script tag anywhere in the string.
    isScript: /(<\s*script.*>)|(<\s*\/\s*script\s*>)/,
    // Captures the whole script span so split() keeps it as its own piece.
    splitOnScript: /(<\s*script.*>.*<\s*\/\s*script\s*>)/,
    // Captures the leading "<script" of a piece that starts with a script tag.
    splitOnFirstScript: /^(<\s*script)/,
    // Captures the closing tag sitting at the very end of a piece.
    splitOnLastScript: /(<\s*\/\s*script\s*>$)/,
    lt: /</g,
    gt: />/g
};

/**
 * Escapes '<' and '>' as HTML entities everywhere except inside the body of a
 * script element, which is copied through unchanged. The script tags
 * themselves are escaped like any other markup.
 *
 * @param {string} p_string - Raw HTML to sanitize.
 * @returns {string} The input with brackets outside script bodies replaced by
 *     "&lt;" / "&gt;".
 */
function sanitize(p_string)
{
    // Replaces every angle bracket with its HTML entity.
    function replaceBrackets(p_text)
    {
        return p_text.replace(sanitizeRegExps.lt, '&lt;').replace(sanitizeRegExps.gt, '&gt;');
    }

    // No script body with brackets to protect: escape the whole string.
    if (!sanitizeRegExps.containsScriptWithBracket.test(p_string))
    {
        return replaceBrackets(p_string);
    }

    var modified = "";

    // The capture group in splitOnScript makes split() return the script span
    // as its own element between the surrounding non-script pieces.
    p_string.split(sanitizeRegExps.splitOnScript).forEach(
        function (p_split)
        {
            if (!sanitizeRegExps.isScript.test(p_split))
            {
                modified += replaceBrackets(p_split);
                return;
            }

            // Expected shape: ["", "<script", "<rest of the span>"].
            var splits = p_split.split(sanitizeRegExps.splitOnFirstScript);

            // Reached when a piece contains a script tag but does not start
            // with "<script" (for example a stray "</script>" ahead of the
            // script span). Escape the whole piece to stay on the safe side.
            if (splits.length !== 3)
            {
                console.warn("Unexpected <script> string passed, defaulting to sanitizing it.\n", splits);
                modified += replaceBrackets(p_split);
                return;
            }

            // Opening tag: "<script" plus everything up to its first '>'.
            modified += replaceBrackets(splits[1]);
            var scriptEndIndex = splits[2].indexOf(">") + 1;
            modified += replaceBrackets(splits[2].slice(0, scriptEndIndex));

            // Script body is copied verbatim; the trailing closing tag is escaped.
            splits = splits[2].slice(scriptEndIndex).split(sanitizeRegExps.splitOnLastScript);
            modified += splits[0];
            modified += replaceBrackets(splits[1]);
        }
    );

    return modified;
}
// Run every input through sanitize() and print the result, prefixed with a
// check mark when it equals the expected string at the same index and with a
// cross otherwise.
tests.forEach(
    function (p_value, p_index)
    {
        var sanitized = sanitize(p_value);
        // Strict comparison: both sides are strings, no coercion wanted.
        var mark = (sanitized !== expected[p_index]) ? "✘\t" : "✓\t";
        console.log(mark, sanitized, "\n");
    }
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment