update
I've created a little repository that simply exposes the final utility as an npm
module.
It's called html-escaper
there is basically one rule only: do not ever replace one char after another if you are transforming a string into another.
// WARNING: THIS IS WRONG
// if you are that kind of dev that does this
/**
 * Escapes the five HTML-significant characters by chaining one
 * replace() call per character.
 *
 * Deliberate counterexample: every call re-scans the output of the
 * previous one, so the result depends on the order of the calls. It is
 * only correct here because `&` is handled first.
 *
 * @param {string} s - text to escape
 * @returns {string} text with & < > ' " replaced by HTML entities
 */
function escape(s) {
  return s.replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/'/g, "&#39;")
    .replace(/"/g, "&quot;");
}
// you might be the same dev that does this too
/**
 * Reverses HTML escaping by chaining one replace() call per entity.
 *
 * Deliberate counterexample: `&amp;` is decoded first, and the `&` it
 * produces can combine with the following text into a new entity that a
 * later call decodes again. Input such as `&amp;lt;` therefore comes out
 * as `<` instead of `&lt;`.
 *
 * @param {string} s - escaped text
 * @returns {string} text after the five sequential replacements
 */
function unescape(s) {
  return s.replace(/&amp;/g, "&")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&#39;/g, "'")
    .replace(/&quot;/g, '"');
}
// guess what we have here ?
// (the input is the escaped form of the literal text "&lt;")
unescape('&amp;lt;');
// now guess this XSS too ...
// (the input is the escaped form of already-escaped markup)
unescape('&amp;lt;script&amp;gt;alert("yo")&amp;lt;/script&amp;gt;');
The last example will produce <script>alert("yo")</script>
instead of the expected &lt;script&gt;alert("yo")&lt;/script&gt;
.
Nothing like this could possibly happen if we grab all chars at once, in both directions.
It's just a fortunate case that after swapping &
with &amp;
no other replace will be affected, but it's not portable and universally a bad practice.
Grab all chars at once, no excuses!
// with "any char" compatible HTML escaping
/**
 * Escapes & < > ' " in a single pass, replacing each one with its
 * decimal numeric character reference (for example `<` becomes `&#60;`).
 *
 * @param {string} s - text to escape
 * @returns {string} escaped text
 */
function escape(s) {
  // Builds "&#<code>;" from the UTF-16 code of the matched character.
  var toNumericEntity = function (ch) {
    return ['&#', ch.charCodeAt(0), ';'].join('');
  };
  return s.replace(/[&<>'"]/g, toNumericEntity);
}
// with predefined object (preferred)
/**
 * Escapes & < > ' " in a single pass using a fixed lookup table.
 *
 * Because every character is matched against the original input exactly
 * once, the output of one substitution can never be re-escaped by another.
 *
 * @param {string} s - text to escape
 * @returns {string} text with the five characters replaced by HTML entities
 */
function escape(s) {
  // One entry per character matched by the regex below.
  var escaped = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    "'": '&#39;',
    '"': '&quot;'
  };
  return s.replace(/[&<>'"]/g, function (m) {
    return escaped[m];
  });
}
// with predefined object specific
// for HTML entities only
/**
 * Decodes the named and decimal entities for & < > ' " in a single pass.
 *
 * Each entity is matched against the original input exactly once, so a
 * decoded `&` can never combine with the following text into a second
 * entity: `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @param {string} s - escaped text
 * @returns {string} text with the ten recognised entities decoded
 */
function unescape(s) {
  var re = /&(?:amp|#38|lt|#60|gt|#62|apos|#39|quot|#34);/g;
  // One key per alternative in `re`, so every match has a table entry.
  var unescaped = {
    '&amp;': '&',
    '&#38;': '&',
    '&lt;': '<',
    '&#60;': '<',
    '&gt;': '>',
    '&#62;': '>',
    '&apos;': "'",
    '&#39;': "'",
    '&quot;': '"',
    '&#34;': '"'
  };
  return s.replace(re, function (m) {
    return unescaped[m];
  });
}
With the above code there is no risk that any char before or after another could interfere with the others. You escape and you unescape: it's a 1 to 1 operation, no surprises in the middle.
You'd like to have a little utility?
// Load the html-escaper utility (third-party npm module).
var html = require('html-escaper');
// you can test like this:
// take a string holding the five special characters, escape it,
// then unescape the result again.
var unescaped = '<&>"\'';
var escaped = html.escape(unescaped);
// Compares the round-tripped string with the input; a lossless round
// trip makes this true. The comparison result is not stored or printed.
html.unescape(escaped) === unescaped;
Oh man, I’ve been there!