Last active
August 29, 2015 13:56
-
-
Save wizard04wsu/8831356 to your computer and use it in GitHub Desktop.
Functions to encode/decode text for use in HTML 4.01.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Encodes the markup-significant characters of a string for use in HTML 4.01.
 *
 * `&`, `<`, `>`, `"` and `'` are replaced with `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&#39;`.
 * The apostrophe is written as a numeric reference because HTML 4.01 defines no named
 * entity for it (see http://www.w3.org/TR/html401/sgml/entities.html).
 *
 * @param {string} str - Text to encode.
 * @param {boolean} [keepValidEntities] - If truthy, an ampersand that starts a well-formed
 *   character reference is left alone. A reference counts as well-formed when it is
 *   `&#<decimal digits>;`, `&#x<hex digits>;`, or `&<name>;` where <name> is one of the
 *   HTML 4.01 entity names listed below. Numeric references are only checked for syntax,
 *   not for whether the code point is valid. The terminating semicolon is required.
 * @returns {string} The encoded string.
 */
function textToHTML(str, keepValidEntities){
    "use strict";

    var validEntityNames, rxp;

    if(keepValidEntities){
        //see http://www.w3.org/TR/html401/sgml/entities.html
        validEntityNames = ""+
            //markup-significant and internationalization characters
            "quot|amp|lt|gt|OElig|oelig|Scaron|scaron|Yuml|circ|tilde|ensp|emsp|thinsp|zwnj|zwj|lrm|"+
            "rlm|ndash|mdash|lsquo|rsquo|sbquo|ldquo|rdquo|bdquo|dagger|Dagger|permil|lsaquo|rsaquo|euro|"+
            //ISO 8859-1 characters
            "nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|"+
            "plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|"+
            "Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|"+
            "Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|"+
            "Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|"+
            "igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|"+
            "uacute|ucirc|uuml|yacute|thorn|yuml|"+
            //symbols, mathematical symbols, and Greek letters
            "fnof|Alpha|Beta|Gamma|Delta|Epsilon|Zeta|Eta|Theta|Iota|Kappa|Lambda|Mu|Nu|Xi|Omicron|Pi|"+
            "Rho|Sigma|Tau|Upsilon|Phi|Chi|Psi|Omega|alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|"+
            "kappa|lambda|mu|nu|xi|omicron|pi|rho|sigmaf|sigma|tau|upsilon|phi|chi|psi|omega|thetasym|"+
            "upsih|piv|bull|hellip|prime|Prime|oline|frasl|weierp|image|real|trade|alefsym|larr|uarr|rarr|"+
            "darr|harr|crarr|lArr|uArr|rArr|dArr|hArr|forall|part|exist|empty|nabla|isin|notin|ni|prod|"+
            "sum|minus|lowast|radic|prop|infin|ang|and|or|cap|cup|int|there4|sim|cong|asymp|ne|equiv|"+
            "le|ge|sub|sup|nsub|sube|supe|oplus|otimes|perp|sdot|lceil|rceil|lfloor|rfloor|lang|rang|"+
            "loz|spades|clubs|hearts|diams";

        //match an ampersand only when it is NOT followed by `#digits;`, `#xhex;` or `name;`
        rxp = new RegExp("&(?!(?:#(?:[0-9]+|[xX][a-fA-F0-9]+)|"+validEntityNames+");)", "g");

        //encode ampersands that are not part of a valid character entity reference
        str = str.replace(rxp, "&amp;");
    }
    else{
        //encode all ampersands
        str = str.replace(/&/g, "&amp;");
    }

    //encode the other markup-significant characters; this must run after the ampersand
    //pass so the ampersands introduced here are not encoded a second time
    return str.replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
}
/**
 * Decodes all HTML character entity references in a string (not just the reserved characters).
 *
 * Decoding is delegated to the HTML parser: the string is assigned to the `innerHTML` of a
 * detached <div> and the resulting text node is read back. `<` and `>` are escaped first so
 * that any markup in `str` is treated as literal text instead of being parsed into elements.
 *
 * Requires a DOM (`document.createElement`).
 *
 * @param {string} str - Text that may contain character entity references.
 * @returns {string} The decoded text; an empty string when `str` is empty.
 */
function HTMLToText(str){
    "use strict";

    var tmp = document.createElement("div");

    //neutralize tags so the parser produces text only, never elements
    tmp.innerHTML = str.replace(/</g, "&lt;").replace(/>/g, "&gt;");

    //an empty input yields no child node at all, so guard against a null firstChild
    return tmp.firstChild ? tmp.firstChild.nodeValue : "";
}
/**
 * Escapes a string for use as a JavaScript string value in embedded or inline code.
 * See http://code.google.com/p/doctype/wiki/ArticleXSSInJavaScript
 *
 * The result is meant to be placed between the quotes of a string literal. It escapes:
 *  - the backslash itself;
 *  - single and double quotes, to prevent "escape from the quote" attacks;
 *  - tab, line feed, carriage return, next line (U+0085), line separator (U+2028)
 *    and paragraph separator (U+2029);
 *  - `<` and `>`, to prevent the string from closing the enclosing tag;
 *  - `&` and `=`, just in case ("defense-in-depth").
 *
 * Usage example:
 *   var html = "<script>console.log(\"" + textToJavaScriptString(userInput) + "\");</script>";
 *
 * @param {string} str - Raw text.
 * @returns {string} The escaped text (without surrounding quotes).
 */
function textToJavaScriptString(str){
    "use strict";

    //every character that needs escaping, mapped to its replacement
    var escapes = {
        "\\": "\\\\",
        "'": "\\u0027",
        "\"": "\\u0022",
        "\t": "\\t",
        "\n": "\\n",
        "\r": "\\r",
        "\u0085": "\\u0085",
        "\u2028": "\\u2028",
        "\u2029": "\\u2029",
        "<": "\\u003C",
        ">": "\\u003E",
        "&": "\\u0026",
        "=": "\\u003D"
    };

    //single pass: each source character is inspected once, so the backslashes that the
    //replacements introduce are never escaped a second time
    return str.replace(/[\\'"\t\n\r\u0085\u2028\u2029<>&=]/g, function(ch){
        return escapes[ch];
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment