Last active
December 3, 2023 07:56
-
-
Save erkobridee/d2f04e4ce5c6844a1e16788d58107e7e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
  useful references:

  String.prototype.replace()
  https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace
  https://alligator.io/js/string-replace/

  Encode and Decode HTML entities using pure Javascript
  https://ourcodeworld.com/articles/read/188/encode-and-decode-html-entities-using-pure-javascript

  HTML Entities Encoder / Decoder - online tool
  https://www.web2generators.com/html-based-tools/online-html-entities-encoder-and-decoder
*/
/** Pairs a literal character with the HTML entity text that represents it. */
interface IEntityDefinition {
  /** The literal character. */
  char: string;
  /** The HTML entity text standing for `char`. */
  entity: string;
  /** Human-readable name of the character. */
  description: string;
}
/**
 * Definition for the non-breaking-space entity.
 *
 * It is declared separately from `HTML_ENTITIES`; the module's setup code adds
 * it to the entity-to-character lookup only, so `&nbsp;` is decoded but the
 * character is never encoded back.
 *
 * NOTE(review): `char` is a plain space (U+0020) here — confirm that decoding
 * `&nbsp;` to a regular space (rather than U+00A0) is the intended behavior.
 */
const HTML_ENTITY_SPACE: IEntityDefinition = {
  char: ' ',
  entity: '&nbsp;',
  description: 'non-breaking space'
};
/**
 * Character/entity pairs that are both encoded and decoded.
 *
 * Covers the markup-significant ASCII characters (quotes, ampersand, angle
 * brackets), the euro sign, and Latin-1 symbols and accented letters. Each
 * `entity` is the named character reference for `char`; the two must differ,
 * otherwise encoding and decoding would be no-ops.
 */
const HTML_ENTITIES: IEntityDefinition[] = [
  // markup-significant characters
  { char: '"', entity: '&quot;', description: 'quotation mark' },
  { char: "'", entity: '&apos;', description: 'apostrophe' },
  { char: '&', entity: '&amp;', description: 'ampersand' },
  { char: '<', entity: '&lt;', description: 'less-than' },
  { char: '>', entity: '&gt;', description: 'greater-than' },

  // symbols
  { char: '¡', entity: '&iexcl;', description: 'inverted exclamation mark' },
  { char: '¢', entity: '&cent;', description: 'cent' },
  { char: '€', entity: '&euro;', description: 'euro' },
  { char: '£', entity: '&pound;', description: 'pound' },
  { char: '¤', entity: '&curren;', description: 'currency' },
  { char: '¥', entity: '&yen;', description: 'yen' },
  { char: '¦', entity: '&brvbar;', description: 'broken vertical bar' },
  { char: '§', entity: '&sect;', description: 'section' },
  { char: '¨', entity: '&uml;', description: 'spacing diaeresis' },
  { char: '©', entity: '&copy;', description: 'copyright' },
  { char: 'ª', entity: '&ordf;', description: 'feminine ordinal indicator' },
  { char: '«', entity: '&laquo;', description: 'angle quotation mark (left)' },
  { char: '¬', entity: '&not;', description: 'negation' },
  { char: '®', entity: '&reg;', description: 'registered trademark' },
  { char: '¯', entity: '&macr;', description: 'spacing macron' },
  { char: '°', entity: '&deg;', description: 'degree' },
  { char: '±', entity: '&plusmn;', description: 'plus-or-minus ' },
  { char: '²', entity: '&sup2;', description: 'superscript 2' },
  { char: '³', entity: '&sup3;', description: 'superscript 3' },
  { char: '´', entity: '&acute;', description: 'spacing acute' },
  { char: 'µ', entity: '&micro;', description: 'micro' },
  { char: '¶', entity: '&para;', description: 'paragraph' },
  { char: '·', entity: '&middot;', description: 'middle dot' },
  { char: '¸', entity: '&cedil;', description: 'spacing cedilla' },
  { char: '¹', entity: '&sup1;', description: 'superscript 1' },
  { char: 'º', entity: '&ordm;', description: 'masculine ordinal indicator' },
  { char: '»', entity: '&raquo;', description: 'angle quotation mark (right)' },
  { char: '¼', entity: '&frac14;', description: 'fraction 1/4' },
  { char: '½', entity: '&frac12;', description: 'fraction 1/2' },
  { char: '¾', entity: '&frac34;', description: 'fraction 3/4' },
  { char: '¿', entity: '&iquest;', description: 'inverted question mark' },
  { char: '×', entity: '&times;', description: 'multiplication' },
  { char: '÷', entity: '&divide;', description: 'division' },

  // capital letters
  { char: 'À', entity: '&Agrave;', description: 'capital a, grave accent' },
  { char: 'Á', entity: '&Aacute;', description: 'capital a, acute accent' },
  { char: 'Â', entity: '&Acirc;', description: 'capital a, circumflex accent' },
  { char: 'Ã', entity: '&Atilde;', description: 'capital a, tilde' },
  { char: 'Ä', entity: '&Auml;', description: 'capital a, umlaut mark' },
  { char: 'Å', entity: '&Aring;', description: 'capital a, ring' },
  { char: 'Æ', entity: '&AElig;', description: 'capital ae' },
  { char: 'Ç', entity: '&Ccedil;', description: 'capital c, cedilla' },
  { char: 'È', entity: '&Egrave;', description: 'capital e, grave accent' },
  { char: 'É', entity: '&Eacute;', description: 'capital e, acute accent' },
  { char: 'Ê', entity: '&Ecirc;', description: 'capital e, circumflex accent' },
  { char: 'Ë', entity: '&Euml;', description: 'capital e, umlaut mark' },
  { char: 'Ì', entity: '&Igrave;', description: 'capital i, grave accent' },
  { char: 'Í', entity: '&Iacute;', description: 'capital i, acute accent' },
  { char: 'Î', entity: '&Icirc;', description: 'capital i, circumflex accent' },
  { char: 'Ï', entity: '&Iuml;', description: 'capital i, umlaut mark' },
  { char: 'Ð', entity: '&ETH;', description: 'capital eth, Icelandic' },
  { char: 'Ñ', entity: '&Ntilde;', description: 'capital n, tilde' },
  { char: 'Ò', entity: '&Ograve;', description: 'capital o, grave accent' },
  { char: 'Ó', entity: '&Oacute;', description: 'capital o, acute accent' },
  { char: 'Ô', entity: '&Ocirc;', description: 'capital o, circumflex accent' },
  { char: 'Õ', entity: '&Otilde;', description: 'capital o, tilde' },
  { char: 'Ö', entity: '&Ouml;', description: 'capital o, umlaut mark' },
  { char: 'Ø', entity: '&Oslash;', description: 'capital o, slash' },
  { char: 'Ù', entity: '&Ugrave;', description: 'capital u, grave accent' },
  { char: 'Ú', entity: '&Uacute;', description: 'capital u, acute accent' },
  { char: 'Û', entity: '&Ucirc;', description: 'capital u, circumflex accent' },
  { char: 'Ü', entity: '&Uuml;', description: 'capital u, umlaut mark' },
  { char: 'Ý', entity: '&Yacute;', description: 'capital y, acute accent' },
  { char: 'Þ', entity: '&THORN;', description: 'capital THORN, Icelandic' },

  // small letters
  { char: 'ß', entity: '&szlig;', description: 'small sharp s, German' },
  { char: 'à', entity: '&agrave;', description: 'small a, grave accent' },
  { char: 'á', entity: '&aacute;', description: 'small a, acute accent' },
  { char: 'â', entity: '&acirc;', description: 'small a, circumflex accent' },
  { char: 'ã', entity: '&atilde;', description: 'small a, tilde' },
  { char: 'ä', entity: '&auml;', description: 'small a, umlaut mark' },
  { char: 'å', entity: '&aring;', description: 'small a, ring' },
  { char: 'æ', entity: '&aelig;', description: 'small ae' },
  { char: 'ç', entity: '&ccedil;', description: 'small c, cedilla' },
  { char: 'è', entity: '&egrave;', description: 'small e, grave accent' },
  { char: 'é', entity: '&eacute;', description: 'small e, acute accent' },
  { char: 'ê', entity: '&ecirc;', description: 'small e, circumflex accent' },
  { char: 'ë', entity: '&euml;', description: 'small e, umlaut mark' },
  { char: 'ì', entity: '&igrave;', description: 'small i, grave accent' },
  { char: 'í', entity: '&iacute;', description: 'small i, acute accent' },
  { char: 'î', entity: '&icirc;', description: 'small i, circumflex accent' },
  { char: 'ï', entity: '&iuml;', description: 'small i, umlaut mark' },
  { char: 'ð', entity: '&eth;', description: 'small eth, Icelandic' },
  { char: 'ñ', entity: '&ntilde;', description: 'small n, tilde' },
  { char: 'ò', entity: '&ograve;', description: 'small o, grave accent' },
  { char: 'ó', entity: '&oacute;', description: 'small o, acute accent' },
  { char: 'ô', entity: '&ocirc;', description: 'small o, circumflex accent' },
  { char: 'õ', entity: '&otilde;', description: 'small o, tilde' },
  { char: 'ö', entity: '&ouml;', description: 'small o, umlaut mark' },
  { char: 'ø', entity: '&oslash;', description: 'small o, slash' },
  { char: 'ù', entity: '&ugrave;', description: 'small u, grave accent' },
  { char: 'ú', entity: '&uacute;', description: 'small u, acute accent' },
  { char: 'û', entity: '&ucirc;', description: 'small u, circumflex accent' },
  { char: 'ü', entity: '&uuml;', description: 'small u, umlaut mark' },
  { char: 'ý', entity: '&yacute;', description: 'small y, acute accent' },
  { char: 'þ', entity: '&thorn;', description: 'small thorn, Icelandic' },
  { char: 'ÿ', entity: '&yuml;', description: 'small y, umlaut mark' }
];
//----------------------------------------------------------------------------// | |
/** Signature shared by the public encode and decode functions: string in, string out. */
type TEnDecodeFunction = (value: string) => string;

/**
 * Signature of the callbacks handed to `String.prototype.replace`.
 *
 * `substring` is the matched text. The remaining arguments `replace` supplies
 * (capture groups, offset, input string) are accepted but typed as `unknown`,
 * since the replacers in this module never read them.
 */
type TEnDecodeReplacerFunction = (substring: string, ...args: unknown[]) => string;
/** Lookup from literal character to its entity text (used when encoding). */
const charToEntityMap = new Map<string, string>();
/** Lookup from entity text to its literal character (used when decoding). */
const entityToCharMap = new Map<string, string>();

for (const { char, entity } of HTML_ENTITIES) {
  entityToCharMap.set(entity, char);
  charToEntityMap.set(char, entity);
}
// The space entity is registered for decoding only; its character is never encoded.
entityToCharMap.set(HTML_ENTITY_SPACE.entity, HTML_ENTITY_SPACE.char);

/**
 * Joins `keys` into a regular-expression alternation, escaping every regex
 * metacharacter so each key is matched literally even if the entity table
 * later gains entries such as `$` or `|`.
 */
const toAlternation = (keys: Iterable<string>): string =>
  Array.from(keys, key => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|');

/** Matches every character that has an entity in `charToEntityMap`. */
const charToEntityRegex = new RegExp('(' + toAlternation(charToEntityMap.keys()) + ')', 'g');
/** Matches every known entity, plus decimal numeric references of 1 to 5 digits. */
const entityToCharRegex = new RegExp(
  '(' + toAlternation(entityToCharMap.keys()) + '|&#[0-9]{1,5};' + ')',
  'g'
);
/**
 * Replacer used while decoding: turns one matched entity into its character.
 *
 * Known entities are looked up in `entityToCharMap`. Anything else is treated
 * as a decimal numeric reference (`&#NNN;`): the digits after `&#` are parsed
 * and converted with `String.fromCodePoint`, so values above 0xFFFF yield the
 * correct character instead of being truncated to 16 bits.
 *
 * @param substring the text matched by the decode regular expression
 * @returns the decoded character, or `substring` unchanged when it is neither
 *          a known entity nor a valid numeric reference
 */
const decodeReplacer: TEnDecodeReplacerFunction = substring => {
  const mapped = entityToCharMap.get(substring);
  if (mapped !== undefined) {
    return mapped;
  }

  // parseInt stops at the trailing ';', so only the digits are read.
  const codePoint = parseInt(substring.slice(2), 10);
  const isValidCodePoint = codePoint >= 0 && codePoint <= 0x10ffff; // false for NaN
  return isValidCodePoint ? String.fromCodePoint(codePoint) : substring;
};
/**
 * Replacer used while encoding: turns one matched character into its entity.
 *
 * @param substring the text matched by the encode regular expression
 * @returns the entity registered in `charToEntityMap`, or `substring`
 *          unchanged when no entity is registered, so text is never dropped
 */
const encodeReplacer: TEnDecodeReplacerFunction = substring => {
  const entity = charToEntityMap.get(substring);
  return entity === undefined ? substring : entity;
};
/**
 * Replaces every HTML entity matched by `entityToCharRegex` in `value` with
 * the character produced by `decodeReplacer`.
 *
 * @param value text that may contain HTML entities
 * @returns the decoded text; a falsy `value` (such as `''`) is returned as-is
 */
export const htmlEntitiesDecode: TEnDecodeFunction = value => {
  if (!value) {
    return value;
  }
  // String() keeps the runtime coercion for callers that pass a non-string.
  return String(value).replace(entityToCharRegex, decodeReplacer);
};
/**
 * Replaces every character matched by `charToEntityRegex` in `value` with the
 * entity produced by `encodeReplacer`.
 *
 * @param value text to encode
 * @returns the encoded text; a falsy `value` (such as `''`) is returned as-is
 */
export const htmlEntitiesEncode: TEnDecodeFunction = value => {
  if (!value) {
    return value;
  }
  // String() keeps the runtime coercion for callers that pass a non-string.
  return String(value).replace(charToEntityRegex, encodeReplacer);
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
thank you :) worked a charm