Created
May 1, 2025 18:22
-
-
Save webstrand/9ea2b148575eacf6f161848ee955289a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Character-class fragments. Each string is regex *source* text that gets
// spliced inside `[...]` below, hence the doubled backslashes.

/** Characters that are escaped by prefixing a backslash: ^ $ \ . * + ? ( ) [ ] { } | and /. */
const SYNTAX_CHARACTER = "\\^$\\\\.*+?()[\\]{}|/";
/** Control characters that are written as the short escapes \t \n \v \f \r. */
const TABLE_67_CHARACTER = "\\t\\n\\v\\f\\r";
/** Punctuators that are written as a hexadecimal escape rather than backslash-prefixed. */
const OTHER_PUNCTUATORS_CHARACTER = ",\\-=<>#&!%:;@~\\'\\\"\\`";
/** White-space characters (tab, VT, FF, BOM, space, NBSP and the other Unicode spaces listed). */
const WHITE_SPACE_CHARACTER =
  "\\t\\v\\f\\uFEFF \\u00A0\\u1680\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200A\\u202F\\u205F\\u3000";
/** Line terminators: LF, CR, LINE SEPARATOR, PARAGRAPH SEPARATOR. */
const LINE_TERMINATOR_CHARACTER = "\\n\\r\\u2028\\u2029";
/** A leading surrogate that is not followed by a trailing surrogate. */
const MATCH_MALFORMED_LEADING_SURROGATE =
  "[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])";
/** A trailing surrogate that is not preceded by a leading surrogate. */
const MATCH_MALFORMED_TRAILING_SURROGATE =
  "(?<![\\uD800-\\uDBFF])[\\uDC00-\\uDFFF]";

/** Matches when the very first code unit is an ASCII letter or digit. */
const matchFirstAlphanumeric = /^[A-Za-z0-9]/;

/**
 * Stateful (`g`) scanner for characters that get a backslash prefix.
 * Every match is exactly one code unit, so after a successful `test`
 * the matched character sits at `lastIndex - 1`.
 */
const findNextEscape = new RegExp(`[${SYNTAX_CHARACTER}]`, "g");

/**
 * Stateful (`g`) scanner for characters that are replaced by an escape
 * sequence (control escape, `\xHH` or `\uHHHH`). Every match is exactly one
 * code unit, so the matched character sits at `lastIndex - 1`.
 */
const findNextEncode = new RegExp(
  `[${TABLE_67_CHARACTER}${OTHER_PUNCTUATORS_CHARACTER}${WHITE_SPACE_CHARACTER}${LINE_TERMINATOR_CHARACTER}]|${MATCH_MALFORMED_LEADING_SURROGATE}|${MATCH_MALFORMED_TRAILING_SURROGATE}`,
  "g"
);

/**
 * Escapes a string so it can be embedded in a regular-expression pattern and
 * match itself literally. Modelled on the ECMAScript `RegExp.escape`
 * algorithm:
 *
 * - a leading ASCII letter or digit becomes `\xHH`;
 * - syntax characters and `/` get a backslash prefix;
 * - tab, LF, VT, FF and CR become `\t`, `\n`, `\v`, `\f`, `\r`;
 * - the other listed punctuators, white space, line terminators and unpaired
 *   surrogates become `\xHH` (code unit <= 0xFF) or `\uHHHH`;
 * - everything else is copied through unchanged.
 *
 * @param S - The string to escape.
 * @returns The escaped pattern source.
 * @throws TypeError if `S` is not a string.
 */
function RegExp_escape(S: string): string {
  if (typeof S !== "string") throw new TypeError("Argument is not a string");

  // The scanners are shared between calls and remember their position in
  // `lastIndex`. Start from a known state so that an aborted earlier call, or
  // any other user of these regexes, cannot make this call skip characters.
  findNextEscape.lastIndex = 0;
  findNextEncode.lastIndex = 0;

  // `cursor` is the index of the first code unit of S not yet copied into
  // `accumulator`.
  let cursor = 0;
  let accumulator = "";

  // Special case: an alphanumeric first character is always hex-encoded.
  // ASCII letters and digits are all >= 0x30, so the hex form has two digits.
  if (matchFirstAlphanumeric.test(S)) {
    cursor = 1;
    accumulator = "\\x" + S.charCodeAt(0).toString(16);
  }

  // Find the first escapable character and the first encodable character.
  let hasNextEscape = findNextEscape.test(S);
  let hasNextEncode = findNextEncode.test(S);

  for (;;) {
    const escapeIndex = findNextEscape.lastIndex;
    const encodeIndex = findNextEncode.lastIndex;

    // Handle whichever pending match comes first in the string. The two
    // scanners match disjoint character sets, so the indices never tie.
    if (hasNextEscape && (!hasNextEncode || escapeIndex < encodeIndex)) {
      // Copy the untouched run and emit the backslash; the character itself
      // stays at `cursor` and is copied with the next run.
      accumulator += S.slice(cursor, escapeIndex - 1) + "\\";
      cursor = escapeIndex - 1;
      hasNextEscape = findNextEscape.test(S);
    } else if (hasNextEncode && (!hasNextEscape || encodeIndex < escapeIndex)) {
      const code = S.charCodeAt(encodeIndex - 1);
      const slice = S.slice(cursor, encodeIndex - 1);
      // The encoded character is replaced entirely, so skip past it.
      cursor = encodeIndex;
      switch (code) {
        case 0x09:
          accumulator += slice + "\\t";
          break;
        case 0x0a:
          accumulator += slice + "\\n";
          break;
        case 0x0b:
          accumulator += slice + "\\v";
          break;
        case 0x0c:
          accumulator += slice + "\\f";
          break;
        case 0x0d:
          accumulator += slice + "\\r";
          break;
        default: {
          const hex = code.toString(16);
          // Pad to the fixed width the escape syntax requires. Every code
          // unit currently in the tables already has the right width, so
          // this only guards against future additions.
          accumulator +=
            slice +
            (code <= 0xff
              ? "\\x" + hex.padStart(2, "0")
              : "\\u" + hex.padStart(4, "0"));
        }
      }
      hasNextEncode = findNextEncode.test(S);
    } else {
      // Nothing left to escape or encode: flush the tail. Both scanners have
      // failed their last `test`, which leaves their `lastIndex` at 0.
      return accumulator + S.slice(cursor);
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment