Skip to content

Instantly share code, notes, and snippets.

@webstrand
Created May 1, 2025 18:22
Show Gist options
  • Save webstrand/9ea2b148575eacf6f161848ee955289a to your computer and use it in GitHub Desktop.
Save webstrand/9ea2b148575eacf6f161848ee955289a to your computer and use it in GitHub Desktop.
// Pattern fragments and compiled regexes used by RegExp_escape.
// Every *_CHARACTER constant is the BODY of a regex character class (already
// escaped for use between `[` and `]`), not a complete pattern.

// Characters that RegExp_escape prefixes with a backslash:
// ^ $ \ . * + ? ( ) [ ] { } | /
const SYNTAX_CHARACTER = "\\^$\\\\.*+?()[\\]{}|/";
// Tab, line feed, vertical tab, form feed and carriage return. RegExp_escape
// emits these as the two-character escapes \t \n \v \f \r.
// NOTE(review): the name presumably refers to a numbered table in the
// ECMAScript specification — confirm against the edition being targeted.
const TABLE_67_CHARACTER = "\\t\\n\\v\\f\\r";
// Punctuation that RegExp_escape hex-encodes instead of backslash-escaping:
// , - = < > # & ! % : ; @ ~ ' " `
const OTHER_PUNCTUATORS_CHARACTER = ",\\-=<>#&!%:;@~\\'\\\"\\`";
// Tab, vertical tab, form feed, U+FEFF, space, U+00A0, U+1680,
// U+2000 through U+200A, U+202F, U+205F and U+3000.
const WHITE_SPACE_CHARACTER =
"\\t\\v\\f\\uFEFF \\u00A0\\u1680\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200A\\u202F\\u205F\\u3000";
// Line feed, carriage return, U+2028 and U+2029.
const LINE_TERMINATOR_CHARACTER = "\\n\\r\\u2028\\u2029";
// A high surrogate (U+D800-U+DBFF) that is NOT followed by a low surrogate.
// The lookahead is zero-width, so a match is always exactly one code unit.
const MATCH_MALFORMED_LEADING_SURROGATE =
"[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])";
// A low surrogate (U+DC00-U+DFFF) that is NOT preceded by a high surrogate.
// Uses a lookbehind assertion; a match is always exactly one code unit.
const MATCH_MALFORMED_TRAILING_SURROGATE =
"(?<![\\uD800-\\uDBFF])[\\uDC00-\\uDFFF]";
// Matches when the string starts with an ASCII letter or digit.
const matchFirstAlphanumeric = /^[A-Za-z0-9]/;
// Finds the next character that needs a backslash prefix. The "g" flag is
// required: RegExp_escape calls test() repeatedly and reads lastIndex to learn
// where each match ended, so this object carries scan state between calls to
// test().
const findNextEscape = new RegExp(`[${SYNTAX_CHARACTER}]`, "g");
// Finds the next code unit that must be replaced by an escape sequence: any
// member of the four character classes above, or a lone surrogate. Global for
// the same reason as findNextEscape. Every alternative matches a single code
// unit, so `lastIndex - 1` is the index of the matched code unit.
const findNextEncode = new RegExp(
`[${TABLE_67_CHARACTER}${OTHER_PUNCTUATORS_CHARACTER}${WHITE_SPACE_CHARACTER}${LINE_TERMINATOR_CHARACTER}]|${MATCH_MALFORMED_LEADING_SURROGATE}|${MATCH_MALFORMED_TRAILING_SURROGATE}`,
"g"
);
/**
 * Escapes a string so that it can be embedded in a regular-expression pattern
 * and match itself literally.
 *
 * The input is rewritten as follows:
 * - a leading ASCII letter or digit becomes a `\x` hex escape;
 * - each character matched by `findNextEscape` is kept but prefixed with a
 *   backslash;
 * - each code unit matched by `findNextEncode` is replaced by `\t`, `\n`,
 *   `\v`, `\f` or `\r` for code units 0x09-0x0d, otherwise by `\x` (value up
 *   to 0xff) or `\u` (larger value) followed by its lower-case hex value;
 * - everything else is copied through unchanged.
 *
 * The scan is driven by the module-level global regexes `findNextEscape` and
 * `findNextEncode`: each `test()` call advances that regex's `lastIndex`, and
 * a failed `test()` resets it to 0, so both are back at 0 when this returns.
 *
 * @param S - The string to escape.
 * @returns The escaped pattern source.
 * @throws TypeError if `S` is not a string.
 */
function RegExp_escape(S: string) {
  if (typeof S !== "string") throw new TypeError("Argument is not a string");

  // A leading letter or digit is hex-escaped; the remaining scan starts after it.
  const startsAlphanumeric = matchFirstAlphanumeric.test(S);
  let output = startsAlphanumeric ? "\\x" + S.charCodeAt(0).toString(16) : "";
  let copiedUpTo = startsAlphanumeric ? 1 : 0;

  // Prime both scanners with their first match (if any).
  let escapePending = findNextEscape.test(S);
  let encodePending = findNextEncode.test(S);

  for (;;) {
    // Each match is a single code unit, so `lastIndex - 1` is its position.
    const escapeEnd = findNextEscape.lastIndex;
    const encodeEnd = findNextEncode.lastIndex;

    // Backslash-escape: the pending escape match is the nearer one.
    if (escapePending && (!encodePending || escapeEnd < encodeEnd)) {
      const escapeAt = escapeEnd - 1;
      output += S.slice(copiedUpTo, escapeAt) + "\\";
      // Leave the character itself uncopied; it is emitted with the next slice.
      copiedUpTo = escapeAt;
      escapePending = findNextEscape.test(S);
      continue;
    }

    // Nothing left to rewrite: flush the unprocessed tail and finish.
    if (!(encodePending && (!escapePending || encodeEnd < escapeEnd))) {
      return output + S.slice(copiedUpTo);
    }

    // Encode: replace the matched code unit with an escape sequence.
    const encodeAt = encodeEnd - 1;
    const unit = S.charCodeAt(encodeAt);
    output += S.slice(copiedUpTo, encodeAt);
    copiedUpTo = encodeEnd;
    if (unit >= 0x09 && unit <= 0x0d) {
      // 0x09..0x0d map, in order, to \t \n \v \f \r.
      output += "\\" + "tnvfr".charAt(unit - 0x09);
    } else {
      output += (unit <= 0xff ? "\\x" : "\\u") + unit.toString(16);
    }
    encodePending = findNextEncode.test(S);
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment