kalda341 · October 19, 2021 20:18
diff --git a/canonical.ts b/canonical.ts
 type JSONValue =
  | string
  | number
  | null
  | boolean
  | JSONValue[]
  // Record<string, JSONValue> is a circular value, so doesn't work.
  // This is semantically equivalent.
  | { [K: string]: JSONValue | undefined };

 export const decode = (data: string): JSONValue => {
  // Canonical JSON is a subset of regualar JSON, so the regular
  // decode will work correctly.
  return JSON.parse(data);
 };

 export const encode = (data: JSONValue, maxDepth: number = 100): string =>
  encodeToArray(data, maxDepth).join("");

 // If you're really fussed about performance you'd replace this
 // with a more performant implmentation.
 // Slice removes the final element, as we only want it between
 // others.
 const intersperse = <T>(element: T, array: readonly T[]): T[] =>
  array.flatMap((x) => [x, element]).slice(0, -1);

 // We could return a string from here, however there is a performance
 // benefit to doing one large string join at the end compared to many
 // along the way.
 // Note: This is a fully functional implmentation, you may get some
 // performance benefits to implementing in a non functional way.
 // Note: Rather than using the spread operator, and .flat(), we could allow for
 // any depth nested lists of chars and then flatten these all at once.
 // (Like Erlang does with it's IOList). This could have a minor performance advantage.
 // Typescript makes it difficult to specify such an operation, so we will leave
 // it as a regular string[];
 const encodeToArray = (
  node: JSONValue,
  maxDepth: number
 ): readonly string[] => {
  // Avoid encoding recursive structures
  if (maxDepth === 0) {
    throw new Error("Max depth exceeded.");
  }
  if (node === null) {
    return ["null"];
  } else if (typeof node === "boolean") {
    return [node.toString()];
  } else if (typeof node === "number" && Number.isInteger(node)) {
    // Integer numbers are desplayed in regular decimal notation
    return [node.toString()];
  } else if (typeof node === "number") {
    return [encodeExponential(node)];
  } else if (typeof node === "string") {
    return ['"', sanitiseString(node), '"'];
  } else if (Array.isArray(node)) {
    return [
      "[",
      ...intersperse(
        ",",
        // We need to recursively encode the contents
        node.flatMap((x) => encodeToArray(x, maxDepth - 1))
      ),
      "]",
    ];
  } else if (typeof node === "object") {
    return [
      "{",
      ...intersperse(
        [","],
        Object.keys(node)
          // Keys must be sorted before encoding.
          .sort()
          .map((k): [string, JSONValue | undefined] => [k, node[k]])
          // JSON.encode removes undefined values - we will too
          .filter(valueIsNotUndefined)
          // We need to recursively encode the contents
          .map(([k, v]) => [
            ...encodeToArray(k, maxDepth - 1),
            // Keys and values are separated by :
            ":",
            ...encodeToArray(v, maxDepth - 1),
          ])
      ).flat(),
      "}",
    ];
  } else {
    throw new Error("Node is invalid JSONValue.");
  }
 };

 const valueIsNotUndefined = <T>(
  pair: [string, T | undefined]
 ): pair is [string, T] => pair[1] !== undefined;

 const encodeExponential = (node: number): string => {
  // If we're dealing with a number but not an integer we should encode
  // in exponential notation
  const exponent = Math.floor(Math.log10(Math.abs(node)));
  const exponentString = exponent.toString();
  const mantissa = node / Math.pow(10, exponent);
  // Ensure decimal point exists
  const mantissaString = Number.isInteger(mantissa)
    ? `${mantissa}.0`
    : mantissa.toString();
  return `${mantissaString}E${exponentString}`;
 };

 // Escape character | quote | backslash | high surrogate not followed by low, or low surrogate not following
 // a high surrogate
 // eslint-disable-next-line no-control-regex
 const replaceRegex =
  /[\u0000-\u001F]|"|\\|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/g;

 const backslashReplacementMatch: Record<string, string> = {
  "\b": "\\b",
  "\t": "\\t",
  "\n": "\\n",
  "\f": "\\f",
  "\r": "\\r",
  '"': '\\"',
  "\\": "\\\\",
 };

 const escapeCharacter = (character: string): string => {
  const slashEscaped = backslashReplacementMatch[character];
  if (slashEscaped !== undefined) {
    return slashEscaped;
  } else {
    // If the character is marked for replacement and we can't slash escape it, we'll encode it
    // with a six-character \u00xx uppercase hexadecimal escape sequence
    const charCode = character
      .charCodeAt(0)
      .toString(16)
      .toUpperCase()
      .padStart(4, "0");
    return "\\u" + charCode;
  }
 };

 const sanitiseString = (node: string): string =>
  node.replace(replaceRegex, escapeCharacter);

 // A nice easy test case
 const testString =
  '{"-0":0,"-1":-1,"0.1":1.0E-1,"1":1,"10.1":1.01E1,"emoji":"😃","escape":"\\u001B","lone surrogate":"\\uDEAD","whitespace":" \\t\\n\\r"}';
 console.log(encode(decode(testString)) === testString);
	type JSONValue =
	\| string
	\| number
	\| null
	\| boolean
	\| JSONValue[]
	// Record<string, JSONValue> is a circular value, so doesn't work.
	// This is semantically equivalent.
	\| { [K: string]: JSONValue \| undefined };

	export const decode = (data: string): JSONValue => {
	// Canonical JSON is a subset of regualar JSON, so the regular
	// decode will work correctly.
	return JSON.parse(data);
	};

	export const encode = (data: JSONValue, maxDepth: number = 100): string =>
	encodeToArray(data, maxDepth).join("");

	// If you're really fussed about performance you'd replace this
	// with a more performant implmentation.
	// Slice removes the final element, as we only want it between
	// others.
	const intersperse = <T>(element: T, array: readonly T[]): T[] =>
	array.flatMap((x) => [x, element]).slice(0, -1);

	// We could return a string from here, however there is a performance
	// benefit to doing one large string join at the end compared to many
	// along the way.
	// Note: This is a fully functional implmentation, you may get some
	// performance benefits to implementing in a non functional way.
	// Note: Rather than using the spread operator, and .flat(), we could allow for
	// any depth nested lists of chars and then flatten these all at once.
	// (Like Erlang does with it's IOList). This could have a minor performance advantage.
	// Typescript makes it difficult to specify such an operation, so we will leave
	// it as a regular string[];
	const encodeToArray = (
	node: JSONValue,
	maxDepth: number
	): readonly string[] => {
	// Avoid encoding recursive structures
	if (maxDepth === 0) {
	throw new Error("Max depth exceeded.");
	}
	if (node === null) {
	return ["null"];
	} else if (typeof node === "boolean") {
	return [node.toString()];
	} else if (typeof node === "number" && Number.isInteger(node)) {
	// Integer numbers are desplayed in regular decimal notation
	return [node.toString()];
	} else if (typeof node === "number") {
	return [encodeExponential(node)];
	} else if (typeof node === "string") {
	return ['"', sanitiseString(node), '"'];
	} else if (Array.isArray(node)) {
	return [
	"[",
	...intersperse(
	",",
	// We need to recursively encode the contents
	node.flatMap((x) => encodeToArray(x, maxDepth - 1))
	),
	"]",
	];
	} else if (typeof node === "object") {
	return [
	"{",
	...intersperse(
	[","],
	Object.keys(node)
	// Keys must be sorted before encoding.
	.sort()
	.map((k): [string, JSONValue \| undefined] => [k, node[k]])
	// JSON.encode removes undefined values - we will too
	.filter(valueIsNotUndefined)
	// We need to recursively encode the contents
	.map(([k, v]) => [
	...encodeToArray(k, maxDepth - 1),
	// Keys and values are separated by :
	":",
	...encodeToArray(v, maxDepth - 1),
	])
	).flat(),
	"}",
	];
	} else {
	throw new Error("Node is invalid JSONValue.");
	}
	};

	const valueIsNotUndefined = <T>(
	pair: [string, T \| undefined]
	): pair is [string, T] => pair[1] !== undefined;

	const encodeExponential = (node: number): string => {
	// If we're dealing with a number but not an integer we should encode
	// in exponential notation
	const exponent = Math.floor(Math.log10(Math.abs(node)));
	const exponentString = exponent.toString();
	const mantissa = node / Math.pow(10, exponent);
	// Ensure decimal point exists
	const mantissaString = Number.isInteger(mantissa)
	? `${mantissa}.0`
	: mantissa.toString();
	return `${mantissaString}E${exponentString}`;
	};

	// Escape character \| quote \| backslash \| high surrogate not followed by low, or low surrogate not following
	// a high surrogate
	// eslint-disable-next-line no-control-regex
	const replaceRegex =
	/[\u0000-\u001F]\|"\|\\\|[\uD800-\uDBFF](?![\uDC00-\uDFFF])\|(?:[^\uD800-\uDBFF]\|^)[\uDC00-\uDFFF]/g;

	const backslashReplacementMatch: Record<string, string> = {
	"\b": "\\b",
	"\t": "\\t",
	"\n": "\\n",
	"\f": "\\f",
	"\r": "\\r",
	'"': '\\"',
	"\\": "\\\\",
	};

	const escapeCharacter = (character: string): string => {
	const slashEscaped = backslashReplacementMatch[character];
	if (slashEscaped !== undefined) {
	return slashEscaped;
	} else {
	// If the character is marked for replacement and we can't slash escape it, we'll encode it
	// with a six-character \u00xx uppercase hexadecimal escape sequence
	const charCode = character
	.charCodeAt(0)
	.toString(16)
	.toUpperCase()
	.padStart(4, "0");
	return "\\u" + charCode;
	}
	};

	const sanitiseString = (node: string): string =>
	node.replace(replaceRegex, escapeCharacter);

	// A nice easy test case
	const testString =
	'{"-0":0,"-1":-1,"0.1":1.0E-1,"1":1,"10.1":1.01E1,"emoji":"😃","escape":"\\u001B","lone surrogate":"\\uDEAD","whitespace":" \\t\\n\\r"}';
	console.log(encode(decode(testString)) === testString);