Created
October 19, 2021 20:18
-
-
Save kalda341/647f936aefa2a49b7707d4fbf6768fb9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
type JSONValue = | |
| string | |
| number | |
| null | |
| boolean | |
| JSONValue[] | |
// Record<string, JSONValue> is a circular value, so doesn't work. | |
// This is semantically equivalent. | |
| { [K: string]: JSONValue | undefined }; | |
export const decode = (data: string): JSONValue => { | |
// Canonical JSON is a subset of regualar JSON, so the regular | |
// decode will work correctly. | |
return JSON.parse(data); | |
}; | |
export const encode = (data: JSONValue, maxDepth: number = 100): string => | |
encodeToArray(data, maxDepth).join(""); | |
// If you're really fussed about performance you'd replace this | |
// with a more performant implmentation. | |
// Slice removes the final element, as we only want it between | |
// others. | |
const intersperse = <T>(element: T, array: readonly T[]): T[] => | |
array.flatMap((x) => [x, element]).slice(0, -1); | |
// We could return a string from here, however there is a performance | |
// benefit to doing one large string join at the end compared to many | |
// along the way. | |
// Note: This is a fully functional implmentation, you may get some | |
// performance benefits to implementing in a non functional way. | |
// Note: Rather than using the spread operator, and .flat(), we could allow for | |
// any depth nested lists of chars and then flatten these all at once. | |
// (Like Erlang does with it's IOList). This could have a minor performance advantage. | |
// Typescript makes it difficult to specify such an operation, so we will leave | |
// it as a regular string[]; | |
const encodeToArray = ( | |
node: JSONValue, | |
maxDepth: number | |
): readonly string[] => { | |
// Avoid encoding recursive structures | |
if (maxDepth === 0) { | |
throw new Error("Max depth exceeded."); | |
} | |
if (node === null) { | |
return ["null"]; | |
} else if (typeof node === "boolean") { | |
return [node.toString()]; | |
} else if (typeof node === "number" && Number.isInteger(node)) { | |
// Integer numbers are desplayed in regular decimal notation | |
return [node.toString()]; | |
} else if (typeof node === "number") { | |
return [encodeExponential(node)]; | |
} else if (typeof node === "string") { | |
return ['"', sanitiseString(node), '"']; | |
} else if (Array.isArray(node)) { | |
return [ | |
"[", | |
...intersperse( | |
",", | |
// We need to recursively encode the contents | |
node.flatMap((x) => encodeToArray(x, maxDepth - 1)) | |
), | |
"]", | |
]; | |
} else if (typeof node === "object") { | |
return [ | |
"{", | |
...intersperse( | |
[","], | |
Object.keys(node) | |
// Keys must be sorted before encoding. | |
.sort() | |
.map((k): [string, JSONValue | undefined] => [k, node[k]]) | |
// JSON.encode removes undefined values - we will too | |
.filter(valueIsNotUndefined) | |
// We need to recursively encode the contents | |
.map(([k, v]) => [ | |
...encodeToArray(k, maxDepth - 1), | |
// Keys and values are separated by : | |
":", | |
...encodeToArray(v, maxDepth - 1), | |
]) | |
).flat(), | |
"}", | |
]; | |
} else { | |
throw new Error("Node is invalid JSONValue."); | |
} | |
}; | |
const valueIsNotUndefined = <T>( | |
pair: [string, T | undefined] | |
): pair is [string, T] => pair[1] !== undefined; | |
const encodeExponential = (node: number): string => { | |
// If we're dealing with a number but not an integer we should encode | |
// in exponential notation | |
const exponent = Math.floor(Math.log10(Math.abs(node))); | |
const exponentString = exponent.toString(); | |
const mantissa = node / Math.pow(10, exponent); | |
// Ensure decimal point exists | |
const mantissaString = Number.isInteger(mantissa) | |
? `${mantissa}.0` | |
: mantissa.toString(); | |
return `${mantissaString}E${exponentString}`; | |
}; | |
// Escape character | quote | backslash | high surrogate not followed by low, or low surrogate not following | |
// a high surrogate | |
// eslint-disable-next-line no-control-regex | |
const replaceRegex = | |
/[\u0000-\u001F]|"|\\|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/g; | |
const backslashReplacementMatch: Record<string, string> = { | |
"\b": "\\b", | |
"\t": "\\t", | |
"\n": "\\n", | |
"\f": "\\f", | |
"\r": "\\r", | |
'"': '\\"', | |
"\\": "\\\\", | |
}; | |
const escapeCharacter = (character: string): string => { | |
const slashEscaped = backslashReplacementMatch[character]; | |
if (slashEscaped !== undefined) { | |
return slashEscaped; | |
} else { | |
// If the character is marked for replacement and we can't slash escape it, we'll encode it | |
// with a six-character \u00xx uppercase hexadecimal escape sequence | |
const charCode = character | |
.charCodeAt(0) | |
.toString(16) | |
.toUpperCase() | |
.padStart(4, "0"); | |
return "\\u" + charCode; | |
} | |
}; | |
const sanitiseString = (node: string): string => | |
node.replace(replaceRegex, escapeCharacter); | |
// A nice easy test case | |
const testString = | |
'{"-0":0,"-1":-1,"0.1":1.0E-1,"1":1,"10.1":1.01E1,"emoji":"😃","escape":"\\u001B","lone surrogate":"\\uDEAD","whitespace":" \\t\\n\\r"}'; | |
console.log(encode(decode(testString)) === testString); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment