Created
June 4, 2023 13:14
-
-
Save nicolo-ribaudo/c5b09c43a10a99fd92c90e81e60732ec to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Code points of the ASCII characters that have structural meaning for the
// parser. A `const` binding to a numeric literal already has a literal type,
// so these can be used directly as `case` labels.
const CH_BRACE_L = 0x7b; // {
const CH_BRACE_R = 0x7d; // }
const CH_SQUARE_L = 0x5b; // [
const CH_SQUARE_R = 0x5d; // ]
const CH_QUOTE_D = 0x22; // "
const CH_ESCAPE = 0x5c; // \
const CH_COMMA = 0x2c; // ,
const CH_COLON = 0x3a; // :
const CH_DOT = 0x2e; // .
const CH_MINUS = 0x2d; // -
/**
 * Tells whether a code point may belong to a bare JSON token, i.e. a keyword
 * (`true`, `false`, `null`) or a number.
 *
 * Accepts lowercase ASCII letters, ASCII digits, `.`, `-`, and also `+` and
 * uppercase `E`, so that exponent spellings such as `1E5` or `1e+5` stay
 * within a single token. This predicate only delimits the token; it does not
 * check that the collected characters form a valid keyword or number.
 *
 * @param cp Code point to classify.
 * @returns `true` if `cp` can be part of a keyword or number token.
 */
function isKwdOrNum(cp: number) {
  return (
    (0x61 <= cp && cp <= 0x7a) || // a-z
    (0x30 <= cp && cp <= 0x39) || // 0-9
    cp === CH_DOT ||
    cp === CH_MINUS ||
    cp === 0x2b || // "+" (exponent sign)
    cp === 0x45 // "E" (uppercase exponent marker)
  );
}
/**
 * Tells whether a code point can be copied verbatim while scanning the inside
 * of a string literal: anything except the backslash (which starts an escape)
 * and the double quote (which ends the string).
 *
 * @param cp Code point to classify.
 * @returns `true` unless `cp` is `\` or `"`.
 */
function isSTRChar(cp: number) {
  return cp !== CH_ESCAPE && cp !== CH_QUOTE_D;
}
/**
 * Parses an async iterable (or a plain iterable) yielding chunks of a
 * JSON-encoded value and resolves to the decoded value.
 *
 * The text is consumed by a small state machine, so strings, numbers and
 * keywords may be split across chunk boundaries. Those tokens are accumulated
 * as raw text and decoded with `JSON.parse`.
 *
 * It should parse valid JSON, but it's not guaranteed to properly
 * reject invalid JSON (for example, truncated input or trailing commas
 * are not reported).
 *
 * Number and keyword tokens are delimited by `isKwdOrNum`, so only number
 * spellings made of characters accepted by that predicate are supported.
 *
 * @param stream Chunks of the JSON text, in order.
 * @returns The parsed value, or `undefined` if the stream held no value.
 * @throws Error on an unexpected character or an inconsistent parser state;
 *   `JSON.parse` errors propagate for malformed strings/numbers/keywords.
 */
export async function parseJSONStreaming(
  stream: AsyncIterable<string> | Iterable<string>
): Promise<unknown> {
  // Values under construction: `value` is the innermost one, `stack` holds
  // the ones that were current before it (containers and pending object keys).
  const stack: unknown[] = [];
  let value: unknown;
  // Parser states: `state` is the active one, `stateStack` the enclosing ones.
  const stateStack: string[] = [];
  let state = "VAL";
  // Cursor into the chunk currently being scanned.
  let i = 0;
  let chunk: string;
  let chunkLen = 0;
  // Raw source text of the string/number/keyword token being accumulated.
  let currRaw = "";

  // Advances past JSON whitespace (space, tab, line feed, carriage return).
  // Returns true when the end of the current chunk has been reached.
  function skipSpaces() {
    let ch;
    while (
      i < chunkLen &&
      ((ch = chunk[i]),
      ch === " " || ch === "\t" || ch === "\n" || ch === "\r")
    ) {
      i++;
    }
    return i === chunkLen;
  }

  // Advances the cursor while `test` accepts the code point under it and
  // returns the position where the run started.
  function takeWhile(test: (cp: number) => boolean) {
    const start = i;
    while (i < chunkLen && test(chunk.codePointAt(i)!)) i++;
    return start;
  }

  // Switches to state `s`, remembering the current one.
  function enter(s: string) {
    stateStack.push(state);
    state = s;
  }

  // Leaves the current state (which must be `expected`) and resumes the
  // enclosing one.
  function exit(expected: string) {
    if (state !== expected) throw new Error("Popped invalid state");
    state = stateStack.pop()!;
  }

  // Makes `val` the current value, saving the previous one.
  function pushVal(val: unknown) {
    stack.push(value);
    value = val;
  }

  // Restores the previously saved value and returns the one it replaces.
  function popVal() {
    const old = value;
    value = stack.pop();
    return old;
  }

  // Reports the character under the cursor as invalid in the current state.
  function unexpected() {
    throw new Error(
      `Unexpected ${JSON.stringify(chunk[i])} (${i}) in "${state}"`
    );
  }

  for await (chunk of stream) {
    chunkLen = chunk.length;
    i = 0;
    loop: while (i < chunkLen) {
      switch (state) {
        // Expecting the start of a value (or the end of an empty container).
        case "VAL": {
          if (skipSpaces()) break loop;
          const cp = chunk.codePointAt(i)!;
          switch (cp) {
            case CH_BRACE_L:
              enter("OBJ");
              pushVal({});
              enter("VAL");
              i++;
              continue loop;
            case CH_BRACE_R:
              // empty object
              exit("VAL");
              exit("OBJ");
              exit("VAL");
              i++;
              continue loop;
            case CH_SQUARE_L:
              enter("ARR");
              pushVal([]);
              enter("VAL");
              i++;
              continue loop;
            case CH_SQUARE_R:
              // empty array
              exit("VAL");
              exit("ARR");
              exit("VAL");
              i++;
              continue loop;
            case CH_QUOTE_D:
              enter("STR");
              i++;
              currRaw = '"';
              continue loop;
            default:
              if (isKwdOrNum(cp)) {
                enter("KWN");
                currRaw = chunk.slice(takeWhile(isKwdOrNum), i);
                continue loop;
              }
              unexpected();
          }
          continue loop;
        }
        // Inside a keyword or number token.
        case "KWN":
          currRaw += chunk.slice(takeWhile(isKwdOrNum), i);
          // The token is only known to be complete when a non-token character
          // follows it within this chunk; otherwise it may continue in the
          // next chunk.
          if (i < chunkLen) {
            pushVal(JSON.parse(currRaw));
            exit("KWN");
            exit("VAL");
          }
          continue loop;
        // Inside a string literal.
        case "STR": {
          const cp = chunk.codePointAt(i)!;
          switch (cp) {
            case CH_QUOTE_D:
              currRaw += '"';
              i++;
              pushVal(JSON.parse(currRaw));
              exit(state);
              exit("VAL");
              continue loop;
            case CH_ESCAPE:
              enter("ESC");
              currRaw += "\\";
              i++;
              continue loop;
            default:
              currRaw += chunk.slice(takeWhile(isSTRChar), i);
          }
          continue loop;
        }
        // Right after a backslash: copy exactly one character. The hex digits
        // of a \uXXXX escape are then collected as ordinary string characters
        // and decoded by JSON.parse when the string closes.
        case "ESC":
          currRaw += chunk[i];
          i++;
          exit("ESC");
          continue loop;
        // After an array element.
        case "ARR": {
          if (skipSpaces()) break loop;
          const cp = chunk.codePointAt(i)!;
          switch (cp) {
            case CH_SQUARE_R:
            case CH_COMMA: {
              const el = popVal();
              (value as unknown[]).push(el);
              i++;
              if (cp === CH_COMMA) {
                enter("VAL");
              } else {
                exit("ARR");
                exit("VAL");
              }
              continue loop;
            }
            default:
              unexpected();
          }
          continue loop;
        }
        // After an object key or an object member value.
        case "OBJ": {
          if (skipSpaces()) break loop;
          const cp = chunk.codePointAt(i)!;
          switch (cp) {
            case CH_BRACE_R:
            case CH_COMMA: {
              const val = popVal();
              const key = popVal() as string;
              // Define an own property instead of assigning, so that a
              // "__proto__" key becomes a regular member (as JSON.parse does)
              // rather than replacing the object's prototype.
              Object.defineProperty(value as object, key, {
                value: val,
                writable: true,
                enumerable: true,
                configurable: true,
              });
              i++;
              if (cp === CH_COMMA) {
                enter("VAL");
              } else {
                exit("OBJ");
                exit("VAL");
              }
              continue loop;
            }
            case CH_COLON: {
              enter("VAL");
              i++;
              continue loop;
            }
            default:
              unexpected();
          }
          continue loop;
        }
        // The top-level value is complete: the last exit("VAL") popped an
        // empty state stack, so no named state is active. Only trailing
        // whitespace is tolerated from here on.
        default:
          if (!skipSpaces()) unexpected();
      }
    }
  }

  // A keyword/number that runs up to the very end of the input has not been
  // decoded yet, because nothing followed it to mark its end.
  if (state === "KWN") {
    value = JSON.parse(currRaw);
    exit("KWN");
    exit("VAL");
  }
  return value;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment