Created
November 17, 2020 00:42
-
-
Save THEtheChad/25db637f5c1345d7eb0761a7d05da7fe to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Inspired by https://gist.github.com/creationix/1821394
// from Tim Caswell
import { EventEmitter } from 'events'; | |
/**
 * Listener signatures for the events a `SaxParser` emits, keyed by event name.
 */
interface ParserEvents {
  // Punctuation tokens: listeners receive no arguments.
  StartObject: () => void; // `{`
  EndObject: () => void; // `}`
  StartArray: () => void; // `[`
  EndArray: () => void; // `]`
  Colon: () => void; // `:`
  Comma: () => void; // `,`

  // Value tokens: listeners receive the decoded value, if there is one.
  String: (text: string) => void;
  Number: (value: number) => void;
  Boolean: (value: boolean) => void;
  Null: () => void;

  // Problems found in the input.
  Error: (error: Error) => void;
}
/**
 * Typed overloads of `on` and `emit`, declared under the same name as the
 * `SaxParser` class so they merge with it. Event names are restricted to the
 * keys of `ParserEvents`, and listener / argument types are taken from the
 * matching entry.
 */
declare interface SaxParser {
  /** Registers `listener` for `event`; the listener type follows from the event name. */
  on<TEvent extends keyof ParserEvents>(
    event: TEvent,
    listener: ParserEvents[TEvent]
  ): this;

  /** Emits `event`, passing exactly the arguments its listener signature declares. */
  emit<TEvent extends keyof ParserEvents>(
    event: TEvent,
    ...args: Parameters<ParserEvents[TEvent]>
  ): boolean;
}
// Parser state identifiers. Every value is a distinct integer: the high hex
// digit groups related states, the low hex digit numbers the step in the group.
const C = {
  // Between tokens
  START: 0x11,

  // Reading the remaining letters of `true`
  TRUE1: 0x21,
  TRUE2: 0x22,
  TRUE3: 0x23,

  // Reading the remaining letters of `false`
  FALSE1: 0x31,
  FALSE2: 0x32,
  FALSE3: 0x33,
  FALSE4: 0x34,

  // Reading the remaining letters of `null`.
  // NULL2 and NULL3 hold 0x43 and 0x42 respectively (out of numeric sequence).
  NULL1: 0x41,
  NULL2: 0x43,
  NULL3: 0x42,

  // Inside a number literal
  NUMBER1: 0x51,
  NUMBER2: 0x52,
  NUMBER3: 0x53,
  NUMBER4: 0x54,
  NUMBER5: 0x55,
  NUMBER6: 0x56,
  NUMBER7: 0x57,
  NUMBER8: 0x58,

  // Inside a string literal. STRING3..STRING6 must stay consecutive: the
  // parser steps through the four `\u` hex digits by incrementing the state.
  STRING1: 0x61,
  STRING2: 0x62,
  STRING3: 0x63,
  STRING4: 0x64,
  STRING5: 0x65,
  STRING6: 0x66
};
/**
 * Converts a numeric state code into a printable name.
 *
 * This is a slow linear search over `C`; it is only used when building
 * syntax-error messages.
 *
 * @param code - Numeric state value to describe.
 * @returns The key of `C` whose value equals `code`, or, when there is none,
 *   `code` formatted as `0x`-prefixed hexadecimal. The result is always a
 *   string, including for `0`.
 */
function toknam(code: number): string {
  const names = Object.keys(C) as Array<keyof typeof C>;
  const match = names.find((name) => C[name] === code);
  return match ?? '0x' + code.toString(16);
}
/** Reports whether `byte` is an ASCII decimal digit, `0` (0x30) through `9` (0x39). */
function isDigitByte(byte: number): boolean {
  return byte >= 0x30 && byte <= 0x39;
}

/** Reports whether `byte` is an ASCII hexadecimal digit: `0`-`9`, `A`-`F` or `a`-`f`. */
function isHexDigitByte(byte: number): boolean {
  return (
    isDigitByte(byte) ||
    (byte >= 0x41 && byte <= 0x46) ||
    (byte >= 0x61 && byte <= 0x66)
  );
}

/**
 * Incremental, SAX-style JSON tokenizer.
 *
 * Feed input with `parse()` (as many chunks as needed, split at any byte) and
 * call `finish()` once the input is exhausted. Tokens are reported as events:
 * `StartObject`, `EndObject`, `StartArray`, `EndArray`, `Colon`, `Comma`,
 * `String`, `Number`, `Boolean` and `Null`. Problems are reported through the
 * `Error` event.
 *
 * Only the lexical structure is checked; the tokenizer does not verify that
 * tokens are nested or ordered the way JSON requires.
 */
class SaxParser extends EventEmitter {
  /** Current tokenizer state; one of the values of `C`. */
  state = C.START;

  // for string parsing
  string?: string; // decoded text of the string literal being read
  unicode?: string; // hex digits collected so far for a `\uXXXX` escape

  // For number parsing
  /**
   * @deprecated These fields are no longer written. Number literals are now
   * collected as text and converted once (see `finish()`), because summing
   * decimal digits in floating point produced inexact results such as
   * `0.30000000000000004` for `0.3`. The declarations remain so that code
   * referring to these properties still type-checks.
   */
  negative?: boolean;
  /** @deprecated See `negative`. */
  magnatude?: number;
  /** @deprecated See `negative`. */
  position?: number;
  /** @deprecated See `negative`. */
  exponent?: number;
  /** @deprecated See `negative`. */
  negativeExponent?: boolean;

  /** Source text of the number literal currently being read. */
  private numberText = '';

  /** Bytes >= 0x80 of the current string literal that have not been decoded yet. */
  private pendingUtf8: number[] = [];

  /** Single-character escapes, keyed by the byte that follows the backslash. */
  private static readonly SIMPLE_ESCAPES: Readonly<
    Record<number, string | undefined>
  > = {
    0x22: '"',
    0x5c: '\\',
    0x2f: '/',
    0x62: '\b',
    0x66: '\f',
    0x6e: '\n',
    0x72: '\r',
    0x74: '\t'
  };

  /**
   * Emits an `Error` event describing an unexpected byte.
   *
   * @param buffer - Chunk being parsed.
   * @param i - Index of the offending byte within `buffer`.
   */
  charError(buffer: Buffer, i: number): void {
    const offending = JSON.stringify(String.fromCharCode(buffer[i]));
    const stateName = toknam(this.state);
    this.emit(
      'Error',
      new Error(
        `Unexpected ${offending} at position ${i} in state ${stateName}`
      )
    );
  }

  /**
   * Tokenizes one chunk of input, emitting an event for every completed token.
   *
   * State is kept between calls, so a token may span several chunks. A number
   * that ends exactly at the end of the input is only emitted by `finish()`.
   * When an unexpected byte is met, an `Error` event is emitted and the rest
   * of the chunk is left unprocessed.
   *
   * @param buffer - Bytes to parse; a string is converted to its UTF-8 bytes.
   */
  parse(buffer: Buffer | string): void {
    const bytes = typeof buffer === 'string' ? Buffer.from(buffer) : buffer;
    for (let i = 0; i < bytes.length; i++) {
      const n = bytes[i];
      switch (this.state) {
        case C.START:
          switch (n) {
            case 0x7b: // `{`
              this.emit('StartObject');
              continue;
            case 0x7d: // `}`
              this.emit('EndObject');
              continue;
            case 0x5b: // `[`
              this.emit('StartArray');
              continue;
            case 0x5d: // `]`
              this.emit('EndArray');
              continue;
            case 0x3a: // `:`
              this.emit('Colon');
              continue;
            case 0x2c: // `,`
              this.emit('Comma');
              continue;
            case 0x74: // `t`
              this.state = C.TRUE1;
              continue;
            case 0x66: // `f`
              this.state = C.FALSE1;
              continue;
            case 0x6e: // `n`
              this.state = C.NULL1;
              continue;
            case 0x22: // `"`
              this.string = '';
              this.pendingUtf8.length = 0;
              this.state = C.STRING1;
              continue;
            case 0x2d: // `-`
              this.numberText = '';
              this.takeNumberByte(n, C.NUMBER1);
              continue;
            case 0x30: // `0`
              this.numberText = '';
              this.takeNumberByte(n, C.NUMBER2);
              continue;
          }
          if (isDigitByte(n)) {
            // `1`-`9` (`0` was handled by the switch above)
            this.numberText = '';
            this.takeNumberByte(n, C.NUMBER3);
            continue;
          }
          if (n === 0x20 || n === 0x09 || n === 0x0a || n === 0x0d) {
            continue; // whitespace
          }
          return this.charError(bytes, i);

        case C.STRING1: // After open quote
          if (n === 0x22) {
            // closing `"`
            this.flushPendingUtf8();
            const text = this.string ?? '';
            this.string = undefined;
            this.state = C.START;
            this.emit('String', text);
            continue;
          }
          if (n === 0x5c) {
            // `\`
            this.flushPendingUtf8();
            this.state = C.STRING2;
            continue;
          }
          if (n >= 0x80) {
            // Part of a multi-byte UTF-8 sequence: hold it until the whole
            // run of non-ASCII bytes is available, then decode it in one go.
            this.pendingUtf8.push(n);
            continue;
          }
          if (n >= 0x20) {
            this.flushPendingUtf8();
            this.appendText(String.fromCharCode(n));
            continue;
          }
          return this.charError(bytes, i);

        case C.STRING2: {
          // After backslash
          if (n === 0x75) {
            // `u`: four hex digits follow
            this.unicode = '';
            this.state = C.STRING3;
            continue;
          }
          const unescaped = SaxParser.SIMPLE_ESCAPES[n];
          if (unescaped === undefined) {
            return this.charError(bytes, i);
          }
          this.appendText(unescaped);
          this.state = C.STRING1;
          continue;
        }

        case C.STRING3:
        case C.STRING4:
        case C.STRING5:
        case C.STRING6: // unicode hex codes
          if (!isHexDigitByte(n)) {
            return this.charError(bytes, i);
          }
          this.unicode = (this.unicode ?? '') + String.fromCharCode(n);
          if (this.state === C.STRING6) {
            // Fourth digit: the escape is complete.
            this.appendText(String.fromCharCode(parseInt(this.unicode, 16)));
            this.unicode = undefined;
            this.state = C.STRING1;
          } else {
            // STRING3..STRING6 are consecutive values, one per hex digit.
            this.state++;
          }
          continue;

        case C.NUMBER1: // after minus
          if (n === 0x30) {
            this.takeNumberByte(n, C.NUMBER2);
            continue;
          }
          if (isDigitByte(n)) {
            // `1`-`9`
            this.takeNumberByte(n, C.NUMBER3);
            continue;
          }
          return this.charError(bytes, i);

        case C.NUMBER2: // * After initial zero
          if (n === 0x2e) {
            // `.`
            this.takeNumberByte(n, C.NUMBER4);
            continue;
          }
          if (n === 0x65 || n === 0x45) {
            // `e` / `E`
            this.takeNumberByte(n, C.NUMBER6);
            continue;
          }
          this.finish();
          i--; // the number ended on the previous byte; re-read this one
          continue;

        case C.NUMBER3: // * After digit (before period)
          if (n === 0x2e) {
            // `.`
            this.takeNumberByte(n, C.NUMBER4);
            continue;
          }
          if (n === 0x65 || n === 0x45) {
            // `e` / `E`
            this.takeNumberByte(n, C.NUMBER6);
            continue;
          }
          if (isDigitByte(n)) {
            this.takeNumberByte(n, C.NUMBER3);
            continue;
          }
          this.finish();
          i--; // rewind to re-check
          continue;

        case C.NUMBER4: // After period
          if (isDigitByte(n)) {
            this.takeNumberByte(n, C.NUMBER5);
            continue;
          }
          return this.charError(bytes, i);

        case C.NUMBER5: // * After digit (after period)
          if (isDigitByte(n)) {
            this.takeNumberByte(n, C.NUMBER5);
            continue;
          }
          if (n === 0x65 || n === 0x45) {
            // `e` / `E`
            this.takeNumberByte(n, C.NUMBER6);
            continue;
          }
          this.finish();
          i--; // rewind
          continue;

        case C.NUMBER6: // After E
          if (n === 0x2b || n === 0x2d) {
            // `+` / `-`
            this.takeNumberByte(n, C.NUMBER7);
            continue;
          }
          if (isDigitByte(n)) {
            this.takeNumberByte(n, C.NUMBER8);
            continue;
          }
          return this.charError(bytes, i);

        case C.NUMBER7: // After +/-
          if (isDigitByte(n)) {
            this.takeNumberByte(n, C.NUMBER8);
            continue;
          }
          return this.charError(bytes, i);

        case C.NUMBER8: // * After digit (after +/-)
          if (isDigitByte(n)) {
            this.takeNumberByte(n, C.NUMBER8);
            continue;
          }
          this.finish();
          i--; // rewind
          continue;

        case C.TRUE1: // expect `r`
          if (n !== 0x72) return this.charError(bytes, i);
          this.state = C.TRUE2;
          continue;
        case C.TRUE2: // expect `u`
          if (n !== 0x75) return this.charError(bytes, i);
          this.state = C.TRUE3;
          continue;
        case C.TRUE3: // expect `e`
          if (n !== 0x65) return this.charError(bytes, i);
          this.state = C.START;
          this.emit('Boolean', true);
          continue;

        case C.FALSE1: // expect `a`
          if (n !== 0x61) return this.charError(bytes, i);
          this.state = C.FALSE2;
          continue;
        case C.FALSE2: // expect `l`
          if (n !== 0x6c) return this.charError(bytes, i);
          this.state = C.FALSE3;
          continue;
        case C.FALSE3: // expect `s`
          if (n !== 0x73) return this.charError(bytes, i);
          this.state = C.FALSE4;
          continue;
        case C.FALSE4: // expect `e`
          if (n !== 0x65) return this.charError(bytes, i);
          this.state = C.START;
          this.emit('Boolean', false);
          continue;

        case C.NULL1: // expect `u`
          if (n !== 0x75) return this.charError(bytes, i);
          this.state = C.NULL2;
          continue;
        case C.NULL2: // expect `l`
          if (n !== 0x6c) return this.charError(bytes, i);
          this.state = C.NULL3;
          continue;
        case C.NULL3: // expect `l`
          if (n !== 0x6c) return this.charError(bytes, i);
          this.state = C.START;
          this.emit('Null');
          continue;
      }
    }
  }

  /**
   * Completes the token in progress.
   *
   * `parse()` calls this when a byte that cannot belong to the current number
   * is met; callers invoke it once after the last chunk. If the state is one
   * in which a number literal is complete, the number is emitted and the
   * state returns to `START`. In any other state except `START`, an `Error`
   * event ("Unexpected end of input stream") is emitted.
   */
  finish(): void {
    switch (this.state) {
      case C.NUMBER2: // * After initial zero
      case C.NUMBER3: // * After digit (before period)
      case C.NUMBER5: // * After digit (after period)
      case C.NUMBER8: {
        // * After digit (after +/-)
        // The state machine has already validated the literal, so one
        // conversion of the collected text gives the correctly rounded
        // value (including `-0`).
        const value = Number(this.numberText);
        this.numberText = '';
        this.state = C.START;
        this.emit('Number', value);
        return;
      }
    }
    if (this.state !== C.START) {
      this.emit('Error', new Error('Unexpected end of input stream'));
    }
  }

  /** Appends `byte` to the number literal being collected and moves to `nextState`. */
  private takeNumberByte(byte: number, nextState: number): void {
    this.numberText += String.fromCharCode(byte);
    this.state = nextState;
  }

  /** Appends already-decoded text to the string literal being collected. */
  private appendText(text: string): void {
    this.string = (this.string ?? '') + text;
  }

  /** Decodes the held non-ASCII bytes as UTF-8 and appends the result to the string. */
  private flushPendingUtf8(): void {
    if (this.pendingUtf8.length === 0) {
      return;
    }
    this.appendText(Buffer.from(this.pendingUtf8).toString('utf8'));
    this.pendingUtf8.length = 0;
  }
}
export default SaxParser; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment