// Source: GitHub Gist by @THEtheChad (gist 25db637f5c1345d7eb0761a7d05da7fe),
// created November 17, 2020 00:42.
// inspired by https://gist.github.com/creationix/1821394
// from Tim Caswell
import { EventEmitter } from 'events';
/**
 * Maps every event name the parser emits to the signature of its listener.
 */
interface ParserEvents {
// Emitted when `{` is read.
StartObject: () => void;
// Emitted when `}` is read.
EndObject: () => void;
// Emitted when `[` is read.
StartArray: () => void;
// Emitted when `]` is read.
EndArray: () => void;
// Emitted when `:` is read.
Colon: () => void;
// Emitted when `,` is read.
Comma: () => void;
// Emitted when a string's closing quote is read; `str` is the string's
// contents with escape sequences already resolved.
String: (str: string) => void;
// Emitted when the literal `true` or `false` has been read in full.
Boolean: (bool: boolean) => void;
// Emitted when the literal `null` has been read in full.
Null: () => void;
// Emitted when a number literal has ended; `num` is its numeric value.
Number: (num: number) => void;
// Emitted for an unexpected character, or when `finish()` is called while a
// token is still incomplete.
Error: (err: Error) => void;
}
/**
 * Type-only declaration that shares its name with the `SaxParser` class in
 * this file. TypeScript merges the two, which gives `on` and `emit` typed
 * signatures keyed by `ParserEvents`: the event name selects the listener
 * type for `on` and the argument list for `emit`.
 */
declare interface SaxParser {
// Registers `listener` for `event`; the listener type is looked up from
// `ParserEvents`.
on<U extends keyof ParserEvents>(event: U, listener: ParserEvents[U]): this;
// Emits `event`; the arguments must match the parameters of the listener type
// declared for that event in `ParserEvents`.
emit<U extends keyof ParserEvents>(
event: U,
...args: Parameters<ParserEvents[U]>
): boolean;
}
// Parser states: named constants with unique integer values.
// The high nibble identifies the token family and the low nibble the step
// within that family, so within each family the numeric order follows the
// name order. STRING3..STRING6 in particular must stay consecutive, because
// the parser walks through the four hex digits of a `\uXXXX` escape by
// incrementing the state.
const C = {
  // Between tokens
  START: 0x11,
  // Literal `true` (after `t`, `tr`, `tru`)
  TRUE1: 0x21,
  TRUE2: 0x22,
  TRUE3: 0x23,
  // Literal `false` (after `f`, `fa`, `fal`, `fals`)
  FALSE1: 0x31,
  FALSE2: 0x32,
  FALSE3: 0x33,
  FALSE4: 0x34,
  // Literal `null` (after `n`, `nu`, `nul`)
  NULL1: 0x41,
  NULL2: 0x42,
  NULL3: 0x43,
  // Number literal
  NUMBER1: 0x51, // after `-`
  NUMBER2: 0x52, // after a leading `0`
  NUMBER3: 0x53, // after an integer digit
  NUMBER4: 0x54, // after `.`
  NUMBER5: 0x55, // after a fraction digit
  NUMBER6: 0x56, // after `e` / `E`
  NUMBER7: 0x57, // after the exponent sign
  NUMBER8: 0x58, // after an exponent digit
  // String literal
  STRING1: 0x61, // after the opening quote
  STRING2: 0x62, // after a backslash
  STRING3: 0x63, // `\u` escape, expecting hex digit 1
  STRING4: 0x64, // expecting hex digit 2
  STRING5: 0x65, // expecting hex digit 3
  STRING6: 0x66 // expecting hex digit 4
};
/**
 * Turns a numeric state code into something readable for error messages.
 *
 * Does a linear search over `C`, so it is only meant for the (rare) error
 * path.
 *
 * @param code - state value to describe
 * @returns the matching key of `C` when one exists; otherwise the code
 *          formatted as `0x…` hex, or the falsy `code` itself (i.e. `0`)
 */
function toknam(code: number) {
  const stateNames = Object.keys(C) as Array<keyof typeof C>;
  const match = stateNames.find((name) => C[name] === code);
  if (match !== undefined) {
    return match;
  }
  return code && '0x' + code.toString(16);
}
/**
 * Streaming, SAX-style JSON tokenizer.
 *
 * Bytes are pushed in through `parse()` in any number of chunks; every
 * completed token is reported as an event (`StartObject`, `String`,
 * `Number`, ...). Call `finish()` after the last chunk so that a number
 * sitting at the very end of the input is flushed and truncated input is
 * reported.
 *
 * Only tokens are recognised: the parser does not check that they are
 * arranged into a well-formed document (two `Comma` events in a row are
 * emitted without complaint).
 *
 * Problems are reported through the capitalised `Error` event. That is not
 * Node's special-cased lowercase `error` event, so nothing is thrown when no
 * listener is attached.
 */
class SaxParser extends EventEmitter {
  /** Characters produced by the single-letter escapes that may follow `\`. */
  private static readonly ESCAPES: { readonly [byte: number]: string } = {
    0x22: '"',
    0x5c: '\\',
    0x2f: '/',
    0x62: '\b',
    0x66: '\f',
    0x6e: '\n',
    0x72: '\r',
    0x74: '\t'
  };

  /** Current state of the state machine; always one of the values in `C`. */
  state = C.START;

  // for string parsing
  string?: string; // decoded contents of the string token being read
  unicode?: string; // hex digits collected so far for a `\uXXXX` escape

  // Numeric scratch fields of the earlier float-accumulating implementation.
  // The parser no longer writes them; they remain declared only so that the
  // public shape of the class is unchanged.
  negative?: boolean;
  magnatude?: number;
  position?: number;
  exponent?: number;
  negativeExponent?: boolean;

  /** Raw text of the number literal being read (persists across chunks). */
  private numberText = '';

  /**
   * Bytes >= 0x80 seen inside a string and not decoded yet. They are decoded
   * as one run so that multi-byte UTF-8 sequences, including ones split
   * across two `parse()` calls, become the right characters.
   */
  private pendingBytes: number[] = [];

  /** True for the ASCII bytes `0`-`9`. */
  private static isDigit(n: number): boolean {
    return n >= 0x30 && n <= 0x39;
  }

  /** True for the ASCII bytes `0`-`9`, `A`-`F` and `a`-`f`. */
  private static isHexDigit(n: number): boolean {
    return (
      SaxParser.isDigit(n) ||
      (n >= 0x41 && n <= 0x46) ||
      (n >= 0x61 && n <= 0x66)
    );
  }

  /** Appends `text` to the string token being built. */
  private appendToString(text: string) {
    this.string = (this.string ?? '') + text;
  }

  /** Decodes the buffered non-ASCII bytes as UTF-8 and appends the result. */
  private flushPendingBytes() {
    if (this.pendingBytes.length === 0) {
      return;
    }
    this.appendToString(Buffer.from(this.pendingBytes).toString('utf8'));
    this.pendingBytes.length = 0;
  }

  /** Adds an already validated byte to the number literal and moves to `next`. */
  private takeNumberByte(n: number, next: number) {
    this.numberText += String.fromCharCode(n);
    this.state = next;
  }

  /**
   * Emits an `Error` event describing the unexpected byte `buffer[i]`.
   * Neither throws nor changes the parser state.
   *
   * @param buffer - chunk currently being parsed
   * @param i - index of the offending byte within `buffer`
   */
  charError(buffer: Buffer, i: number) {
    this.emit(
      'Error',
      new Error(
        'Unexpected ' +
          JSON.stringify(String.fromCharCode(buffer[i])) +
          ' at position ' +
          i +
          ' in state ' +
          toknam(this.state)
      )
    );
  }

  /**
   * Feeds one chunk of input to the tokenizer, emitting an event for every
   * token completed inside it. State is kept between calls, so a token may
   * span several chunks.
   *
   * An unexpected byte is reported through `charError` and then skipped; the
   * state is left as it was and parsing continues with the next byte.
   *
   * @param buffer - UTF-8 encoded JSON bytes; a string is also accepted and
   *                 is encoded as UTF-8 first
   */
  parse(buffer: Buffer | string) {
    const bytes = typeof buffer === 'string' ? Buffer.from(buffer) : buffer;
    for (let i = 0; i < bytes.length; i++) {
      const n = bytes[i];
      switch (this.state) {
        case C.START:
          switch (n) {
            case 0x7b: // `{`
              this.emit('StartObject');
              continue;
            case 0x7d: // `}`
              this.emit('EndObject');
              continue;
            case 0x5b: // `[`
              this.emit('StartArray');
              continue;
            case 0x5d: // `]`
              this.emit('EndArray');
              continue;
            case 0x3a: // `:`
              this.emit('Colon');
              continue;
            case 0x2c: // `,`
              this.emit('Comma');
              continue;
            case 0x74: // `t`
              this.state = C.TRUE1;
              continue;
            case 0x66: // `f`
              this.state = C.FALSE1;
              continue;
            case 0x6e: // `n`
              this.state = C.NULL1;
              continue;
            case 0x22: // `"`
              this.string = '';
              this.state = C.STRING1;
              continue;
            case 0x2d: // `-`
              this.numberText = '-';
              this.state = C.NUMBER1;
              continue;
            case 0x30: // `0`
              this.numberText = '0';
              this.state = C.NUMBER2;
              continue;
            case 0x20: // space
            case 0x09: // tab
            case 0x0a: // line feed
            case 0x0d: // carriage return
              continue;
          }
          if (n >= 0x31 && n <= 0x39) {
            // `1`-`9`
            this.numberText = String.fromCharCode(n);
            this.state = C.NUMBER3;
            continue;
          }
          this.charError(bytes, i);
          continue;

        case C.STRING1: // inside a string, after the opening quote
          if (n >= 0x80) {
            // Part of a multi-byte UTF-8 sequence: decode later, as a run.
            this.pendingBytes.push(n);
            continue;
          }
          this.flushPendingBytes();
          if (n === 0x22) {
            // closing `"`
            this.emit('String', this.string ?? '');
            this.string = undefined;
            this.state = C.START;
            continue;
          }
          if (n === 0x5c) {
            // `\`
            this.state = C.STRING2;
            continue;
          }
          if (n >= 0x20) {
            this.appendToString(String.fromCharCode(n));
            continue;
          }
          // Unescaped control character.
          this.charError(bytes, i);
          continue;

        case C.STRING2: {
          // after a backslash
          if (n === 0x75) {
            // `u`: four hex digits follow
            this.unicode = '';
            this.state = C.STRING3;
            continue;
          }
          const escaped = SaxParser.ESCAPES[n];
          if (escaped !== undefined) {
            this.appendToString(escaped);
            this.state = C.STRING1;
            continue;
          }
          this.charError(bytes, i);
          continue;
        }

        case C.STRING3:
        case C.STRING4:
        case C.STRING5:
        case C.STRING6: // the four hex digits of a `\uXXXX` escape
          if (!SaxParser.isHexDigit(n)) {
            this.charError(bytes, i);
            continue;
          }
          this.unicode = (this.unicode ?? '') + String.fromCharCode(n);
          if (this.state === C.STRING6) {
            // Fourth digit: the code unit is complete. A surrogate pair is
            // two consecutive escapes and joins up naturally in the string.
            this.appendToString(String.fromCharCode(parseInt(this.unicode, 16)));
            this.unicode = undefined;
            this.state = C.STRING1;
          } else {
            // STRING3..STRING6 are consecutive values, so +1 is "next digit".
            this.state++;
          }
          continue;

        case C.NUMBER1: // after `-`
          if (n === 0x30) {
            this.takeNumberByte(n, C.NUMBER2);
          } else if (n >= 0x31 && n <= 0x39) {
            this.takeNumberByte(n, C.NUMBER3);
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.NUMBER2: // * after a leading zero
          if (n === 0x2e) {
            // `.`
            this.takeNumberByte(n, C.NUMBER4);
          } else if (n === 0x65 || n === 0x45) {
            // `e` / `E`
            this.takeNumberByte(n, C.NUMBER6);
          } else {
            this.finish();
            i--; // rewind so this byte is re-read in the START state
          }
          continue;

        case C.NUMBER3: // * after an integer digit
          if (n === 0x2e) {
            // `.`
            this.takeNumberByte(n, C.NUMBER4);
          } else if (n === 0x65 || n === 0x45) {
            // `e` / `E`
            this.takeNumberByte(n, C.NUMBER6);
          } else if (SaxParser.isDigit(n)) {
            this.takeNumberByte(n, C.NUMBER3);
          } else {
            this.finish();
            i--; // rewind so this byte is re-read in the START state
          }
          continue;

        case C.NUMBER4: // after `.`: at least one digit is required
          if (SaxParser.isDigit(n)) {
            this.takeNumberByte(n, C.NUMBER5);
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.NUMBER5: // * after a fraction digit
          if (SaxParser.isDigit(n)) {
            this.takeNumberByte(n, C.NUMBER5);
          } else if (n === 0x65 || n === 0x45) {
            // `e` / `E`
            this.takeNumberByte(n, C.NUMBER6);
          } else {
            this.finish();
            i--; // rewind so this byte is re-read in the START state
          }
          continue;

        case C.NUMBER6: // after `e` / `E`
          if (n === 0x2b || n === 0x2d) {
            // `+` / `-`
            this.takeNumberByte(n, C.NUMBER7);
          } else if (SaxParser.isDigit(n)) {
            this.takeNumberByte(n, C.NUMBER8);
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.NUMBER7: // after the exponent sign: a digit is required
          if (SaxParser.isDigit(n)) {
            this.takeNumberByte(n, C.NUMBER8);
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.NUMBER8: // * after an exponent digit
          if (SaxParser.isDigit(n)) {
            this.takeNumberByte(n, C.NUMBER8);
          } else {
            this.finish();
            i--; // rewind so this byte is re-read in the START state
          }
          continue;

        case C.TRUE1: // expect `r`
          if (n === 0x72) {
            this.state = C.TRUE2;
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.TRUE2: // expect `u`
          if (n === 0x75) {
            this.state = C.TRUE3;
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.TRUE3: // expect `e`
          if (n === 0x65) {
            this.state = C.START;
            this.emit('Boolean', true);
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.FALSE1: // expect `a`
          if (n === 0x61) {
            this.state = C.FALSE2;
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.FALSE2: // expect `l`
          if (n === 0x6c) {
            this.state = C.FALSE3;
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.FALSE3: // expect `s`
          if (n === 0x73) {
            this.state = C.FALSE4;
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.FALSE4: // expect `e`
          if (n === 0x65) {
            this.state = C.START;
            this.emit('Boolean', false);
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.NULL1: // expect `u`
          if (n === 0x75) {
            this.state = C.NULL2;
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.NULL2: // expect the first `l`
          if (n === 0x6c) {
            this.state = C.NULL3;
          } else {
            this.charError(bytes, i);
          }
          continue;

        case C.NULL3: // expect the second `l`
          if (n === 0x6c) {
            this.state = C.START;
            this.emit('Null');
          } else {
            this.charError(bytes, i);
          }
          continue;
      }
    }
  }

  /**
   * Completes the token in progress.
   *
   * In a state where a number literal may legally end (the states marked `*`
   * in `parse`), the collected text is converted with `Number()`, which
   * keeps `-0` and avoids the rounding drift of digit-by-digit float
   * arithmetic, and a `Number` event is emitted. In any other state except
   * START an `Error` event reports the truncated input.
   *
   * `parse` calls this when a number is followed by a non-number byte; callers
   * should also invoke it once after the final chunk.
   */
  finish() {
    switch (this.state) {
      case C.NUMBER2: // * after a leading zero
      case C.NUMBER3: // * after an integer digit
      case C.NUMBER5: // * after a fraction digit
      case C.NUMBER8: {
        // * after an exponent digit
        const value = Number(this.numberText);
        this.numberText = '';
        this.state = C.START;
        this.emit('Number', value);
        break;
      }
    }
    if (this.state !== C.START) {
      this.emit('Error', new Error('Unexpected end of input stream'));
    }
  }
}
export default SaxParser;
// (End of gist. The GitHub page footer that followed here was not part of the source.)