Skip to content

Instantly share code, notes, and snippets.

@creationix
Created February 13, 2012 23:20
Show Gist options
  • Save creationix/1821394 to your computer and use it in GitHub Desktop.
Save creationix/1821394 to your computer and use it in GitHub Desktop.
event-only version of jsonparse
// Named constants with unique integer values.
// `C` maps every state name to its code so that toknam() can translate a code
// back into a readable name for error messages. Each state is also bound to a
// same-named local variable, which the tokenizer compares against directly.
// The name registered in `C` must match the local variable name, otherwise
// error messages report the wrong state.
var C = {};
// Tokenizer States
var START = C.START = 0x11;
// Inside the literal `true` (after `t`, `tr`, `tru`)
var TRUE1 = C.TRUE1 = 0x21;
var TRUE2 = C.TRUE2 = 0x22;
var TRUE3 = C.TRUE3 = 0x23;
// Inside the literal `false` (after `f`, `fa`, `fal`, `fals`)
var FALSE1 = C.FALSE1 = 0x31;
var FALSE2 = C.FALSE2 = 0x32;
var FALSE3 = C.FALSE3 = 0x33;
var FALSE4 = C.FALSE4 = 0x34;
// Inside the literal `null` (after `n`, `nu`, `nul`)
var NULL1 = C.NULL1 = 0x41;
var NULL2 = C.NULL2 = 0x42;
var NULL3 = C.NULL3 = 0x43;
// Inside a number
var NUMBER1 = C.NUMBER1 = 0x51;
var NUMBER2 = C.NUMBER2 = 0x52;
var NUMBER3 = C.NUMBER3 = 0x53;
var NUMBER4 = C.NUMBER4 = 0x54;
var NUMBER5 = C.NUMBER5 = 0x55;
var NUMBER6 = C.NUMBER6 = 0x56;
var NUMBER7 = C.NUMBER7 = 0x57;
var NUMBER8 = C.NUMBER8 = 0x58;
// Inside a string. STRING3..STRING6 must stay consecutive: the parser steps
// through the four hex digits of a \u escape with `this.state++`.
var STRING1 = C.STRING1 = 0x61;
var STRING2 = C.STRING2 = 0x62;
var STRING3 = C.STRING3 = 0x63;
var STRING4 = C.STRING4 = 0x64;
var STRING5 = C.STRING5 = 0x65;
var STRING6 = C.STRING6 = 0x66;
// Reverse lookup from a state code to its name in `C`. It scans every entry,
// so it is slow; it is only called while building syntax-error messages.
// Returns the matching name when one exists. Otherwise a falsy `code` is
// returned unchanged and any other value is rendered as "0x" + hex digits.
function toknam(code) {
  var matches = Object.keys(C).filter(function (name) {
    return C[name] === code;
  });
  if (matches.length > 0) {
    return matches[0];
  }
  if (!code) {
    return code;
  }
  return "0x" + code.toString(16);
}
// Event-only JSON tokenizer. `callbacks` is the object whose handler methods
// (onStartObject, onString, onNumber, onError, ...) are invoked by parse(),
// charError() and finish(). A new parser starts in the START state.
function SaxParser(callbacks) {
  var self = this;
  self.callbacks = callbacks;
  self.state = START;
  // Scratch fields, all created up front as `undefined`:
  //   string, unicode                       - string data / pending \u escape
  //   negative, magnatude, position,
  //   exponent, negativeExponent            - number being assembled
  [
    "string",
    "unicode",
    "negative",
    "magnatude",
    "position",
    "exponent",
    "negativeExponent"
  ].forEach(function (field) {
    self[field] = undefined;
  });
}
// Shorthand used when attaching the methods.
var proto = SaxParser.prototype;
// Report an unexpected byte through callbacks.onError. The message quotes the
// byte at buffer[i] as a JSON string, gives its index `i` within `buffer`, and
// names the current tokenizer state. Nothing is thrown by this method itself.
proto.charError = function (buffer, i) {
  var offending = JSON.stringify(String.fromCharCode(buffer[i]));
  var stateName = toknam(this.state);
  var message = "Unexpected " + offending + " at position " + i + " in state " + stateName;
  this.callbacks.onError(new Error(message));
};
// True for the ASCII bytes `0`-`9` (0x30-0x39).
function isDigit(n) {
  return n >= 0x30 && n <= 0x39;
}
// True for the ASCII bytes `0`-`9`, `A`-`F` and `a`-`f`.
function isHexDigit(n) {
  return isDigit(n) || (n >= 0x41 && n <= 0x46) || (n >= 0x61 && n <= 0x66);
}
/**
 * Feed one chunk of JSON text to the tokenizer, firing a callback per token.
 *
 * Tokenizer state lives on the instance, so a token may span several calls.
 * A number is only reported once a byte that cannot belong to it is seen (or
 * when finish() is called), because its end is not otherwise detectable.
 *
 * An unexpected byte is reported through charError(); that byte is then
 * skipped and the state is left unchanged.
 *
 * NOTE: string bytes are appended one at a time with String.fromCharCode, so
 * bytes >= 0x80 are not decoded as multi-byte UTF-8 sequences.
 * NOTE: fraction digits are accumulated in floating point, so a parsed value
 * can differ from the nearest double in its last bits.
 *
 * @param {Buffer|string} buffer - Chunk to scan; a string is converted to a
 *   Buffer first. Positions in error messages are indexes into this chunk.
 */
proto.parse = function (buffer) {
  if (typeof buffer === "string") buffer = Buffer.from(buffer);
  var n;
  for (var i = 0, l = buffer.length; i < l; i++) {
    switch (this.state) {
    case START:
      n = buffer[i];
      switch (n) {
      case 0x7b: // `{`
        this.callbacks.onStartObject();
        continue;
      case 0x7d: // `}`
        this.callbacks.onEndObject();
        continue;
      case 0x5b: // `[`
        this.callbacks.onStartArray();
        continue;
      case 0x5d: // `]`
        this.callbacks.onEndArray();
        continue;
      case 0x3a: // `:`
        this.callbacks.onColon();
        continue;
      case 0x2c: // `,`
        this.callbacks.onComma();
        continue;
      case 0x74: // `t`
        this.state = TRUE1;
        continue;
      case 0x66: // `f`
        this.state = FALSE1;
        continue;
      case 0x6e: // `n`
        this.state = NULL1;
        continue;
      case 0x22: // `"`
        this.string = "";
        this.state = STRING1;
        continue;
      case 0x2d: // `-`
        this.negative = true;
        this.state = NUMBER1;
        continue;
      case 0x30: // `0`
        this.magnatude = 0;
        this.state = NUMBER2;
        continue;
      }
      if (n > 0x30 && isDigit(n)) { // 1-9
        this.magnatude = n - 0x30;
        this.state = NUMBER3;
        continue;
      }
      if (n === 0x20 || n === 0x09 || n === 0x0a || n === 0x0d) {
        continue; // whitespace
      }
      this.charError(buffer, i);
      // Every charError below is followed by `continue` so that execution
      // does not fall through into the next case label.
      continue;
    case STRING1: // After open quote
      n = buffer[i];
      switch (n) {
      case 0x22: // `"`
        this.callbacks.onString(this.string);
        this.string = undefined;
        this.state = START;
        continue;
      case 0x5c: // `\`
        this.state = STRING2;
        continue;
      }
      if (n >= 0x20) {
        this.string += String.fromCharCode(n);
        continue;
      }
      this.charError(buffer, i);
      continue;
    case STRING2: // After backslash
      n = buffer[i];
      switch (n) {
      case 0x22: this.string += "\""; this.state = STRING1; continue;
      case 0x5c: this.string += "\\"; this.state = STRING1; continue;
      case 0x2f: this.string += "\/"; this.state = STRING1; continue;
      case 0x62: this.string += "\b"; this.state = STRING1; continue;
      case 0x66: this.string += "\f"; this.state = STRING1; continue;
      case 0x6e: this.string += "\n"; this.state = STRING1; continue;
      case 0x72: this.string += "\r"; this.state = STRING1; continue;
      case 0x74: this.string += "\t"; this.state = STRING1; continue;
      case 0x75: this.unicode = ""; this.state = STRING3; continue;
      }
      this.charError(buffer, i);
      continue;
    case STRING3: case STRING4: case STRING5: case STRING6: // unicode hex codes
      n = buffer[i];
      if (isHexDigit(n)) {
        this.unicode += String.fromCharCode(n);
        // The post-increment walks STRING3 -> STRING6; the fourth digit
        // completes the escape.
        if (this.state++ === STRING6) {
          this.string += String.fromCharCode(parseInt(this.unicode, 16));
          this.unicode = undefined;
          this.state = STRING1;
        }
        continue;
      }
      this.charError(buffer, i);
      continue;
    case NUMBER1: // after minus
      n = buffer[i];
      if (n === 0x30) { // `0`
        this.magnatude = 0;
        this.state = NUMBER2;
        continue;
      }
      if (n > 0x30 && isDigit(n)) { // `1`-`9`
        this.magnatude = n - 0x30;
        this.state = NUMBER3;
        continue;
      }
      this.charError(buffer, i);
      continue;
    case NUMBER2: // * After initial zero
      switch (buffer[i]) {
      case 0x2e: // .
        this.position = 0.1; this.state = NUMBER4; continue;
      case 0x65: case 0x45: // e/E
        this.exponent = 0; this.state = NUMBER6; continue;
      }
      this.finish();
      i--; // rewind to re-check this char
      continue;
    case NUMBER3: // * After digit (before period)
      n = buffer[i];
      switch (n) {
      case 0x2e: // .
        this.position = 0.1; this.state = NUMBER4; continue;
      case 0x65: case 0x45: // e/E
        this.exponent = 0; this.state = NUMBER6; continue;
      }
      if (isDigit(n)) { // 0-9
        this.magnatude = this.magnatude * 10 + (n - 0x30);
        continue;
      }
      this.finish();
      i--; // rewind to re-check
      continue;
    case NUMBER4: // After period
      n = buffer[i];
      if (isDigit(n)) { // 0-9
        this.magnatude += this.position * (n - 0x30);
        this.position /= 10;
        this.state = NUMBER5;
        continue;
      }
      this.charError(buffer, i);
      continue;
    case NUMBER5: // * After digit (after period)
      n = buffer[i];
      if (isDigit(n)) { // 0-9
        this.magnatude += this.position * (n - 0x30);
        this.position /= 10;
        continue;
      }
      if (n === 0x65 || n === 0x45) { // E/e
        this.exponent = 0;
        this.state = NUMBER6;
        continue;
      }
      this.finish();
      i--; // rewind
      continue;
    case NUMBER6: // After E
      n = buffer[i];
      if (n === 0x2b || n === 0x2d) { // +/-
        if (n === 0x2d) { this.negativeExponent = true; }
        this.state = NUMBER7;
        continue;
      }
      if (isDigit(n)) { // 0-9
        this.exponent = this.exponent * 10 + (n - 0x30);
        this.state = NUMBER8;
        continue;
      }
      this.charError(buffer, i);
      continue;
    case NUMBER7: // After +/-
      n = buffer[i];
      if (isDigit(n)) { // 0-9
        this.exponent = this.exponent * 10 + (n - 0x30);
        this.state = NUMBER8;
        continue;
      }
      this.charError(buffer, i);
      continue;
    case NUMBER8: // * After digit (after +/-)
      n = buffer[i];
      if (isDigit(n)) { // 0-9
        this.exponent = this.exponent * 10 + (n - 0x30);
        continue;
      }
      this.finish();
      i--;
      continue;
    case TRUE1: // r
      if (buffer[i] === 0x72) {
        this.state = TRUE2;
        continue;
      }
      this.charError(buffer, i);
      continue;
    case TRUE2: // u
      if (buffer[i] === 0x75) {
        this.state = TRUE3;
        continue;
      }
      this.charError(buffer, i);
      continue;
    case TRUE3: // e
      if (buffer[i] === 0x65) {
        this.state = START;
        this.callbacks.onBoolean(true);
        continue;
      }
      this.charError(buffer, i);
      continue;
    case FALSE1: // a
      if (buffer[i] === 0x61) {
        this.state = FALSE2;
        continue;
      }
      this.charError(buffer, i);
      continue;
    case FALSE2: // l
      if (buffer[i] === 0x6c) {
        this.state = FALSE3;
        continue;
      }
      this.charError(buffer, i);
      continue;
    case FALSE3: // s
      if (buffer[i] === 0x73) {
        this.state = FALSE4;
        continue;
      }
      this.charError(buffer, i);
      continue;
    case FALSE4: // e
      if (buffer[i] === 0x65) {
        this.state = START;
        this.callbacks.onBoolean(false);
        continue;
      }
      this.charError(buffer, i);
      continue;
    case NULL1: // u
      if (buffer[i] === 0x75) {
        this.state = NULL2;
        continue;
      }
      this.charError(buffer, i);
      continue;
    case NULL2: // l
      if (buffer[i] === 0x6c) {
        this.state = NULL3;
        continue;
      }
      this.charError(buffer, i);
      continue;
    case NULL3: // l
      if (buffer[i] === 0x6c) {
        this.state = START;
        this.callbacks.onNull();
        continue;
      }
      this.charError(buffer, i);
      continue;
    }
  }
};
// Flush a pending number and check that the input ended on a token boundary.
// parse() calls this when a byte ends a number; callers invoke it once after
// the last chunk. In a state where a number may legally end (NUMBER2,
// NUMBER3, NUMBER5, NUMBER8) the value is passed to callbacks.onNumber and
// the parser returns to START. If the parser is still not in START
// afterwards, callbacks.onError receives an "Unexpected end of input stream"
// error.
proto.finish = function () {
  var pending = this.state;
  if (pending === NUMBER2) {
    // A lone zero: reported as 0 whether or not a minus sign preceded it.
    this.callbacks.onNumber(0);
    this.state = START;
    this.magnatude = undefined;
    this.negative = undefined;
  } else if (pending === NUMBER3 || pending === NUMBER5) {
    // Integer (NUMBER3) or decimal without exponent (NUMBER5).
    this.state = START;
    if (this.negative) {
      this.magnatude = -this.magnatude;
      this.negative = undefined;
    }
    this.callbacks.onNumber(this.magnatude);
    this.magnatude = undefined;
    if (pending === NUMBER5) {
      this.position = undefined;
    }
  } else if (pending === NUMBER8) {
    // Number with an exponent: scale first, then apply the sign.
    if (this.negativeExponent) {
      this.exponent = -this.exponent;
      this.negativeExponent = undefined;
    }
    this.magnatude *= Math.pow(10, this.exponent);
    this.exponent = undefined;
    if (this.negative) {
      this.magnatude = -this.magnatude;
      this.negative = undefined;
    }
    this.state = START;
    this.callbacks.onNumber(this.magnatude);
    this.magnatude = undefined;
  }
  if (this.state !== START) {
    this.callbacks.onError(new Error("Unexpected end of input stream"));
  }
};
exports.SaxParser = SaxParser;
// Demo script: log every tokenizer event for a few sample inputs.
var SaxParser = require('./jsonparse').SaxParser;
var p = new SaxParser({
  // The parser reports syntax errors and truncated input through onError;
  // without this handler such input would fail with a TypeError instead.
  onError: function (err) {
    console.error("onError", err.message);
  },
  onNull: function () {
    console.log("onNull");
  },
  onBoolean: function (value) {
    console.log("onBoolean", value);
  },
  onNumber: function (value) {
    console.log("onNumber", value);
  },
  onString: function (value) {
    console.log("onString", value);
  },
  onStartObject: function () {
    console.log("onStartObject");
  },
  onColon: function () {
    console.log("onColon");
  },
  onComma: function () {
    console.log("onComma");
  },
  onEndObject: function () {
    console.log("onEndObject");
  },
  onStartArray: function () {
    console.log("onStartArray");
  },
  onEndArray: function () {
    console.log("onEndArray");
  }
});
// Alternative inputs, kept as usage examples:
//var stream = require('fs').createReadStream(__dirname + "/tests/sample.json");
//var parse = p.parse.bind(p);
//stream.on('data', parse);
//p.parse(Buffer.from("{}"));
//p.parse(Buffer.from("[]"));
//p.parse(Buffer.from('"Hello"'));
p.parse(Buffer.from("[1,2,3]"));
// The number ends at the end of its chunk, so it is only reported when the
// next chunk (or finish()) shows that it is complete.
p.parse(Buffer.from("-0.1e-3"));
p.parse(Buffer.from('"Hello"'));
// Flush any pending number and report input that ended mid-token.
p.finish();
//p.parse(Buffer.from('{"name":"tim"}'))
@ZeikJT
Copy link

ZeikJT commented Sep 1, 2014

Is the null numbering on purpose?

var NULL2   = C.NULL3   = 0x42;
var NULL3   = C.NULL2   = 0x43;

NULL2 equals C.NULL3
NULL3 equals C.NULL2

@ORESoftware
Copy link

This is cool, but how do I use it? Right now I am using JSONStream, but I am wondering if there is a way to use this module directly. I'm not sure, because I don't see any examples. Nice code though :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment