Skip to content

Instantly share code, notes, and snippets.

@simonesestito
Last active January 3, 2020 17:00
Show Gist options
  • Save simonesestito/8fe0aff24488255ad6b35bdb46a67e8c to your computer and use it in GitHub Desktop.
Save simonesestito/8fe0aff24488255ad6b35bdb46a67e8c to your computer and use it in GitHub Desktop.
JSON Parser in JavaScript

JSON Parser

Note: This is intended purely as an exercise. It may contain bugs and does not implement the full JSON specification.

How to run the test

git clone https://gist.github.com/simonesestito/8fe0aff24488255ad6b35bdb46a67e8c json_parser --depth=1
node json_parser/
const { repeat } = require('./utils');
/**
 * Manage the input string as a stream of characters.
 *
 * The stream keeps a cursor into the string; `get` peeks at the character
 * under the cursor while `next` reads it and advances.
 */
class CharStream {
  /**
   * Characters skipped by `skipEmpty`: the four insignificant whitespace
   * characters of the JSON grammar (line feed, space, tab, carriage return).
   */
  static empty = ['\n', ' ', '\t', '\r'];

  /** Index of the next character to read. */
  cursor = 0;

  /**
   * @param {string} string The string to read from
   */
  constructor(string) {
    this.string = string;
  }

  /**
   * Return the current character and point to the next one.
   * @returns The current character, or undefined if the cursor is out of string bounds.
   */
  next() {
    return this.string[this.cursor++];
  }

  /**
   * Return the current character without consuming it.
   * @returns The current character, or undefined if the cursor is out of string bounds.
   */
  get() {
    return this.string[this.cursor];
  }

  /**
   * Skip all characters considered empty.
   * @returns {CharStream} The current instance.
   */
  skipEmpty() {
    while (CharStream.empty.includes(this.get())) {
      this.next();
    }
    return this;
  }

  /**
   * Compare the first non-consumed character with the expected one and,
   * when they match, consume it.
   * @param {string} expected The next character expected in the string.
   * @throws {Error} If the next character is different from the expected one.
   *   The message shows up to 25 characters of context on each side of the
   *   cursor, with a caret under the offending position.
   */
  expectNext(expected) {
    const found = this.get();
    if (expected !== found) {
      const chunkStart = Math.max(0, this.cursor - 25);
      const chunkEnd = Math.min(this.string.length, this.cursor + 25);
      // Distance of the cursor from the start of the displayed chunk,
      // i.e. how far the caret has to be shifted to the right
      const startPadding = this.cursor - chunkStart;
      const chunk = this.string.slice(chunkStart, chunkEnd);
      const errMessage =
        `Unexpected char: found '${found}', expected '${expected}'\n` +
        `\t${chunk}\n` +
        `\t${' '.repeat(startPadding)}^\n`;
      throw new Error(errMessage);
    }
    this.next();
  }

  /**
   * Alias of `expectNext`: checks the character under the cursor and
   * consumes it when it matches.
   * @param {string} expected The current character expected in the string.
   * @throws {Error} If the current character is different from the expected one.
   */
  expectCurrent(expected) {
    this.expectNext(expected);
  }
}
module.exports = { CharStream };
const { parseJson } = require('./parser');
// Label shared by console.time / console.timeEnd
const TIMER_LABEL = 'JSON Parsing';

// Fixture covering nested objects and arrays, floats, negative numbers,
// escaped strings, null and booleans
const nestedObject = {
  "Here it is": "A lot of...\n\t\"STRINGS\"",
  "many items": ["Item_1", "Item_2"],
};
const fixture = {
  anArray: [
    {
      pi: 3.14,
      integer: 2,
      negative: -5.34,
      randomObject: nestedObject,
    },
    [-500],
    null,
    false,
  ],
  truth: true,
};

// Reference JSON produced by the built-in serializer
const expectedJson = JSON.stringify(fixture);

// Time only the call to the parser under test
console.time(TIMER_LABEL);
const parsed = parseJson(expectedJson);
console.timeEnd(TIMER_LABEL);

// The parse is considered correct when re-serializing the parsed value
// yields exactly the reference JSON
const roundTrips = JSON.stringify(parsed) === expectedJson;
if (roundTrips) {
  console.log('Success');
} else {
  console.log('Failed');
}
const { CharStream } = require('./char-stream');
const { unescapeChar } = require('./utils');
/**
 * Parse a JSON string.
 *
 * Reads a single value from the start of the string. Whitespace after the
 * value is ignored; any other trailing character is an error.
 * @param {string} str The JSON string to parse
 * @returns {any} The parsed JSON value
 * @throws {Error} In case of error parsing the JSON, or if non-whitespace
 *   characters follow the top-level value
 */
function parseJson(str) {
  const input = new CharStream(str);
  const value = readValue(input);
  // Trailing whitespace (e.g. a final newline) is legal after the value,
  // so skip it before checking that the whole input has been consumed
  const trailing = input.skipEmpty().get();
  if (trailing !== undefined) {
    throw new Error(`Unexpected token '${trailing}' after the end of the JSON value`);
  }
  return value;
}
/**
 * Read the next value from the given input.
 *
 * Leading empty characters are skipped, then the first character decides
 * which specialised reader handles the value.
 * @param {CharStream} input The input to read from.
 * @returns {any} The value read
 * @throws In case of error reading the value
 */
function readValue(input) {
  const head = input.skipEmpty().get();

  if (head === '{') return readObject(input);
  if (head === '[') return readArray(input);
  if (head === '"') return readString(input);
  if (head === 't') return readExact(input, true);
  if (head === 'f') return readExact(input, false);
  if (head === 'n') return readExact(input, null);

  const startsNumber = head === '-' || !Number.isNaN(Number.parseInt(head, 10));
  if (startsNumber) return readNumber(input);

  // Unparsable value: expecting the empty string never matches a character,
  // so this reports the offending character through the stream's own error
  input.expectCurrent('');
}
/**
 * Consume the literal spelling of the given value from the input.
 * The value is converted with `String(...)` and every character of that
 * text must appear, in order, at the cursor.
 * @param {CharStream} input The input to read from.
 * @param {any} value The expected value
 * @returns {any} The same value that was passed in
 * @throws If the read value doesn't match the given one.
 */
function readExact(input, value) {
  const literal = String(value);
  Array.from(literal).forEach((expectedChar) => {
    input.expectNext(expectedChar);
  });
  return value;
}
/**
 * Read the next number from the given input.
 *
 * Accepts an optional leading minus sign, an integer part, an optional
 * fraction and an optional exponent (`e`/`E`, optional sign, digits).
 * Returns when the number in the input stream is ended; the character that
 * terminates the number is left unconsumed.
 *
 * The characters are collected as text and converted once with `Number(...)`:
 * accumulating digits with repeated multiplications by 0.1 introduces
 * rounding errors (e.g. "0.3" would become 0.30000000000000004).
 *
 * Leading zeros and a dot with no digits after it (e.g. "1.") are tolerated
 * for backward compatibility, although strict JSON forbids them.
 * @param {CharStream} input The input to read from.
 * @returns {number} The number read
 * @throws {Error} If the stream doesn't point to a number, or an exponent
 *   marker is not followed by at least one digit
 */
function readNumber(input) {
  const isDigit = (ch) =>
    typeof ch === 'string' && ch.length === 1 && ch >= '0' && ch <= '9';

  // Consume the run of consecutive digits at the cursor and return it as text
  const readDigits = () => {
    let digits = '';
    while (isDigit(input.get())) {
      digits += input.next();
    }
    return digits;
  };

  let text = '';
  let c = input.skipEmpty().next();
  if (c === '-') {
    text += c;
    c = input.next();
  }

  // First char must be a digit
  if (!isDigit(c)) {
    throw new Error(`Unexpected ${c}, expected a digit.`);
  }
  text += c + readDigits();

  // Check for decimal values
  if (input.get() === '.') {
    // Consume the dot and continue parsing as a number
    input.next();
    text += `.${readDigits()}`;
  }

  // Check for an exponent, e.g. 1e+21 or 2.5E-3
  const marker = input.get();
  if (marker === 'e' || marker === 'E') {
    input.next();
    let exponent = 'e';
    const sign = input.get();
    if (sign === '+' || sign === '-') {
      exponent += input.next();
    }
    const exponentDigits = readDigits();
    if (exponentDigits === '') {
      throw new Error(`Unexpected ${input.get()}, expected a digit.`);
    }
    text += exponent + exponentDigits;
  }

  return Number(text);
}
/**
 * Read the next string from the given input.
 *
 * Leading empty characters are skipped, then a double-quoted string is read
 * up to its closing quote, which is consumed. Backslash escapes are resolved:
 * `\uXXXX` (four hexadecimal digits) is decoded here, every other escape is
 * delegated to `unescapeChar`.
 * @param {CharStream} input The input to read from.
 * @returns {string} The string read
 * @throws {Error} If the stream doesn't point to a string, the string
 *   contains a raw newline, the input ends before the closing quote, or an
 *   escape sequence is invalid
 */
function readString(input) {
  const HEX_DIGIT = /^[0-9a-fA-F]$/;
  const chars = [];
  input.skipEmpty().expectNext('"');

  let escaped = false;
  for (;;) {
    const c = input.get();

    // Without this check an unterminated string would loop forever,
    // because reading past the end keeps returning undefined
    if (c === undefined) {
      throw new Error('Unexpected end of input while parsing a string');
    }
    if (c === '\n') {
      throw new Error('Unexpected newline while parsing a string');
    }

    if (!escaped) {
      if (c === '"') {
        break;
      }
      input.next();
      if (c === '\\') {
        escaped = true;
      } else {
        chars.push(c);
      }
      continue;
    }

    // The previous character was an unescaped backslash
    escaped = false;
    input.next();
    if (c === 'u') {
      // \uXXXX: exactly four hex digits encode one UTF-16 code unit
      let hex = '';
      for (let i = 0; i < 4; i++) {
        const h = input.next();
        if (h === undefined || !HEX_DIGIT.test(h)) {
          throw new Error(`Invalid unicode escape \\u${hex}${h === undefined ? '' : h}`);
        }
        hex += h;
      }
      chars.push(String.fromCharCode(Number.parseInt(hex, 16)));
    } else {
      chars.push(unescapeChar(c));
    }
  }

  input.expectCurrent('"');
  return chars.join('');
}
/**
 * Read the next object from the given input.
 *
 * Handles the empty object `{}` as well as one or more comma-separated
 * `"key": value` members. When a key appears more than once, the last
 * value wins.
 * @param {CharStream} input The input to read from.
 * @returns {object} The object read
 * @throws If the stream doesn't point to a valid object
 */
function readObject(input) {
  input.skipEmpty().expectNext('{');
  const obj = {};

  // Empty object: there is no key to read, just consume the closing brace
  if (input.skipEmpty().get() === '}') {
    input.next();
    return obj;
  }

  for (;;) {
    const key = readString(input);
    input.skipEmpty().expectNext(':');
    const value = readValue(input);
    // Define an own property instead of assigning: `obj[key] = value` with
    // the key "__proto__" would replace the object's prototype rather than
    // store the member
    Object.defineProperty(obj, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });

    if (input.skipEmpty().get() !== ',') {
      break;
    }
    // Consume the comma
    input.next();
  }

  input.expectCurrent('}');
  return obj;
}
/**
 * Read the next array from the given input.
 *
 * Handles the empty array `[]` as well as one or more comma-separated values.
 * @param {CharStream} input The input to read from.
 * @returns {Array<any>} The array read
 * @throws If the stream doesn't point to a valid array
 */
function readArray(input) {
  input.skipEmpty().expectNext('[');
  const values = [];

  // Empty array: there is no value to read, just consume the closing bracket
  if (input.skipEmpty().get() === ']') {
    input.next();
    return values;
  }

  for (;;) {
    values.push(readValue(input));
    if (input.skipEmpty().get() !== ',') {
      break;
    }
    // Consume the comma separating this value from the next one
    input.next();
  }

  input.expectCurrent(']');
  return values;
}
module.exports = { parseJson };
/**
 * Resolve an escaped character.
 *
 * Covers the single-character JSON escapes: `\\`, `\"`, `\/`, `\n`, `\b`,
 * `\f`, `\r` and `\t`. The `\uXXXX` form spans several characters and
 * cannot be resolved from the single character this function receives.
 * @param {string} escaped The character placed after the escaping (\)
 * @return {string} Unescaped character
 * @throws {Error} If the escaping is unknown
 */
function unescapeChar(escaped) {
  switch (escaped) {
    // These characters stand for themselves once the backslash is removed
    case '\\':
    case '"':
    case '/':
      return escaped;
    case 'n':
      return '\n';
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    default:
      throw new Error(`Unexpected escaping ${escaped}`);
  }
}
/**
 * Build a string made of several copies of the same value.
 * @param {string} value The string to repeat
 * @param {number} times The number of times to repeat the string
 * @param {string} separator The separator to use between string repetitions
 * @returns {string} The copies joined by the separator; an empty string
 *   when `times` is not greater than zero
 */
function repeat(value, times, separator = '') {
  const copies = [];
  for (let remaining = times; remaining > 0; remaining--) {
    copies.push(value);
  }
  return copies.join(separator);
}
module.exports = {
unescapeChar,
repeat
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment