Last active
January 17, 2023 17:33
-
-
Save conartist6/71fd8e40f34a6a2267d4fd6990cb686c to your computer and use it in GitHub Desktop.
A human-friendly json parser with parserate
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import parserate from '@iter-tools/parserate'; | |
// Factories for the two node shapes produced by the tokenizer.
const t = {
  /** Builds a `token` node carrying `value`. */
  token(value) {
    return { type: 'token', value };
  },

  /** Builds a `literal` node carrying `value`. */
  literal(value) {
    return { type: 'literal', value };
  },
};
// Maps the character written after a backslash to the character it stands for.
const escapes = Object.fromEntries([
  ['"', '"'],
  ['\\', '\\'],
  ['b', '\b'],
  ['f', '\f'],
  ['n', '\n'],
  ['r', '\r'],
  ['t', '\t'],
]);

// The recognised escape characters, in the order they are listed above.
const escapeChars = Object.keys(escapes);
/**
 * Tokenizes one double-quoted string and yields a single `literal` token
 * holding its decoded text.
 *
 * Escapes are decoded as follows: `\uXXXX` (four hexadecimal digits) becomes
 * the UTF-16 code unit with that value, the characters listed in `escapes`
 * are looked up there, and any other escaped character stands for itself.
 *
 * @param {object} parsr - Character-level parser object, positioned on the opening `"`.
 * @yields {{type: 'literal', value: string}} The decoded string.
 * @throws The value of `parsr.error()` when the next character cannot be
 *   consumed (for example a raw line break, which `/./` does not match).
 *   NOTE(review): `parsr.take` presumably raises on a mismatch — confirm
 *   against parserate.
 */
function* tokenizeString(parsr) {
  let literal = '';

  parsr.take('"');

  while (!parsr.done && !parsr.match('"')) {
    if (parsr.takeMatch('\\')) {
      if (parsr.takeMatch('u')) {
        // The digits are parsed with radix 16 below, so the pattern has to
        // accept hexadecimal digits; `\d{4}` rejected escapes such as \u00e9.
        const [charCode] = parsr.take(/[0-9a-fA-F]{4}/);
        literal += String.fromCharCode(Number.parseInt(charCode, 16));
      } else if (parsr.takeMatch(escapeChars)) {
        literal += escapes[parsr.match[0]];
      } else if (parsr.takeMatch(/./)) {
        // Lenient: an unrecognised escape keeps the escaped character as-is.
        literal += parsr.match[0];
      } else {
        // Thrown (as `tokenize` does) so the loop cannot spin without
        // consuming input if `error()` returns instead of throwing.
        throw parsr.error();
      }
    } else if (parsr.takeMatch(/./)) {
      literal += parsr.match[0];
    } else {
      throw parsr.error();
    }
  }

  parsr.take('"');

  yield t.literal(literal);
}
/**
 * Splits JSON-like source text into a stream of tokens.
 *
 * Yields `literal` tokens for `null`, `true`, `false`, numbers and strings,
 * and `token` tokens for the punctuation `[ ] { } : ,`. Whitespace between
 * tokens is skipped.
 *
 * @param {*} input - Source passed straight to `parserate`
 *   (presumably a string or an iterable of characters — confirm).
 * @yields {{type: 'literal' | 'token', value: *}} The next token.
 * @throws The value of `parsr.error()` when no token starts at the current position.
 */
export function* tokenize(input) {
  // JSON number grammar: optional minus, integer part without leading zeros,
  // optional fraction, optional exponent.
  // NOTE(review): relies on parserate matching regexes greedily, as the
  // existing `\s+` branch already does — confirm.
  const numberPattern = /-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?/;

  const parsr = parserate(input);

  while (!parsr.done) {
    if (parsr.takeMatch('null')) {
      yield t.literal(null);
    } else if (parsr.match('"')) {
      yield* tokenizeString(parsr);
    } else if (parsr.takeMatch(['[', ']', '{', '}', ':', ','])) {
      yield t.token(parsr.match[0]);
    } else if (parsr.takeMatch(/\s+/)) {
      // Whitespace separates tokens but produces none.
    } else if (parsr.takeMatch('true')) {
      // The branches from here on only see input that was previously
      // rejected, so everything that tokenized before still does.
      yield t.literal(true);
    } else if (parsr.takeMatch('false')) {
      yield t.literal(false);
    } else if (parsr.takeMatch(numberPattern)) {
      yield t.literal(Number(parsr.match[0]));
    } else {
      throw parsr.error();
    }
  }
}
/**
 * Parses one value from a token-level parser object.
 *
 * A `literal` token yields its value. A `{` token starts an object of
 * `key : value` pairs and a `[` token starts an array; both are parsed
 * recursively, with `,` separating entries.
 *
 * @param {object} parsr - Token-level parser object.
 * @returns {*} The parsed value, or `undefined` when `parsr` is already done.
 * @throws The value of `parsr.error(...)` when the current token cannot start
 *   a value or an object key is not a literal.
 *   NOTE(review): `parsr.take` presumably raises on a mismatch — confirm
 *   against parserate.
 */
function parseValue(parsr) {
  if (parsr.done) {
    return undefined;
  }

  const token = parsr.value;

  if (token.type === 'literal') {
    // NOTE(review): the literal is returned without any visible call that
    // advances `parsr`; confirm that reading `parsr.value` is enough for the
    // caller's following `takeMatch(',')` to see the next token.
    return token.value;
  }

  if (token.type === 'token') {
    if (parsr.takeMatch('{')) {
      const obj = {};

      while (!parsr.done && !parsr.match('}')) {
        const [key] = parsr;
        if (key.type !== 'literal') throw parsr.error(key);

        parsr.take(':');

        // Define instead of assign so that a "__proto__" key becomes an
        // ordinary own property rather than replacing the object's prototype.
        Object.defineProperty(obj, key.value, {
          value: parseValue(parsr),
          writable: true,
          enumerable: true,
          configurable: true,
        });

        if (!parsr.takeMatch(',')) break;
      }

      parsr.take('}');

      return obj;
    }

    if (parsr.takeMatch('[')) {
      const arr = [];

      while (!parsr.done && !parsr.match(']')) {
        arr.push(parseValue(parsr));

        if (!parsr.takeMatch(',')) break;
      }

      parsr.take(']');

      return arr;
    }
  }

  // Any other token (`]`, `}`, `:`, `,` or an unknown type) cannot start a
  // value. Fail here: nothing was consumed, so retrying would never progress.
  throw parsr.error(token);
}
/**
 * Parses `input` into a single value.
 *
 * The input is tokenized, the token stream is wrapped in a parser object,
 * and one value is read from it with `parseValue`.
 *
 * @param {*} input - Source handed to `tokenize`.
 * @returns {*} Whatever `parseValue` returns for the token stream.
 */
export function parse(input) {
  const tokens = tokenize(input);
  const tokenParsr = parserate(tokens);

  return parseValue(tokenParsr);
}
/**
 * Parses a top-level array and yields its elements one at a time, each as
 * soon as it has been parsed.
 *
 * @param {*} input - Source handed to `tokenize`; must start with `[`.
 * @yields {*} The next element of the top-level array.
 */
export function* parseStream(input) {
  const tokenParsr = parserate(tokenize(input));

  tokenParsr.take('[');

  for (;;) {
    // Stop at the end of input or at the closing bracket.
    if (tokenParsr.done || tokenParsr.match(']')) break;

    yield parseValue(tokenParsr);

    // No separator means there are no further elements.
    const hasSeparator = tokenParsr.takeMatch(',');
    if (!hasSeparator) break;
  }

  tokenParsr.take(']');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Some things I think are particularly compelling about writing a parser like this:
`SyntaxError: Unexpected token: 'undefined'` instead of `SyntaxError: Unexpected character: 'u'`.