Skip to content

Instantly share code, notes, and snippets.

@norswap
Last active January 19, 2023 09:12
Show Gist options
  • Save norswap/5f61bd584188afd2a347736c406c3c1c to your computer and use it in GitHub Desktop.
Save norswap/5f61bd584188afd2a347736c406c3c1c to your computer and use it in GitHub Desktop.
Handwritten JSON Parser with Parser Combinators
// Code for lecture [7. Parsing Combinators]
// https://www.youtube.com/watch?v=3Cfq4i6754s&list=PLOech0kWpH8-njQpmSNGSiQBPUvl8v3IM&index=7
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.Scanner;
import java.util.function.Supplier;
public final class Combinators
{
// === SETUP ===================================================================================
private static final String EXAMPLE_INPUT =
"{\n" +
" \"version\": 17,\n" +
" \"bundles\": [\n" +
" { \"name\" : \"org.graalvm.component.installer.Bundle\" },\n" +
" { \"name\" : \"org.graalvm.component.installer.commands.Bundle\" },\n" +
" { \"name\" : \"org.graalvm.component.installer.remote.Bundle\" },\n" +
" { \"name\" : \"org.graalvm.component.installer.os.Bundle\" }\n" +
" ]\n" +
"}";
public static void main (String[] args)
{
Combinators parsing = new Combinators(EXAMPLE_INPUT);
if (parsing.value.parse())
System.out.println("GREAT SUCCESS");
else
System.out.println("MISERABLE FAILURE");
}
private final String input;
private int pos = 0;
private final Deque<Object> stack = new ArrayDeque<>();
public Combinators(String input) {
this.input = input;
}
// === LEXICAL =================================================================================
// Trash but short implementations - very inefficient!
private void skipWhitespace() {
while (pos < input.length() && (input.charAt(pos) == ' ' || input.charAt(pos) == '\n'))
++pos;
}
private boolean parseStringLit() {
if (pos >= input.length()) return false;
if (input.charAt(pos) != '"')
return false;
int last = input.substring(pos + 1).indexOf('"');
if (last < 0)
return false;
stack.push(input.substring(pos + 1, pos + last + 1));
pos += last + 2;
skipWhitespace();
return true;
}
private boolean parseNumber() {
if (pos >= input.length()) return false;
Scanner scanner = new Scanner(input.substring(pos));
String num = scanner.useDelimiter("[^0-9]").next();
if (num.length() > 0) {
pos += num.length();
stack.push(Integer.parseInt(num));
return true;
}
return false;
}
private boolean parseChar(char c) {
if (pos >= input.length()) return false;
boolean success = input.charAt(pos) == c;
if (!success) return false;
++pos;
skipWhitespace();
return true;
}
// === COMBINATORS =============================================================================
private interface Parser {
boolean parse();
}
// Lexical
private final class StringLitParser implements Parser {
@Override public boolean parse() {
return parseStringLit();
}
}
private final class NumberParser implements Parser {
@Override public boolean parse() {
return parseNumber();
}
}
private final class CharParser implements Parser {
public final char c;
private CharParser(char c) {
this.c = c;
}
@Override public boolean parse() {
return parseChar(c);
}
}
// Combinators
private final class Sequence implements Parser {
public final Parser[] children;
private Sequence(Parser... children) {
this.children = children;
}
@Override public boolean parse() {
int pos0 = pos;
for (Parser child: children) {
if (!child.parse()) {
pos = pos0;
return false;
}
}
return true;
}
}
public static final class ForwardReference implements Parser {
private final Supplier<Parser> supplier;
public ForwardReference(Supplier<Parser> supplier) {
this.supplier = supplier;
}
@Override public boolean parse() {
return supplier.get().parse();
}
}
public static final class Repetition implements Parser {
public final Parser child;
public Repetition(Parser child) {
this.child = child;
}
@Override public boolean parse() {
while (child.parse());
return true;
}
}
public static final class Optional implements Parser {
public final Parser child;
public Optional(Parser child) {
this.child = child;
}
@Override public boolean parse() {
child.parse();
return true;
}
}
private static final class Choice implements Parser {
public final Parser[] children;
private Choice(Parser... children) {
this.children = children;
}
@Override public boolean parse() {
for (Parser child: children)
if (child.parse()) return true;
return false;
}
}
public final class ComposeObject implements Parser {
public final Parser child;
public ComposeObject(Parser child) {
this.child = child;
}
@Override public boolean parse() {
int stack0 = stack.size();
boolean success = child.parse();
if (!success) return false;
HashMap<String, Object> object = new HashMap<>();
while (stack.size() > stack0) {
Object value = stack.pop();
String string = (String) stack.pop();
object.put(string, value);
}
stack.push(object);
return true;
}
}
public final class ComposeArray implements Parser {
public final Parser child;
public ComposeArray(Parser child) {
this.child = child;
}
@Override public boolean parse() {
int stack0 = stack.size();
boolean success = child.parse();
if (!success) return false;
ArrayList<Object> array = new ArrayList<>();
while (stack.size() > stack0)
array.add(stack.pop());
Collections.reverse(array);
stack.push(array);
return true;
}
}
// === PARSER ==================================================================================
/*
VALUE ::= STRINGLIT / NUMBER / OBJECT / ARRAY
OBJECT ::= "{" (PAIR ("," PAIR)* )? "}"
PAIR ::= STRINGLIT ":" VALUE
ARRAY ::= "[" (VALUE ("," VALUE)* )? "]"
*/
// PAIR ::= STRINGLIT ":" VALUE
private final Parser pair =
new Sequence(
new StringLitParser(),
new CharParser(':'),
new ForwardReference(() -> this.value));
// ("," PAIR)*
private final Parser pairTails =
new Repetition(
new Sequence(
new CharParser(','),
pair));
// (PAIR ("," PAIR)* )?
private final Parser pairs =
new Optional(
new Sequence(
pair,
pairTails));
// OBJECT ::= "{" (PAIR ("," PAIR)* )? "}"
private final Parser object =
new ComposeObject(
new Sequence(
new CharParser('{'),
pairs,
new CharParser('}')));
// ("," VALUE)*
private final Parser valueTails =
new Repetition(
new Sequence(
new CharParser(','),
new ForwardReference(() -> this.value)));
// (VALUE ("," VALUE)* )?
private final Parser values =
new Optional(
new Sequence(
new ForwardReference(() -> this.value),
valueTails));
// ARRAY ::= "[" (VALUE ("," VALUE)* )? "]"
private final Parser array =
new ComposeArray(
new Sequence(
new CharParser('['),
values,
new CharParser(']')));
// VALUE ::= STRINGLIT / NUMBER / OBJECT / ARRAY
private final Parser value =
new Choice(
new StringLitParser(),
new NumberParser(),
object,
array);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment