Last active
January 19, 2023 09:12
-
-
Save norswap/5f61bd584188afd2a347736c406c3c1c to your computer and use it in GitHub Desktop.
Handwritten JSON Parser with Parser Combinators
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Code for lecture [7. Parsing Combinators] | |
// https://www.youtube.com/watch?v=3Cfq4i6754s&list=PLOech0kWpH8-njQpmSNGSiQBPUvl8v3IM&index=7 | |
import java.util.ArrayDeque; | |
import java.util.ArrayList; | |
import java.util.Collections; | |
import java.util.Deque; | |
import java.util.HashMap; | |
import java.util.Scanner; | |
import java.util.function.Supplier; | |
public final class Combinators | |
{ | |
// === SETUP =================================================================================== | |
private static final String EXAMPLE_INPUT = | |
"{\n" + | |
" \"version\": 17,\n" + | |
" \"bundles\": [\n" + | |
" { \"name\" : \"org.graalvm.component.installer.Bundle\" },\n" + | |
" { \"name\" : \"org.graalvm.component.installer.commands.Bundle\" },\n" + | |
" { \"name\" : \"org.graalvm.component.installer.remote.Bundle\" },\n" + | |
" { \"name\" : \"org.graalvm.component.installer.os.Bundle\" }\n" + | |
" ]\n" + | |
"}"; | |
public static void main (String[] args) | |
{ | |
Combinators parsing = new Combinators(EXAMPLE_INPUT); | |
if (parsing.value.parse()) | |
System.out.println("GREAT SUCCESS"); | |
else | |
System.out.println("MISERABLE FAILURE"); | |
} | |
private final String input; | |
private int pos = 0; | |
private final Deque<Object> stack = new ArrayDeque<>(); | |
public Combinators(String input) { | |
this.input = input; | |
} | |
// === LEXICAL ================================================================================= | |
// Trash but short implementations - very inefficient! | |
private void skipWhitespace() { | |
while (pos < input.length() && (input.charAt(pos) == ' ' || input.charAt(pos) == '\n')) | |
++pos; | |
} | |
private boolean parseStringLit() { | |
if (pos >= input.length()) return false; | |
if (input.charAt(pos) != '"') | |
return false; | |
int last = input.substring(pos + 1).indexOf('"'); | |
if (last < 0) | |
return false; | |
stack.push(input.substring(pos + 1, pos + last + 1)); | |
pos += last + 2; | |
skipWhitespace(); | |
return true; | |
} | |
private boolean parseNumber() { | |
if (pos >= input.length()) return false; | |
Scanner scanner = new Scanner(input.substring(pos)); | |
String num = scanner.useDelimiter("[^0-9]").next(); | |
if (num.length() > 0) { | |
pos += num.length(); | |
stack.push(Integer.parseInt(num)); | |
return true; | |
} | |
return false; | |
} | |
private boolean parseChar(char c) { | |
if (pos >= input.length()) return false; | |
boolean success = input.charAt(pos) == c; | |
if (!success) return false; | |
++pos; | |
skipWhitespace(); | |
return true; | |
} | |
// === COMBINATORS ============================================================================= | |
private interface Parser { | |
boolean parse(); | |
} | |
// Lexical | |
private final class StringLitParser implements Parser { | |
@Override public boolean parse() { | |
return parseStringLit(); | |
} | |
} | |
private final class NumberParser implements Parser { | |
@Override public boolean parse() { | |
return parseNumber(); | |
} | |
} | |
private final class CharParser implements Parser { | |
public final char c; | |
private CharParser(char c) { | |
this.c = c; | |
} | |
@Override public boolean parse() { | |
return parseChar(c); | |
} | |
} | |
// Combinators | |
private final class Sequence implements Parser { | |
public final Parser[] children; | |
private Sequence(Parser... children) { | |
this.children = children; | |
} | |
@Override public boolean parse() { | |
int pos0 = pos; | |
for (Parser child: children) { | |
if (!child.parse()) { | |
pos = pos0; | |
return false; | |
} | |
} | |
return true; | |
} | |
} | |
public static final class ForwardReference implements Parser { | |
private final Supplier<Parser> supplier; | |
public ForwardReference(Supplier<Parser> supplier) { | |
this.supplier = supplier; | |
} | |
@Override public boolean parse() { | |
return supplier.get().parse(); | |
} | |
} | |
public static final class Repetition implements Parser { | |
public final Parser child; | |
public Repetition(Parser child) { | |
this.child = child; | |
} | |
@Override public boolean parse() { | |
while (child.parse()); | |
return true; | |
} | |
} | |
public static final class Optional implements Parser { | |
public final Parser child; | |
public Optional(Parser child) { | |
this.child = child; | |
} | |
@Override public boolean parse() { | |
child.parse(); | |
return true; | |
} | |
} | |
private static final class Choice implements Parser { | |
public final Parser[] children; | |
private Choice(Parser... children) { | |
this.children = children; | |
} | |
@Override public boolean parse() { | |
for (Parser child: children) | |
if (child.parse()) return true; | |
return false; | |
} | |
} | |
public final class ComposeObject implements Parser { | |
public final Parser child; | |
public ComposeObject(Parser child) { | |
this.child = child; | |
} | |
@Override public boolean parse() { | |
int stack0 = stack.size(); | |
boolean success = child.parse(); | |
if (!success) return false; | |
HashMap<String, Object> object = new HashMap<>(); | |
while (stack.size() > stack0) { | |
Object value = stack.pop(); | |
String string = (String) stack.pop(); | |
object.put(string, value); | |
} | |
stack.push(object); | |
return true; | |
} | |
} | |
public final class ComposeArray implements Parser { | |
public final Parser child; | |
public ComposeArray(Parser child) { | |
this.child = child; | |
} | |
@Override public boolean parse() { | |
int stack0 = stack.size(); | |
boolean success = child.parse(); | |
if (!success) return false; | |
ArrayList<Object> array = new ArrayList<>(); | |
while (stack.size() > stack0) | |
array.add(stack.pop()); | |
Collections.reverse(array); | |
stack.push(array); | |
return true; | |
} | |
} | |
// === PARSER ================================================================================== | |
/* | |
VALUE ::= STRINGLIT / NUMBER / OBJECT / ARRAY | |
OBJECT ::= "{" (PAIR ("," PAIR)* )? "}" | |
PAIR ::= STRINGLIT ":" VALUE | |
ARRAY ::= "[" (VALUE ("," VALUE)* )? "]" | |
*/ | |
// PAIR ::= STRINGLIT ":" VALUE | |
private final Parser pair = | |
new Sequence( | |
new StringLitParser(), | |
new CharParser(':'), | |
new ForwardReference(() -> this.value)); | |
// ("," PAIR)* | |
private final Parser pairTails = | |
new Repetition( | |
new Sequence( | |
new CharParser(','), | |
pair)); | |
// (PAIR ("," PAIR)* )? | |
private final Parser pairs = | |
new Optional( | |
new Sequence( | |
pair, | |
pairTails)); | |
// OBJECT ::= "{" (PAIR ("," PAIR)* )? "}" | |
private final Parser object = | |
new ComposeObject( | |
new Sequence( | |
new CharParser('{'), | |
pairs, | |
new CharParser('}'))); | |
// ("," VALUE)* | |
private final Parser valueTails = | |
new Repetition( | |
new Sequence( | |
new CharParser(','), | |
new ForwardReference(() -> this.value))); | |
// (VALUE ("," VALUE)* )? | |
private final Parser values = | |
new Optional( | |
new Sequence( | |
new ForwardReference(() -> this.value), | |
valueTails)); | |
// ARRAY ::= "[" (VALUE ("," VALUE)* )? "]" | |
private final Parser array = | |
new ComposeArray( | |
new Sequence( | |
new CharParser('['), | |
values, | |
new CharParser(']'))); | |
// VALUE ::= STRINGLIT / NUMBER / OBJECT / ARRAY | |
private final Parser value = | |
new Choice( | |
new StringLitParser(), | |
new NumberParser(), | |
object, | |
array); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment