Last active
April 9, 2016 09:58
-
-
Save routevegetable/5999e932f39ea78dc16fbbe7766be43e to your computer and use it in GitHub Desktop.
Monadic Java Parsers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Monadic Parsers for Java! | |
*/ | |
public abstract class Parser<T> { | |
/** | |
* An example | |
*/ | |
public static void main(String[] args) throws IOException { | |
// Consume some bytes and add them | |
Parser<Integer> adderParser = BYTE_PAIR.new Lift<Integer>() { | |
protected Integer f(List<Integer> value) { | |
return value.get(0) + value.get(1); | |
} | |
}; | |
ParserResult<Integer> sumResult = adderParser.parse(System.in); | |
System.err.println("Sum: " + sumResult.obj); | |
} | |
/** | |
* A byte-reader | |
*/ | |
public final static Parser<Integer> UINT_8 = new Parser<Integer>() { | |
protected ParserResult<Integer> parse(InputStream is) throws IOException { | |
return new ParserResult<>(1, (Integer)is.read()); | |
} | |
}; | |
/** | |
* For reading large numbers of bytes | |
*/ | |
public final static Parser<byte[]> BYTES(final int n) { | |
return new Parser<byte[]>() { | |
protected ParserResult<byte[]> parse(InputStream is) throws IOException { | |
byte[] arr = new byte[n]; | |
int fill = 0; | |
while(fill < n) is.read(arr, fill, n-fill); | |
return new ParserResult<>(n, arr); | |
} | |
}; | |
} | |
/** | |
* Compose 2 byte-readers to make a byte-pair reader | |
*/ | |
public final static Parser<List<Integer>> BYTE_PAIR = SEQ(UINT_8, UINT_8); | |
/** | |
* Make a u16-reader | |
*/ | |
public final static Parser<Integer> LE_UINT_16 = BYTE_PAIR.new Lift<Integer>() { | |
protected Integer f(List<Integer> value) { | |
return value.get(0) | (value.get(1) << 8); | |
} | |
}; | |
/** | |
* Read 3 ints | |
*/ | |
public final static Parser<List<Integer>> THREE_INTS = SEQ(UINT_8, UINT_8, UINT_8); | |
/** | |
* Read a 4KB byte array | |
*/ | |
public final static Parser<byte[]> FOUR_KB = BYTES(4096); | |
/** | |
* Read a variable-length array (prefixed with size-byte) | |
*/ | |
public final static Parser<byte[]> VAR_BYTE_ARRAY = UINT_8.new Bind<byte[]>() { | |
protected Parser<byte[]> next(final Integer lenByte) { | |
return BYTES(lenByte); | |
} | |
}; | |
/** | |
* Read a variable-length array (prefixed with size-byte), but fail the parse if it's longer than a given limit | |
*/ | |
public final static Parser<byte[]> VAR_BYTE_ARRAY_LIMITED(final int max) { | |
return UINT_8.new Bind<byte[]>() { | |
protected Parser<byte[]> next(final Integer lenByte) { | |
return lenByte <= max ? BYTES(lenByte) : null; | |
} | |
}; | |
} | |
/** | |
* A string terminated by a newline | |
*/ | |
public final static Parser<String> LINE_STRING = UINT_8.new Bind<String>() { | |
protected Parser<String> next(final Integer charCode) { | |
// Handle the end case. | |
if(charCode == '\n') return Return(""); | |
// Not end character - combine with rest of string | |
return LINE_STRING.new Lift<String>() { | |
protected String f(String str) { | |
return ((char)(int)charCode) + str; | |
} | |
}; | |
} | |
}; | |
/** | |
* A number of lines terminated with a "." line | |
*/ | |
public final static Parser<List<String>> LINE_STRINGS = LINE_STRING.new Bind<List<String>>(){ | |
protected Parser<List<String>> next(String firstLine) { | |
if(firstLine.equals(".")) { | |
return Return((List<String>)new ArrayList<String>()); | |
} | |
return LINE_STRINGS.new Lift<List<String>>() { | |
protected List<String> f(List<String> nextLines) { | |
final List<String> result = new ArrayList<>(); | |
result.add(firstLine); | |
result.addAll(nextLines); | |
return result; | |
} | |
}; | |
} | |
}; | |
/** | |
* Read 4KB, and convert to integers | |
*/ | |
public final static Parser<List<Integer>> FOUR_KB_OF_INTS = FOUR_KB.new Lift<List<Integer>>() { | |
protected List<Integer> f(byte[] value) { | |
List<Integer> outs = new ArrayList<>(value.length); | |
for(int i = 0; i < value.length; i++) | |
outs.add((int) value[i]); | |
return outs; | |
} | |
}; | |
/** | |
* Make this functionality generic over Parser<byte[]> | |
*/ | |
public final static Parser<List<Integer>> BYTES_TO_INTS(Parser<byte[]> in) { | |
return in.new Lift<List<Integer>>() { | |
protected List<Integer> f(byte[] value) { | |
List<Integer> outs = new ArrayList<>(value.length); | |
for(int i = 0; i < value.length; i++) | |
outs.add((int) value[i]); | |
return outs; | |
} | |
}; | |
} | |
/** | |
* Rewrite of FOUR_KB_OF_INTS to use above generic function | |
*/ | |
public final static Parser<List<Integer>> FOUR_KB_OF_INTS_BETTER = BYTES_TO_INTS(FOUR_KB); | |
/** | |
* An operator which ensures a result is what is expected | |
*/ | |
public final Parser<T> EXPECT(final T expected) { | |
return this.new Bind<T>() { | |
protected Parser<T> next(T actual) { | |
return ((expected == null && actual == null) || (expected != null && expected.equals(actual))) ? Return(actual) : null; | |
} | |
}; | |
} | |
/** | |
* A parsing result | |
*/ | |
public static class ParserResult<T> { | |
/** | |
* Number of bytes consumed | |
*/ | |
public final int read; | |
/** | |
* Parsed object | |
*/ | |
public final T obj; | |
public ParserResult(int read, T obj) { | |
this.read = read; | |
this.obj = obj; | |
} | |
} | |
/** | |
* Parse the contents of an InputStream | |
* | |
* @return A ParserResult<T> on success. Null on failure. | |
*/ | |
protected abstract ParserResult<T> parse(InputStream is) throws IOException; | |
/* ---------- Monad Primitives ---------- */ | |
/** | |
* Bind operator to compose parsers. | |
*/ | |
public abstract class Bind<U> extends Parser<U> { | |
protected abstract Parser<U> next(T value); | |
@Override | |
protected final ParserResult<U> parse(InputStream is) throws IOException { | |
ParserResult<T> firstResult = Parser.this.parse(is); | |
if(firstResult != null) { | |
ParserResult<U> secondResult = next(firstResult.obj).parse(is); | |
return new ParserResult<>(firstResult.read + secondResult.read, secondResult.obj); | |
} | |
return null; | |
} | |
} | |
/** | |
* The 'Unit' parser. | |
*/ | |
public static <T> Parser<T> Return(final T value) { | |
return new Parser<T>() { | |
protected final ParserResult<T> parse(InputStream is) throws IOException { | |
return new ParserResult<>(0, value); | |
} | |
}; | |
} | |
/* ---------- Combinators ---------- */ | |
/** | |
* Lift a function 'U f(T)', to make it transform this parser's output. | |
*/ | |
public abstract class Lift<U> extends Parser<U> { | |
private final Parser<U> p; | |
public Lift() { | |
p = Parser.this.new Bind<U>() { | |
protected Parser<U> next(T value) { | |
return Return(f(value)); | |
} | |
}; | |
} | |
protected final ParserResult<U> parse(InputStream is) throws IOException { | |
return p.parse(is); | |
} | |
protected abstract U f(T value); | |
} | |
/** | |
* Combine a sequence of parsers. | |
* @param ps | |
* @return | |
*/ | |
public static <T> Parser<List<T>> SEQ(Parser<T>... ps) { | |
return Seq(Arrays.asList(ps)).new Bind<List<T>>() { | |
protected Parser<List<T>> next(List<T> value) { | |
return Return(value); | |
} | |
}; | |
} | |
private static <T> Parser<List<T>> Seq(List<Parser<T>> ps) { | |
if(ps.size() == 0) | |
return Return((List<T>)new ArrayList<T>()); | |
Parser<T> head = ps.get(0); | |
final List<Parser<T>> tail = ps.subList(1, ps.size()); | |
return head.new Bind<List<T>>() { | |
protected Parser<List<T>> next(final T firstValue) { | |
return Seq(tail).new Bind<List<T>>() { | |
protected Parser<List<T>> next(List<T> tailValues) { | |
List<T> result = new ArrayList<>(); | |
result.add(firstValue); | |
result.addAll(tailValues); | |
return Return(result); | |
} | |
}; | |
} | |
}; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment