Last active
September 12, 2023 13:56
-
-
Save weixinfree/1f3fa9287fb6901efb64a879a79a599c to your computer and use it in GitHub Desktop.
kotlin 实现的 parse combinator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| @file:Suppress("MemberVisibilityCanBePrivate", "RemoveExplicitTypeArguments") | |
| package com.duomi.parser | |
| class ParseException(msg: String, src: Source) : RuntimeException("$msg, occur at src: $src") | |
| var enableAutoSkipSpace = true | |
| class Source(val src: String) { | |
| var position: Int = 0 | |
| private set | |
| val cursor: Char? | |
| get() = if (position in src.indices) src[position] else null | |
| val next: Char? | |
| get() = if (position + 1 < src.length) src[position + 1] else null | |
| val previous: Char? | |
| get() = if (position - 1 >= 0) src[position - 1] else null | |
| val isEof: Boolean | |
| get() = position >= src.length | |
| fun advance(step: Int = 1) { | |
| position += step | |
| } | |
| fun seek(pos: Int) { | |
| position = pos | |
| } | |
| fun cut(start: Int, end: Int): String = src.substring(start until end) | |
| fun <T> match(parser: Parser<T>): T = parser.parse(this) | |
| fun match(c: Char): Char = char(c).parse(this) | |
| fun match(literal: String): String = literal.asParser().parse(this) | |
| fun capture(parseBlock: (Source) -> Unit): String { | |
| val start = position | |
| parseBlock(this) | |
| val end = position | |
| return cut(start, end) | |
| } | |
| fun lookAhead(parser: Parser<*>): Boolean { | |
| val pos = position | |
| return try { | |
| parser.parse(this) | |
| seek(pos) | |
| true | |
| } catch (e: ParseException) { | |
| seek(pos) | |
| false | |
| } | |
| } | |
| fun lookAhead(literal: String) = lookAhead(literal.asParser()) | |
| fun lookAhead(c: Char) = lookAhead(c.asParser()) | |
| override fun toString(): String { | |
| return "cursor: $position, near by: \".>$cursor<.${ | |
| src.substring( | |
| (position + 1).coerceAtMost(src.length), | |
| (position + 10).coerceAtMost(src.length) | |
| ) | |
| }\" " | |
| } | |
| } | |
| fun interface Parser<T> { | |
| fun parse(source: Source): T | |
| } | |
| fun <T> recurParser(parse: (Source) -> T) = parser { src -> | |
| parser { parse(it) }.parse(src) | |
| } | |
| fun <T> parser(name: String? = null, nameSupplier: () -> String = { "" }, parser: (Source) -> T): Parser<T> { | |
| return object : Parser<T> { | |
| override fun parse(source: Source): T { | |
| return parser(source).also { | |
| if (enableAutoSkipSpace) { | |
| while (!source.isEof && source.cursor?.isWhitespace() == true) source.advance() | |
| } | |
| } | |
| } | |
| override fun toString(): String { | |
| return name ?: nameSupplier() | |
| } | |
| } | |
| } | |
| fun eof() = parser("eof") { if (it.isEof) true else throw ParseException("eof", it) } | |
| fun char(c: Char) = parser { src -> | |
| if (src.cursor == c) { | |
| src.advance(); c | |
| } else throw ParseException("parse char: '$c' failed", src) | |
| } | |
| fun Char.asParser() = char(this) | |
| fun digits() = isSatisfy("digit") { it.isDigit() } | |
| fun word() = isSatisfy("word") { it.isLetterOrDigit() } | |
| fun space() = isSatisfy("space", minMatchInclusive = 0) { it.isWhitespace() } | |
| fun isSatisfy( | |
| desc: String = "", | |
| minMatchInclusive: Int = 1, | |
| maxMatchInclusive: Int = Integer.MAX_VALUE, | |
| test: (Char) -> Boolean | |
| ) = parser(desc) { src -> | |
| val start = src.position | |
| while (!src.isEof && src.cursor != null && test(src.cursor!!)) { | |
| src.advance() | |
| } | |
| val matchCount = src.position - start | |
| if (matchCount < minMatchInclusive || matchCount > maxMatchInclusive) throw ParseException( | |
| "parse \"$desc\"[$minMatchInclusive - $maxMatchInclusive] failed", | |
| src | |
| ) | |
| src.cut(start, src.position) | |
| } | |
| fun stringToParser(chars: String) = parser("is \"$chars\"") { src -> | |
| var index = 0 | |
| while (!src.isEof && index < chars.length && chars[index] == src.cursor) { | |
| index++ | |
| src.advance() | |
| } | |
| if (index == chars.length) { | |
| chars | |
| } else { | |
| throw ParseException("match chars:\"$chars\" failed", src) | |
| } | |
| } | |
| fun String.asParser(): Parser<String> = stringToParser(this) | |
| fun string(char: Char = '"') = parser { src -> | |
| if (src.isEof || src.cursor != char) { | |
| throw ParseException("except string start, $char", src) | |
| } | |
| src.advance() | |
| val start = src.position | |
| var find = false | |
| while (!src.isEof) { | |
| if (src.cursor == char && src.previous != '\\') { | |
| find = true | |
| break | |
| } | |
| src.advance() | |
| } | |
| src.advance() | |
| if (find) { | |
| src.cut(start, src.position - 1) | |
| } else { | |
| throw ParseException("parse string failed", src) | |
| } | |
| } | |
| /////////////////////////////////////////////////////////////////////////// | |
| // | |
| /////////////////////////////////////////////////////////////////////////// | |
| fun <T, R> Parser<T>.map(mapper: (T) -> R): Parser<R> = | |
| parser(nameSupplier = { this.toString() }) { mapper(this.parse(it)) } | |
| fun <T> many(parser: Parser<T>, minInclusive: Int = 0, maxInclusive: Int = Integer.MAX_VALUE) = parser { src -> | |
| val collector = mutableListOf<T>() | |
| while (true) { | |
| collector += src.runCatching { parser.parse(src) }.getOrNull() ?: break | |
| if (collector.size > maxInclusive) throw ParseException("exceed max match count, expect max: $maxInclusive", src) | |
| } | |
| if (collector.size < minInclusive) throw ParseException("lower than min match count, except min: $minInclusive", src) | |
| collector | |
| } | |
| fun <T> Parser<T>.many1() = many(this, 1) | |
| fun <T> Parser<T>.many0() = many(this, 0) | |
| fun <T> option(parser: Parser<T>): Parser<Result<T>> = | |
| parser { src -> src.runCatching { parser.parse(it) } } | |
| fun <T> Parser<T>.option_(): Parser<Result<T>> = option(this) | |
| fun <B, S> Parser<B>.sepBy(sep: Parser<S>): Parser<MutableList<B>> = parser { src -> | |
| val collector = mutableListOf<B>() | |
| collector += this.parse(src) | |
| while (true) { | |
| src.runCatching { sep.parse(it) }.getOrNull() ?: break | |
| collector += src.runCatching { this.parse(it) }.getOrNull() ?: break | |
| } | |
| collector | |
| } | |
| infix fun <T> Parser<T>.or(right: Parser<out T>) = parser<T> { src -> | |
| src.runCatching { this.parse(src) }.getOrElse { right.parse(src) } | |
| } | |
| infix fun Parser<String>.or(right: String) = parser { src -> | |
| src.runCatching { this.parse(src) }.getOrElse { right.asParser().parse(src) } | |
| } | |
| infix fun String.or(right: String) = this.asParser() or right.asParser() | |
| infix fun Char.or(right: Char) = this.asParser() or right.asParser() | |
| fun capture(vararg parsers: Parser<*>) = parser<String> { src -> | |
| val start = src.position | |
| parsers.forEach { it.parse(src) } | |
| src.cut(start, src.position) | |
| } | |
| fun <T> Source.runCatching(action: (Source) -> T): Result<T> { | |
| val flag = this.position | |
| return try { | |
| Result.success(action(this)) | |
| } catch (e: ParseException) { | |
| this.seek(flag) | |
| Result.failure(e) | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment