Skip to content

Instantly share code, notes, and snippets.

@weixinfree
Last active September 12, 2023 13:56
Show Gist options
  • Select an option

  • Save weixinfree/1f3fa9287fb6901efb64a879a79a599c to your computer and use it in GitHub Desktop.

Select an option

Save weixinfree/1f3fa9287fb6901efb64a879a79a599c to your computer and use it in GitHub Desktop.
kotlin 实现的 parse combinator
@file:Suppress("MemberVisibilityCanBePrivate", "RemoveExplicitTypeArguments")
package com.duomi.parser
class ParseException(msg: String, src: Source) : RuntimeException("$msg, occur at src: $src")
var enableAutoSkipSpace = true
class Source(val src: String) {
var position: Int = 0
private set
val cursor: Char?
get() = if (position in src.indices) src[position] else null
val next: Char?
get() = if (position + 1 < src.length) src[position + 1] else null
val previous: Char?
get() = if (position - 1 >= 0) src[position - 1] else null
val isEof: Boolean
get() = position >= src.length
fun advance(step: Int = 1) {
position += step
}
fun seek(pos: Int) {
position = pos
}
fun cut(start: Int, end: Int): String = src.substring(start until end)
fun <T> match(parser: Parser<T>): T = parser.parse(this)
fun match(c: Char): Char = char(c).parse(this)
fun match(literal: String): String = literal.asParser().parse(this)
fun capture(parseBlock: (Source) -> Unit): String {
val start = position
parseBlock(this)
val end = position
return cut(start, end)
}
fun lookAhead(parser: Parser<*>): Boolean {
val pos = position
return try {
parser.parse(this)
seek(pos)
true
} catch (e: ParseException) {
seek(pos)
false
}
}
fun lookAhead(literal: String) = lookAhead(literal.asParser())
fun lookAhead(c: Char) = lookAhead(c.asParser())
override fun toString(): String {
return "cursor: $position, near by: \".>$cursor<.${
src.substring(
(position + 1).coerceAtMost(src.length),
(position + 10).coerceAtMost(src.length)
)
}\" "
}
}
fun interface Parser<T> {
fun parse(source: Source): T
}
fun <T> recurParser(parse: (Source) -> T) = parser { src ->
parser { parse(it) }.parse(src)
}
fun <T> parser(name: String? = null, nameSupplier: () -> String = { "" }, parser: (Source) -> T): Parser<T> {
return object : Parser<T> {
override fun parse(source: Source): T {
return parser(source).also {
if (enableAutoSkipSpace) {
while (!source.isEof && source.cursor?.isWhitespace() == true) source.advance()
}
}
}
override fun toString(): String {
return name ?: nameSupplier()
}
}
}
fun eof() = parser("eof") { if (it.isEof) true else throw ParseException("eof", it) }
fun char(c: Char) = parser { src ->
if (src.cursor == c) {
src.advance(); c
} else throw ParseException("parse char: '$c' failed", src)
}
fun Char.asParser() = char(this)
fun digits() = isSatisfy("digit") { it.isDigit() }
fun word() = isSatisfy("word") { it.isLetterOrDigit() }
fun space() = isSatisfy("space", minMatchInclusive = 0) { it.isWhitespace() }
fun isSatisfy(
desc: String = "",
minMatchInclusive: Int = 1,
maxMatchInclusive: Int = Integer.MAX_VALUE,
test: (Char) -> Boolean
) = parser(desc) { src ->
val start = src.position
while (!src.isEof && src.cursor != null && test(src.cursor!!)) {
src.advance()
}
val matchCount = src.position - start
if (matchCount < minMatchInclusive || matchCount > maxMatchInclusive) throw ParseException(
"parse \"$desc\"[$minMatchInclusive - $maxMatchInclusive] failed",
src
)
src.cut(start, src.position)
}
fun stringToParser(chars: String) = parser("is \"$chars\"") { src ->
var index = 0
while (!src.isEof && index < chars.length && chars[index] == src.cursor) {
index++
src.advance()
}
if (index == chars.length) {
chars
} else {
throw ParseException("match chars:\"$chars\" failed", src)
}
}
fun String.asParser(): Parser<String> = stringToParser(this)
fun string(char: Char = '"') = parser { src ->
if (src.isEof || src.cursor != char) {
throw ParseException("except string start, $char", src)
}
src.advance()
val start = src.position
var find = false
while (!src.isEof) {
if (src.cursor == char && src.previous != '\\') {
find = true
break
}
src.advance()
}
src.advance()
if (find) {
src.cut(start, src.position - 1)
} else {
throw ParseException("parse string failed", src)
}
}
///////////////////////////////////////////////////////////////////////////
//
///////////////////////////////////////////////////////////////////////////
fun <T, R> Parser<T>.map(mapper: (T) -> R): Parser<R> =
parser(nameSupplier = { this.toString() }) { mapper(this.parse(it)) }
fun <T> many(parser: Parser<T>, minInclusive: Int = 0, maxInclusive: Int = Integer.MAX_VALUE) = parser { src ->
val collector = mutableListOf<T>()
while (true) {
collector += src.runCatching { parser.parse(src) }.getOrNull() ?: break
if (collector.size > maxInclusive) throw ParseException("exceed max match count, expect max: $maxInclusive", src)
}
if (collector.size < minInclusive) throw ParseException("lower than min match count, except min: $minInclusive", src)
collector
}
fun <T> Parser<T>.many1() = many(this, 1)
fun <T> Parser<T>.many0() = many(this, 0)
fun <T> option(parser: Parser<T>): Parser<Result<T>> =
parser { src -> src.runCatching { parser.parse(it) } }
fun <T> Parser<T>.option_(): Parser<Result<T>> = option(this)
fun <B, S> Parser<B>.sepBy(sep: Parser<S>): Parser<MutableList<B>> = parser { src ->
val collector = mutableListOf<B>()
collector += this.parse(src)
while (true) {
src.runCatching { sep.parse(it) }.getOrNull() ?: break
collector += src.runCatching { this.parse(it) }.getOrNull() ?: break
}
collector
}
infix fun <T> Parser<T>.or(right: Parser<out T>) = parser<T> { src ->
src.runCatching { this.parse(src) }.getOrElse { right.parse(src) }
}
infix fun Parser<String>.or(right: String) = parser { src ->
src.runCatching { this.parse(src) }.getOrElse { right.asParser().parse(src) }
}
infix fun String.or(right: String) = this.asParser() or right.asParser()
infix fun Char.or(right: Char) = this.asParser() or right.asParser()
fun capture(vararg parsers: Parser<*>) = parser<String> { src ->
val start = src.position
parsers.forEach { it.parse(src) }
src.cut(start, src.position)
}
fun <T> Source.runCatching(action: (Source) -> T): Result<T> {
val flag = this.position
return try {
Result.success(action(this))
} catch (e: ParseException) {
this.seek(flag)
Result.failure(e)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment