Last active
October 13, 2017 19:59
-
-
Save silmeth/cab56d9a40a72f4ca39faaa603794280 to your computer and use it in GitHub Desktop.
Example of parsing (a subset of) JSON using the Kotlin better-parse (https://github.com/h0tk3y/better-parse) parser combinators
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.github.h0tk3y.betterParse.grammar.parseToEnd | |
/**
 * Demo entry point: parses a sample JSON document with [SimpleJsonGrammar],
 * sanity-checks a few of the parsed values and prints the resulting structure.
 */
object Main {
    /**
     * Parses the embedded sample document and prints the parsed value.
     *
     * The checks use Kotlin's `assert`, which is only evaluated when JVM
     * assertions are enabled (`-ea`); without that flag the program just prints
     * the parsed structure.
     *
     * @param args command-line arguments (unused).
     */
    @JvmStatic
    fun main(args: Array<String>) {
        // Raw string: the backslashes below reach the parser literally.
        val json = """
            {
                "Image": {
                    "Width": 800,
                    "Height": 600,
                    "Title": "View from 15th Floor",
                    "Thumbnail": {
                        "Url": "http://www.example.com/image/481989943",
                        "Height": 125,
                        "Width": 100,
                        "Visible": true
                    },
                    "Animated" : false,
                    "IDs": [1.16E2, 943, 234, 38793],
                    "Array of objects": [{}, {"type": "object in an array"}, null],
                    "Escaped characters": "\n\r\"\t\\",
                    "Non-escaped unicode characters" : "Ążćřǫ × 38.0e5¹²³"
                }
            }
        """

        val grammar = SimpleJsonGrammar()
        val parsed = grammar.parseToEnd(json)
        assert(parsed is Map<*, *>)

        @Suppress("UNCHECKED_CAST")
        val map: Map<String, Any?> = parsed as Map<String, Any?>

        // Every checked property lives inside the nested "Image" object, not at
        // the top level of the document, so descend into it before asserting.
        @Suppress("UNCHECKED_CAST")
        val image = map["Image"] as? Map<String, Any?>
        assert(image != null)
        assert(image?.get("Width") == 800.0)
        assert(image?.get("Title") == "View from 15th Floor")
        assert(image?.get("IDs") == listOf(116.0, 943.0, 234.0, 38793.0))

        println(parsed)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.github.h0tk3y.betterParse.combinators.and | |
import com.github.h0tk3y.betterParse.combinators.asJust | |
import com.github.h0tk3y.betterParse.combinators.map | |
import com.github.h0tk3y.betterParse.combinators.optional | |
import com.github.h0tk3y.betterParse.combinators.or | |
import com.github.h0tk3y.betterParse.combinators.separated | |
import com.github.h0tk3y.betterParse.combinators.skip | |
import com.github.h0tk3y.betterParse.combinators.use | |
import com.github.h0tk3y.betterParse.grammar.Grammar | |
import com.github.h0tk3y.betterParse.grammar.parser | |
import com.github.h0tk3y.betterParse.parser.Parser | |
/**
 * Grammar for a subset of JSON, built from better-parse parser combinators.
 *
 * Parsed values are represented as plain Kotlin objects:
 * - `null`            -> `null`
 * - `true` / `false`  -> [Boolean]
 * - string literals   -> [String] with the surrounding quotes removed
 *   (escape sequences are kept exactly as written, they are not unescaped)
 * - numbers           -> [Double]
 * - objects           -> `Map<String, Any?>`
 * - arrays            -> `List<Any?>`
 *
 * Limitations visible in the token definitions: there is no sign token, so
 * negative numbers and signed exponents are not accepted, and the string token
 * only allows the escapes `\"`, `\n`, `\r`, `\t`, `\b`, `\f` and `\\`
 * (no `\uXXXX`, no `\/`).
 *
 * NOTE(review): the order of the `by token` declarations is kept unchanged on
 * purpose; better-parse presumably tries tokens in declaration order — confirm
 * before reordering.
 */
class SimpleJsonGrammar : Grammar<Any?>() {
    // --- Tokens ---------------------------------------------------------------

    // A double-quoted string: runs of characters other than backslash and quote,
    // optionally interleaved with one of the supported backslash escapes.
    val stringLiteral by token("\"[^\\\\\"]*(\\\\[\"nrtbf\\\\][^\\\\\"]*)*\"")

    // NOTE(review): the second argument `true` presumably marks the token as
    // ignored by the parsers — confirm against the better-parse `token` docs.
    val whiteSpace by token("\\s+", true)

    val colon by token(":")
    val openingBrace by token("\\{")
    val closingBrace by token("\\}")
    val openingBracket by token("\\[")
    val closingBracket by token("\\]")

    val nullToken by token("\\bnull\\b")
    val trueToken by token("\\btrue\\b")
    val falseToken by token("\\bfalse\\b")

    /** Parses the `null` literal into a Kotlin `null`. */
    val jsonNull: Parser<Any?> = nullToken asJust null

    /** Parses the `true` / `false` literals into a [Boolean]. */
    val jsonBool: Parser<Boolean> = (trueToken asJust true) or (falseToken asJust false)

    val comma by token(",")
    val integer by token("\\d+")
    val dot by token("\\.")
    val exponent by token("[eE]")

    // --- Primitive values -----------------------------------------------------

    /**
     * Parses a string literal and strips the surrounding double quotes.
     *
     * `substring`'s end index is exclusive, so `text.length - 1` (the index of
     * the closing quote) keeps every content character and drops only the quote.
     */
    val string: Parser<String> = stringLiteral use { text.substring(1, text.length - 1) }

    /** The digits following an `e` / `E`. */
    val exponentPart = skip(exponent) and integer

    /** A dot optionally followed by digits (used after an integer part, e.g. `1.` or `1.5`). */
    val floatingPointPart = skip(dot) and optional(integer)

    /** A dot that must be followed by digits (used when there is no integer part, e.g. `.5`). */
    val onlyFloatingPart = skip(dot) and integer

    /**
     * Parses a number into a [Double].
     *
     * The mantissa is either `digits[.digits]` or `.digits`; it may be followed
     * by an exponent. The matched pieces are re-assembled into a string and
     * converted with [String.toDouble].
     */
    val number: Parser<Double> = ((integer and optional(floatingPointPart))
        .map { (int, floatPart) ->
            int.text + (floatPart?.let { ".${it.text}" } ?: "")
        } or
        (onlyFloatingPart map { ".${it.text}" }) and
        optional(exponentPart map { "e${it.text}" }))
        .map { (mantissa, exponentSuffix) ->
            (mantissa + (exponentSuffix ?: "")).toDouble()
        }

    /** Any non-composite JSON value. */
    val jsonPrimitiveValue: Parser<Any?> = jsonNull or jsonBool or string or number

    // --- Composite values -----------------------------------------------------

    /**
     * Parses `{ "key": value, ... }` into a map.
     *
     * `parser(this::jsonValue)` defers the reference because [jsonValue] is
     * declared further down and the grammar is recursive.
     * NOTE(review): the trailing `true` passed to `separated` presumably allows
     * zero entries (`{}`) — confirm against the better-parse `separated` docs.
     */
    val jsonObject: Parser<Map<String, Any?>> = (skip(openingBrace) and
        separated(string and skip(colon) and parser(this::jsonValue), comma, true) and
        skip(closingBrace))
        .map { entries ->
            entries.terms.associate { (key, value) -> key to value }
        }

    /**
     * Parses `[ value, ... ]` into a list.
     *
     * NOTE(review): as for objects, the trailing `true` presumably allows an
     * empty array — confirm against the better-parse `separated` docs.
     */
    val jsonArray: Parser<List<Any?>> = (skip(openingBracket) and
        separated(parser(this::jsonValue), comma, true) and
        skip(closingBracket))
        .map { it.terms }

    /** Any JSON value supported by this grammar. */
    val jsonValue: Parser<Any?> = jsonPrimitiveValue or jsonObject or jsonArray

    override val rootParser = jsonValue
}
Also, even though the grammar correctly recognizes regular escaped special characters, it does not unescape them: e.g. "This string,\ncalled \"inner quoted\"\nparses just fine" currently parses as:
This string,\ncalled \"inner quoted\"\nparses just fine
instead of expected:
This string,
called "inner quoted"
parses just fine
But this is easy to correct: one needs to handle the escape sequences when mapping the result of the string parser.
Improved version of this parser lives in this repo.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Escaped Unicode characters (e.g. "\u2192\uD83D\uDE00") are not parsed by this grammar at the moment.