Last active
April 28, 2026 17:11
-
-
Save moleike/6fa86a3907a9d42dff349a0b53c4e809 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# scalafmt configuration — see https://scalameta.org/scalafmt/docs/configuration.html
version = "3.7.15"
runner.dialect = scala3
maxColumn = 80
# Collapse line breaks where possible instead of keeping the source's layout.
newlines.source = fold
danglingParentheses.preset = false
align.preset = none
indent.main = 2
indent.callSite = 2
indent.defnSite = 2
# Rewrite to Scala 3 syntax: new control syntax and indentation-based blocks.
rewrite.scala3.convertToNewSyntax = true
rewrite.scala3.removeOptionalBraces = yes
# Pack call-site arguments onto as few lines as possible.
binPack.unsafeCallSite = true
optIn.configStyleArguments = false
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //> using scala 3.3.7 | |
| //> using file staged.scala unstaged.scala misc.scala json.scala | |
| //> using dependency org.typelevel::cats-core:2.13.0 | |
| //> using jmh | |
| package benchmarks | |
| import org.openjdk.jmh.annotations.* | |
| import java.util.concurrent.TimeUnit | |
| import org.openjdk.jmh.infra.Blackhole | |
| import java.nio.file.{Files, Paths} | |
| import java.nio.charset.StandardCharsets | |
/** Read the file at `path` fully into memory and decode it as UTF-8 text. */
def loadJsonFile(path: String): String =
  new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8)
/** JMH harness comparing the three JSON parser implementations.
  * Scores are average time per parse in microseconds; JMH forks/iterations
  * come from the command line. `Blackhole.consume` prevents dead-code
  * elimination of the parse result.
  */
@State(Scope.Benchmark)
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MICROSECONDS)
class ParserBench:
  // JMH injects each value before setup; one benchmark series per file.
  @Param(Array("1MB.json", "5MB.json"))
  var fileName: String = _
  // Populated once in setup() so file I/O stays out of the measured code.
  var input: String = _
  // Parser entry points are resolved eagerly at state construction.
  val stagedParser = staged.parsers.jsonParse
  val unstagedParser = unstaged.parsers.jsonParse
  val miscParser = misc.parsers.jsonParse
  @Setup
  def setup(): Unit =
    input = loadJsonFile(fileName)
  // Hand-written (misc) parser is the reference point.
  @Benchmark
  def baseline(blackhole: Blackhole): Unit =
    blackhole.consume(miscParser(input))
  @Benchmark
  def benchStaged(blackhole: Blackhole): Unit =
    blackhole.consume(stagedParser(input))
  @Benchmark
  def benchUnstaged(blackhole: Blackhole): Unit =
    blackhole.consume(unstagedParser(input))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //> using scala 3.3.7 | |
| package json | |
/** Minimal JSON document model shared by all three parser implementations.
  * Case order is significant (it fixes the enum ordinals) and is preserved.
  */
enum Json {
  /** An object: unordered mapping from member name to value. */
  case JObject(fields: Map[String, Json])
  /** An array: ordered list of values. */
  case JArray(items: List[Json])
  case JString(value: String)
  case JBoolean(value: Boolean)
  /** All JSON numbers are represented as Double. */
  case JNumber(value: Double)
  case JNull
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //> using scala 3.3.7 | |
| //> using file json.scala | |
| package misc | |
| import scala.util.control.TailCalls.* | |
| import json.Json | |
/** A parse failure, recording the input offset where parsing stopped. */
case class ParseFailure(offset: Int)

type Result[A] = Either[ParseFailure, A]

/** A parser is a function from (input, offset) to a trampolined result of
  * the parsed value plus the offset just past the consumed input.
  */
trait Parser[A] extends ((String, Int) => TailRec[Result[(A, Int)]]):
  /** Run from offset 0, fully evaluate the trampoline, drop the end offset. */
  def parse(in: String): Either[ParseFailure, A] =
    this(in, 0).result.map(_._1)

  /** Transform the produced value; offsets and failures pass through. */
  def map[B](f: A => B): Parser[B] = (in, off) =>
    this(in, off).map(_.map { case (a, next) => (f(a), next) })
/** Hand-written recursive-descent JSON parser, stack-safe via TailCalls. */
object parsers:
  import Json.*
  val jsonParse = json.parse
  // Dispatch on the first character; literals are matched in place, while
  // arrays/objects recurse through tailcall to keep the stack bounded.
  lazy val json: Parser[Json] = (in, off) =>
    if off < in.length then
      in(off) match
        case 't' if in.startsWith("true", off) =>
          done(Right((JBoolean(true), off + 4)))
        case 'f' if in.startsWith("false", off) =>
          done(Right((JBoolean(false), off + 5)))
        case 'n' if in.startsWith("null", off) =>
          done(Right((JNull, off + 4)))
        case c if c == '-' || c.isDigit => number(in, off)
        case '"' => string(in, off)
        case '[' => tailcall(array(in, off))
        case '{' => tailcall(obj(in, off))
        case _ => done(Left(ParseFailure(off)))
    else done(Left(ParseFailure(off)))
  // NOTE(review): scans to the next '"' — backslash escapes are not handled.
  // Presumably an intentional simplification shared by all three parsers.
  val string: Parser[Json] = (in, off) =>
    val end = in.indexOf('"', off + 1)
    if end < 0 then done(Left(ParseFailure(off)))
    else done(Right((JString(in.substring(off + 1, end)), end + 1)))
  // Greedily consumes digits plus '.'/'-' and delegates to String.toDouble;
  // a malformed run (e.g. "--") would throw rather than fail — TODO confirm
  // inputs are pre-validated by the dispatch above.
  val number: Parser[Json] = (in, off) =>
    val end = in.indexWhere(x => !x.isDigit && x != '.' && x != '-', off)
    val e = if end == -1 then in.length else end
    done(Right((JNumber(in.substring(off, e).toDouble), e)))
  lazy val array: Parser[Json] =
    parseSep('[', ']', json).map(JArray(_))
  // An object is a '{'/'}'-delimited, comma-separated list of key:value
  // pairs; the element parser below insists the key parses as a JString.
  lazy val obj: Parser[Json] =
    parseSep('{', '}',
      (in, off) =>
        // We use .flatMap on the TailRec to sequence the next step without
        // growing the stack.
        tailcall(json(in, off)).flatMap {
          case Right((JString(k), next)) =>
            val colon = ws(in, next)
            if colon < in.length && in(colon) == ':' then
              tailcall(json(in, ws(in, colon + 1))).map {
                case Right((v, n)) => Right(((k, v), n))
                case Left(fail) => Left(fail)
              }
            else done(Left(ParseFailure(colon)))
          case _ => done(Left(ParseFailure(off)))
        }
    ).map(kvs => JObject(kvs.toMap))
  /** Parse `open`, then zero or more `p` separated by commas, then `close`.
    * Elements are accumulated in reverse and flipped once at the end.
    */
  def parseSep[A](open: Char, close: Char, p: Parser[A]): Parser[List[A]] =
    (in, off) =>
      val s = ws(in, off)
      if in.lift(s).contains(open) then
        def loop(c: Int, acc: List[A]): TailRec[Either[ParseFailure, (List[A], Int)]] =
          val n = ws(in, c)
          in.lift(n) match
            case Some(`close`) =>
              done(Right((acc.reverse, n + 1)))
            // A comma is only legal after at least one element.
            case Some(',') if acc.nonEmpty =>
              val afterComma = ws(in, n + 1)
              tailcall(p(in, afterComma)).flatMap:
                case Left(f) => done(Left(f))
                case Right((v, ro)) => tailcall(loop(ro, v :: acc))
            // First element: no separator expected yet.
            case _ if acc.isEmpty =>
              tailcall(p(in, n)).flatMap:
                case Left(f) => done(Left(f))
                case Right((v, ro)) => tailcall(loop(ro, v :: acc))
            case _ =>
              done(Left(ParseFailure(n)))
        loop(s + 1, Nil)
      else
        done(Left(ParseFailure(s)))
  /** Offset of the first non-whitespace character at or after `off`
    * (input length if none).
    */
  def ws(in: String, off: Int): Int =
    in.indexWhere(!_.isWhitespace, off) match
      case -1 => in.length
      case i => i
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Plot JMH benchmark results (results.json) as two log-scaled bar charts:
execution time (ms/op) and allocation (MB/op), grouped by payload size."""
import json
import math
import os

import matplotlib.pyplot as plt

# --- 1. Load the JMH data ---
with open('results.json', 'r') as f:
    data = json.load(f)

# Benchmark method names in display order. (The original assigned this list
# twice; the dead first assignment has been removed.)
parsers = ['baseline', 'benchStaged', 'benchUnstaged']
display_names = ['Baseline\n(Handwritten)', 'Staged', 'Non-staged']
sizes = ['1MB.json', '5MB.json']

time_data = {p: [0, 0] for p in parsers}
time_err = {p: [0, 0] for p in parsers}
alloc_data = {p: [0, 0] for p in parsers}
alloc_err = {p: [0, 0] for p in parsers}


def _err(value, scale):
    """Scale a JMH scoreError to target units.

    JMH emits the *string* "NaN" for single-iteration runs, and a float NaN
    can appear too; the original `str(err) != 'NaN'` check missed the float
    case (its str() is 'nan'). Both now map to 0.
    """
    try:
        v = float(value)
    except (TypeError, ValueError):
        return 0
    return 0 if math.isnan(v) else v / scale


# --- 2. Extract and convert data ---
for run in data:
    bench_name = run.get('benchmark', '')
    file_name = run.get('params', {}).get('fileName')
    if file_name not in sizes:
        continue
    size_idx = sizes.index(file_name)
    for p in parsers:
        if not bench_name.endswith(f'.{p}'):
            continue
        # Time (us to ms)
        time_data[p][size_idx] = run['primaryMetric']['score'] / 1000
        time_err[p][size_idx] = _err(run['primaryMetric'].get('scoreError', 0), 1000)
        # Memory (Bytes to MB)
        gc_metric = run.get('secondaryMetrics', {}).get('gc.alloc.rate.norm')
        if gc_metric is not None:
            alloc_data[p][size_idx] = gc_metric['score'] / (1024 * 1024)
            alloc_err[p][size_idx] = _err(gc_metric.get('scoreError', 0), 1024 * 1024)

# --- 3. Plotting Setup ---
x = range(len(parsers))
width = 0.35


def make_labels(data_list, err_list):
    """Bar labels of the form 'value\\n± error'."""
    return [f"{val:.1f}\n± {err:.2f}" for val, err in zip(data_list, err_list)]


def _plot(metric_data, metric_err, colors, ylabel, outfile):
    """Render one grouped, log-scaled bar chart and save it to `outfile`."""
    print(f"Generating {outfile}...")
    fig, ax = plt.subplots(figsize=(8, 6))
    groups = []
    for idx, (label, color) in enumerate(zip(['1MB Payload', '5MB Payload'], colors)):
        # Side-by-side bars: group 0 shifted left, group 1 shifted right.
        xs = [i - width / 2 + idx * width for i in x]
        vals = [metric_data[p][idx] for p in parsers]
        errs = [metric_err[p][idx] for p in parsers]
        rects = ax.bar(xs, vals, width, yerr=errs, label=label, capsize=5, color=color)
        groups.append((rects, vals, errs))
    # Log scale configuration; a log axis cannot start at 0, 1 is a safe floor.
    ax.set_yscale('log')
    ax.set_ylim(bottom=1)
    ax.grid(True, which="both", axis="y", ls="--", alpha=0.3)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_xticks(x)
    ax.set_xticklabels(display_names, fontsize=11)
    ax.legend(fontsize=11)
    # Explicit value ± error labels above each bar.
    for rects, vals, errs in groups:
        ax.bar_label(rects, padding=3, labels=make_labels(vals, errs), fontsize=9)
    # Expand the top multiplicatively so labels fit on a log scale.
    ax.set_ylim(top=max(metric_data[p][1] for p in parsers) * 3.5)
    fig.tight_layout()
    fig.savefig(outfile, dpi=300, bbox_inches='tight')
    plt.close(fig)


# Figure 1: Speed (Logarithmic)
_plot(time_data, time_err, ['#1f77b4', '#ff7f0e'],
      'Execution Time (ms/op) - Log Scale', 'benchmark_speed.png')
# Figure 2: Memory (Logarithmic)
_plot(alloc_data, alloc_err, ['#2ca02c', '#d62728'],
      'Memory Allocated (MB/op) - Log Scale', 'benchmark_memory.png')
print(f"Success! Both log-scaled images saved in: {os.getcwd()}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //> using scala 3.3.7 | |
| package staged | |
| import scala.quoted.* | |
| import scala.util.control.TailCalls.{tailcall, done, TailRec} | |
| import json.Json | |
/** A parse failure, recording the input offset where parsing stopped. */
case class ParseFailure(offset: Int)

/** A staged parser: given a Quotes context, produces a code generator. */
type Parser[A] = Quotes ?=> Gen[A]

// A code generator in continuation-passing style: from expressions for the
// input string and current offset, plus success/failure continuations, it
// builds the residual (generated) parsing code.
opaque type Gen[A] = (
  Expr[String],
  Expr[Int],
  Parser.internal.Cont[A]) => Expr[Parser.internal.Res]
/** Combinators that generate specialized parsing code at compile time.
  * Each combinator splices its continuations directly into the residual
  * program; `TailRec` trampolining keeps the generated code stack-safe.
  */
object Parser:
  object internal:
    // Residual computation type — every generated step is a trampolined Unit.
    type Res = TailRec[Unit]
    /** Static success/failure continuations over staged expressions. */
    case class Cont[A](
        succ: (Expr[A], Expr[Int]) => Expr[Res],
        fail: Expr[Int] => Expr[Res])
    object Cont:
      // Wrap a *dynamic* (runtime) continuation pair as a static Cont; calls
      // go through tailcall so the generated code stays stack-safe.
      def apply[A: Type](k: Expr[((Any, Int) => Res, Int => Res)])(using
          Quotes): Cont[A] = Cont(succ = (v, n) => '{ tailcall($k._1($v, $n)) },
        fail = fOff => '{ tailcall($k._2($fOff)) })
      // Reify a static Cont as a runtime pair of functions (the inverse of
      // apply); the success value travels as Any and is cast back to A.
      def lower[A: Type](k: Cont[A])(using
          Quotes): Expr[((Any, Int) => Res, Int => Res)] = '{
        ((v: Any, n: Int) => ${ k.succ('{ v.asInstanceOf[A] }, 'n) },
          (fOff: Int) => ${ k.fail('fOff) })
      }
  import internal.*
  /** Lift an arbitrary (String, Int) => Option step into a staged parser. */
  inline def apply[A: Type](
      inline p: (String, Int) => Option[(A, Int)]): Parser[A] = (in, off, k) =>
    '{
      p($in, $off) match
        case Some((a, next)) => ${ k.succ('a, 'next) }
        case None => ${ k.fail(off) }
    }
  /** Always fails at the current offset. */
  inline def empty[A: Type]: Parser[A] = (_, off, k) => k.fail(off)
  /** Succeeds with `value` without consuming input. */
  inline def pure[A: Type](inline value: A): Parser[A] =
    (_, off, k) => k.succ('{ value }, off)
  // Knot-tying for recursive grammars: generates a single runtime `loop`
  // function; recursive uses of the parser become tailcalls to it, with
  // continuations lowered/raised across the static/dynamic boundary.
  def fix[A: Type](f: Parser[A] => Parser[A]): Parser[A] = (in, off, k) =>
    '{
      def loop(o: Int, k: ((Any, Int) => Res, Int => Res)): Res = ${
        f((_, nOff, nK) => '{ tailcall(loop($nOff, ${ Cont.lower(nK) })) })(
          in, 'o, Cont('k))
      }
      tailcall(loop($off, ${ Cont.lower(k) }))
    }
  /** Consume one character satisfying `f`; betaReduce inlines the predicate. */
  inline def satisfy(inline f: Char => Boolean): Parser[Char] = (in, off, k) =>
    '{
      if $off < $in.length && ${ Expr.betaReduce('{ f($in.charAt($off)) }) }
      then ${ k.succ('{ $in.charAt($off) }, '{ $off + 1 }) }
      else ${ k.fail(off) }
    }
  /** Skip whitespace; always succeeds (possibly consuming nothing). */
  def ws: Parser[Unit] = (in, off, k) =>
    '{
      val n = $in.indexWhere(!_.isWhitespace, $off)
      val next = if n < 0 then $in.length else n
      ${ k.succ('{ () }, 'next) }
    }
  /** Take characters up to (not including) `c`, or to end of input. */
  def takeUntil(c: Char): Parser[String] = (in, off, k) =>
    '{
      val n = $in.indexOf(${ Expr(c) }, $off)
      val next = if n < 0 then $in.length else n
      ${ k.succ('{ $in.substring($off, next) }, 'next) }
    }
  /** Match the literal `s`; length is computed at compile time. */
  def string(s: String): Parser[String] = (in, off, k) =>
    '{
      if $in.startsWith(${ Expr(s) }, $off) then
        ${ k.succ(Expr(s), '{ $off + ${ Expr(s.length) } }) }
      else ${ k.fail(off) }
    }
  inline def char(inline c: Char): Parser[Char] = satisfy(_ == c)
  inline def takeWhile(inline f: Char => Boolean): Parser[String] = satisfy(f)
    .skipMany.slice
  inline def takeWhile1(inline f: Char => Boolean): Parser[String] = satisfy(f)
    .skipMany1.slice
  inline def skipWhile(inline f: Char => Boolean): Parser[Unit] = satisfy(f)
    .skipMany
  /** `p` followed by trailing whitespace. */
  inline def lexeme[A](inline p: Parser[A]): Parser[A] = p <* ws
  inline def tok(inline c: Char): Parser[Char] = lexeme(char(c))
  inline def tok(inline s: String): Parser[String] = lexeme(string(s))
  extension [A: Type](p: Parser[A])
    /** Generate the final runtime parser: runs the residual program to
      * completion and reads the result out of a local mutable cell.
      */
    def compile(using Quotes): Expr[String => Either[ParseFailure, A]] =
      val pp = p.asInstanceOf[Gen[Any]]
      '{ (in: String) =>
        var res: Either[ParseFailure, Any] = null
        ${
          pp('{ in }, '{ 0 },
            Cont((v, off) => '{ res = Right($v); done(()) },
              fOff => '{ res = Left(ParseFailure($fOff)); done(()) }))
        }.result
        res.asInstanceOf[Either[ParseFailure, A]]
      }
    /** Sequence: run `p`, then `that` from where `p` stopped. */
    def ~[B: Type](that: Parser[B]): Parser[(A, B)] = (in, off, k) =>
      p(in, off,
        Cont((a, o1) =>
          that(in, o1, Cont((b, o2) => k.succ('{ ($a, $b) }, o2), k.fail)),
          k.fail))
    // Alternation with backtracking to `off`; the shared `succ` helper keeps
    // the success continuation from being spliced (and duplicated) twice.
    // On double failure, report the failure that got furthest.
    def |(that: Parser[A])(using Quotes): Parser[A] = (in, off, k) =>
      '{
        def succ(res: A, next: Int): Res = ${ k.succ('res, 'next) }
        ${
          val k2 =
            Cont((res: Expr[A], o) => '{ tailcall(succ($res, $o)) }, k.fail)
          p(in, off,
            Cont(k2.succ,
              f1 =>
                that(in, off,
                  Cont(k2.succ,
                    f2 => k2.fail('{ if $f1 > $f2 then $f1 else $f2 })))))
        }
      }
    inline def map[B: Type](inline f: A => B): Parser[B] = (in, off, k) =>
      p(in, off,
        Cont((a, next) => k.succ(Expr.betaReduce('{ f($a) }), next), k.fail))
    inline def as[B: Type](inline value: B): Parser[B] = p.map(_ => value)
    def *>[B: Type](other: Parser[B]): Parser[B] = (p ~ other).map(_._2)
    def <*[B: Type](other: Parser[B]): Parser[A] = (p ~ other).map(_._1)
    // Repeat `p` zero or more times, folding results; stops (successfully)
    // at the first failure, resuming from the last good offset.
    inline def fold[B: Type](inline b: B)(inline f: (B, A) => B): Parser[B] =
      (in, off, k) =>
        '{
          def loop(cOff: Int, acc: B): Res = ${
            p(in, 'cOff,
              Cont((a, next) =>
                '{
                  tailcall(loop($next, ${ Expr.betaReduce('{ f(acc, $a) }) }))
                }, _ => k.succ('acc, 'cOff)))
          }
          tailcall(loop($off, b))
        }
    def many: Parser[List[A]] = p.fold(List.empty[A])((acc, a) => a :: acc)
      .map(_.reverse)
    def many1: Parser[List[A]] = (p ~ p.many).map(_ :: _)
    def skipMany: Parser[Unit] = p.fold(())((_, _) => ())
    def skipMany1: Parser[Unit] = p *> p.skipMany
    def sepBy[B: Type](sep: Parser[B]): Parser[List[A]] = p.sepBy1(sep) |
      pure(List.empty[A])
    def sepBy1[B: Type](sep: Parser[B]): Parser[List[A]] = (p ~ (sep *> p).many)
      .map(_ :: _)
    def ? : Parser[Option[A]] = p.map(Option(_)) | pure(None)
    /** Discard `p`'s value; return the substring it consumed instead. */
    def slice: Parser[String] = (in, off, k) =>
      p(in, off,
        Cont(
          (_, next) => k.succ('{ $in.substring($off, $next) }, next), k.fail))
    inline def between(inline open: Char, inline close: Char): Parser[A] =
      tok(open) *> lexeme(p) <* tok(close)
/** Concrete grammars built from the staged combinators. The `inline def` +
  * macro splice (`${ ... }`) pairs force code generation at the call site.
  */
object parsers:
  import Parser.*
  // Recursive JSON grammar tied with `fix`; alternatives are ordered so the
  // cheap literal checks run before the recursive array/object branches.
  val json: Parser[Json] = fix: self =>
    import Json.*
    // NOTE(review): no handling of backslash escapes inside strings —
    // matches the simplification in the other two parsers.
    val str = takeUntil('"').between('"', '"')
    val jNull = string("null").as(JNull)
    val jBool = (string("true").as(true) | string("false").as(false))
      .map(JBoolean(_))
    val jStr = str.map(JString(_))
    val jNum = takeWhile1(c => c == '-' || c == '.' || c.isDigit)
      .map(_.toDouble).map(JNumber(_))
    val jArr = lexeme(self).sepBy(tok(',')).between('[', ']').map(JArray(_))
    val jObj = ((str <* tok(':')) ~ lexeme(self)).sepBy(tok(','))
      .between('{', '}').map(fs => Json.JObject(fs.toMap))
    jNull | jBool | jNum | jStr | jArr | jObj
  def impl(using Quotes) = json.compile
  /** Compile-time-generated JSON parser entry point. */
  inline def jsonParse: String => Either[ParseFailure, Json] = ${ impl }
  // Toy grammar: arbitrarily nested balanced square brackets.
  val p: Parser[Unit] = fix { self =>
    char('[') *> (self | pure(())) <* char(']')
  }
  def bkt(using Quotes) = p.compile
  /** Compile-time-generated balanced-bracket validator. */
  inline def balanced: String => Either[ParseFailure, Unit] = ${ bkt }
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //> using scala 3.3.7 | |
| //> using file unstaged.scala staged.scala misc.scala json.scala | |
/** Smoke test driving all three parsers against the same inputs and checking
  * they agree. Expects `5MB.json` in the working directory.
  */
@main def runParser3(): Unit =
  import java.nio.file.{Files, Paths}
  import java.nio.charset.StandardCharsets

  // Read a whole file as UTF-8 text.
  def loadJsonFile(path: String): String =
    new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8)

  // Deep nesting exercises stack safety of the generated code; the second
  // input is deliberately unbalanced and should report a failure.
  val validateNested = staged.parsers.balanced
  println(validateNested("[[[[[[]]]]]]"))
  println(validateNested("[" * 5001 + "]" * 5000))

  val stagedParser = staged.parsers.jsonParse
  val unstagedParser = unstaged.parsers.jsonParse
  val miscParser = misc.parsers.jsonParse

  val input = loadJsonFile("5MB.json")
  // NOTE(review): the original printed staged.parsers.id("foo"), but no `id`
  // member exists in staged.parsers — removed; restore once `id` is defined.
  assert(miscParser(input) == stagedParser(input))
  assert(unstagedParser(input) == stagedParser(input))

  stagedParser("""{"foo": false , "bar": [42]}""") match
    case Right(json) => println(json)
    case Left(off) => println(s"Failed to parse at $off")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //> using scala 3.3.7 | |
| //> using file json.scala | |
| //> using dependency org.typelevel::cats-core:2.13.0 | |
| package unstaged | |
| import cats.{ApplicativeError, Defer, Eval, MonoidK} | |
| import cats.syntax.all.* | |
| import json.Json | |
/** A parse failure, recording the input offset where parsing stopped. */
case class ParseFailure(offset: Int)

/** A parser wraps a function from (input, offset) to a lazily-evaluated
  * result of the parsed value plus the offset past the consumed input.
  */
case class Parser[A](
    run: (String, Int) => Eval[Either[ParseFailure, (A, Int)]]
):
  /** Run from offset 0, force the Eval, and discard the final offset. */
  def parse(input: String): Either[ParseFailure, A] =
    run(input, 0).value match
      case Right((a, _)) => Right(a)
      case Left(failure) => Left(failure)
/** Cats type-class instances and primitive parsers. Stack safety comes from
  * Eval's trampolining (Eval.defer / flatMap).
  */
object Parser:
  // Defer lets recursive grammars be tied without eager evaluation.
  given Defer[Parser] with
    def defer[A](p: => Parser[A]): Parser[A] =
      Parser((in, offset) => Eval.defer(p.run(in, offset)))
  given app: ApplicativeError[Parser, ParseFailure] with
    def pure[A](x: A): Parser[A] =
      Parser((_, off) => Eval.now(Right((x, off))))
    // Sequencing: run ff, then fa from where ff stopped, apply the function.
    def ap[A, B](ff: Parser[A => B])(fa: Parser[A]): Parser[B] =
      Parser((in, off) =>
        ff.run(in, off).flatMap {
          case Right((f, nextOff)) =>
            fa.run(in, nextOff).map(_.map((a, n2) => (f(a), n2)))
          case Left(err) => Eval.now(Left(err))
        }
      )
    def raiseError[A](e: ParseFailure): Parser[A] =
      Parser((_, _) => Eval.now(Left(e)))
    // Recovery backtracks to the original offset; if the handler also fails,
    // report whichever failure got furthest into the input.
    def handleErrorWith[A](
        fa: Parser[A]
    )(f: ParseFailure => Parser[A]): Parser[A] =
      Parser((in, off) =>
        fa.run(in, off).flatMap {
          case Right(success) => Eval.now(Right(success))
          case Left(err1) =>
            f(err1).run(in, off).map {
              case Left(err2) =>
                Left(if err1.offset > err2.offset then err1 else err2)
              case Right(res) => Right(res)
            }
        }
      )
  given MonoidK[Parser] with
    def empty[A]: Parser[A] =
      Parser((_, off) => Eval.now(Left(ParseFailure(off))))
    // Alternation is error recovery: try x, fall back to y.
    def combineK[A](x: Parser[A], y: Parser[A]): Parser[A] =
      app.handleErrorWith(x)(_ => y)
  /** Consume one character satisfying `p`. */
  def satisfy(p: Char => Boolean): Parser[Char] =
    Parser((in, off) =>
      Eval.now {
        if off < in.length && p(in(off)) then Right((in(off), off + 1))
        else Left(ParseFailure(off))
      }
    )
  def char(c: Char): Parser[Char] = satisfy(_ == c)
  /** Match the literal `s`. */
  def string(s: String): Parser[String] =
    Parser: (in, off) =>
      Eval.now {
        if in.startsWith(s, off) then Right((s, off + s.length))
        else Left(ParseFailure(off))
      }
  /** Take characters up to (not including) `c`, or to end of input. */
  def takeUntil(c: Char): Parser[String] =
    Parser: (in, off) =>
      Eval.now {
        val idx = in.indexOf(c, off)
        val end = if idx < 0 then in.length else idx
        Right((in.substring(off, end), end))
      }
  /** Skip whitespace; always succeeds (possibly consuming nothing). */
  def ws: Parser[Unit] =
    Parser((in, off) =>
      Eval.now {
        val n = in.indexWhere(!_.isWhitespace, off)
        val next = if n == -1 then in.length else n
        Right(((), next))
      }
    )
  /** `p` followed by trailing whitespace. */
  def lexeme[A](p: Parser[A]): Parser[A] = p <* ws
  def tok(c: Char): Parser[Char] = char(c) <* ws
  def tok(s: String): Parser[String] = string(s) <* ws
  inline def takeWhile(inline f: Char => Boolean): Parser[String] =
    satisfy(f).skipMany.slice
  inline def takeWhile1(inline f: Char => Boolean): Parser[String] =
    satisfy(f).skipMany1.slice
  inline def skipWhile(inline f: Char => Boolean): Parser[Unit] =
    satisfy(f).skipMany
import Parser.*
/** Derived combinators, expressed via the type-class instances above. */
extension [A](p: Parser[A])(using
    app: ApplicativeError[Parser, ParseFailure],
    mk: MonoidK[Parser],
    d: Defer[Parser]
)
  /** Alternation (backtracking); `other` is by-name to allow recursion. */
  def |(other: => Parser[A]): Parser[A] =
    mk.combineK(p, other) // Use MonoidK's combineK
  def ~[B](other: => Parser[B]): Parser[(A, B)] =
    p.product(other)
  // Repeat `p` zero or more times, folding results; stops (successfully) at
  // the first failure, resuming from the last good offset. Eval.defer keeps
  // the loop stack-safe.
  def fold[B](b: B)(f: (B, A) => B): Parser[B] = Parser: (in, off) =>
    def loop(currOff: Int, acc: B): Eval[Either[ParseFailure, (B, Int)]] =
      Eval
        .defer(p.run(in, currOff))
        .flatMap:
          case Right((a, next)) => loop(next, f(acc, a))
          case Left(_) => Eval.now(Right((acc, currOff)))
    loop(off, b)
  // Accumulates in reverse, then flips once at the end (prepend is O(1)).
  def many: Parser[List[A]] =
    p.fold(List.empty[A])(_.::(_)).map(_.reverse)
  def many1: Parser[List[A]] =
    (p ~ p.many).map(_ :: _)
  def skipMany: Parser[Unit] =
    p.fold(())((_, _) => ())
  def skipMany1: Parser[Unit] =
    p.map(_ => ()) *> p.skipMany
  def sepBy(sep: Parser[?]): Parser[List[A]] =
    sepBy1(sep) | app.pure(List.empty[A])
  def sepBy1(sep: Parser[?]): Parser[List[A]] =
    (p ~ (sep *> p).many).map(_ :: _)
  def between(`open`: Char, close: Char): Parser[A] =
    tok(`open`) *> lexeme(p) <* tok(close)
  def ? : Parser[Option[A]] =
    p.map(Option(_)) | app.pure(None)
  /** Discard `p`'s value; return the substring it consumed instead. */
  def slice: Parser[String] =
    Parser: (in, off) =>
      p.run(in, off).map(_.map((_, rem) => (in.substring(off, rem), rem)))
/** Concrete JSON grammar — structurally mirrors the staged version so the
  * benchmark isolates the cost of interpretation vs. generated code.
  */
object parsers:
  // Recursion tied through Defer's fix; alternatives ordered cheapest first.
  val json: Parser[Json] = Defer[Parser].fix: self =>
    import Json.*
    // NOTE(review): no handling of backslash escapes inside strings —
    // matches the simplification in the other two parsers.
    val str = takeUntil('"').between('"', '"')
    val jNull = string("null").as(JNull)
    val jBool =
      (string("true").as(true) | string("false").as(false)).map(JBoolean(_))
    val jStr = str.map(JString(_))
    val jNum = takeWhile1(c => c == '-' || c == '.' || c.isDigit).map(
      _.toDouble
    ).map(JNumber(_))
    val jArr = lexeme(self).sepBy(tok(',')).between('[', ']').map(JArray(_))
    val jObj = ((str <* tok(':')) ~ lexeme(self)).sepBy(tok(',')).between('{', '}')
      .map(fs => Json.JObject(fs.toMap))
    jNull | jBool | jNum | jStr | jArr | jObj
  /** Ready-to-use entry point: String => Either[ParseFailure, Json]. */
  val jsonParse = json.parse
/** Minimal s-expression model: an atom (symbol) or a parenthesized list. */
enum Sexp:
  case Sym(name: String)
  case Seq(items: List[Sexp])
// A symbol is one or more letters; `slice` recovers the matched text.
lazy val sym: Parser[Sexp] =
  satisfy(_.isLetter).many1.slice.map(Sexp.Sym(_))
// Defer breaks the sym/seq/sexp recursion cycle at definition time.
lazy val seq: Parser[Sexp] =
  Defer[Parser].defer(sexp).many.between('(', ')').map(Sexp.Seq(_))
lazy val sexp: Parser[Sexp] =
  sym | seq
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment