Skip to content

Instantly share code, notes, and snippets.

@moea
Created May 12, 2026 19:24
Show Gist options
  • Select an option

  • Save moea/dfdb6ad6abdafcc8fc6e1ce93e3f5d9c to your computer and use it in GitHub Desktop.

Select an option

Save moea/dfdb6ad6abdafcc8fc6e1ce93e3f5d9c to your computer and use it in GitHub Desktop.
;; ============================================================================
;; JSON parser, built on algebraic effects + multi-shot continuations,
;; with a homegrown combinator DSL on top of `defmacro`.
;;
;; The bottom of the file (the actual grammar) reads close to BNF:
;;
;; (deftoken null "null" (JNull))
;; (deftoken true "true" (JBool true))
;; (defparser json-array
;; (JArr (between! "[" (sep! json-value ",") "]")))
;;
;; Everything above the grammar is plumbing: Parse + Choose effects,
;; the parser combinator zoo, and a macro layer that turns combinator
;; calls back into something that scans like a grammar.
;;
(namespace examples.json)
;; ----------------------------------------------------------------------------
;; JSON value type
;; ----------------------------------------------------------------------------
(deftype Json []
(JNull)
(JBool bool)
(JNum float)
(JStr string)
(JArr (Vector Json))
(JObj (Map (Pair string Json))))
;; ----------------------------------------------------------------------------
;; Effects
;;
;; Multi-shot continuations matter because the cursor lives in a
;; `with-state` handler, encoded as a state-passing function (see
;; `stdlib/core/effects.stutter`). Each `(resume k …)` re-enters the body
;; with the cursor as it was at the choice point — so `<|>` does NOT
;; need to snapshot / restore the cursor by hand.
;; ----------------------------------------------------------------------------
(effect Parse
(peek :: (-> (Maybe char)))
(advance :: (-> unit))
(pos :: (-> int))
(seek :: (-> int unit)))
(effect Choose
(choose :: (forall [A] (-> (List A) A)))
(fail-choice :: (forall [A] (-> A))))
;; ----------------------------------------------------------------------------
;; Combinators
;; ----------------------------------------------------------------------------
(define satisfy ::
(forall [e] (-> (-> char bool) char ! <Parse Exn ...e>))
(fn [pred]
(match (peek)
((None) (raise :eof))
((Some c) (if (pred c)
(do (advance) c)
(raise :unexpected))))))
(define any-char :: (forall [e] (-> char ! <Parse Exn ...e>))
(fn [] (satisfy (fn [_] true))))
(define char1 :: (forall [e] (-> char char ! <Parse Exn ...e>))
;; Named `char1` rather than `char` to avoid shadowing the `char` type.
(fn [c] (satisfy (fn [x] (char-eq x c)))))
(define eof :: (forall [e] (-> unit ! <Parse Exn ...e>))
(fn []
(match (peek)
((None) :unit)
((Some _) (raise :expected-eof)))))
(define <|>-impl ::
(forall [A e]
(-> (-> A ! <Parse Exn Choose ...e>)
(-> A ! <Parse Exn Choose ...e>)
A ! <Parse Exn Choose ...e>))
(fn [p q]
(if (choose '(true false)) (p) (q))))
(define many ::
(forall [A e]
(-> (-> A ! <Parse Exn Choose ...e>)
(Vector A) ! <Parse Choose ...e>))
(fn [p]
(<|>-impl
(fn [] (let [x (p)] (seq-cons x (many p))))
(fn [] (empty-vec)))))
(define many1 ::
(forall [A e]
(-> (-> A ! <Parse Exn Choose ...e>)
(Vector A) ! <Parse Exn Choose ...e>))
(fn [p] (let [x (p)] (seq-cons x (many p)))))
(define sep-by ::
(forall [A B e]
(-> (-> A ! <Parse Exn Choose ...e>)
(-> B ! <Parse Exn Choose ...e>)
(Vector A) ! <Parse Choose ...e>))
(fn [p sep]
(<|>-impl
(fn [] (let [x (p)
xs (many (fn [] (do (sep) (p))))]
(seq-cons x xs)))
(fn [] (empty-vec)))))
(define between-impl ::
(forall [A B C e]
(-> (-> A ! <Parse Exn Choose ...e>)
(-> B ! <Parse Exn Choose ...e>)
(-> C ! <Parse Exn Choose ...e>)
B ! <Parse Exn Choose ...e>))
(fn [open p close]
(do (open) (let [x (p)] (do (close) x)))))
(define string-literal ::
(forall [e] (-> string string ! <Parse Exn ...e>))
(fn [s]
(let [cs (string->chars s)]
(loop [i 0]
(if (= i (count cs))
s
(do (char1 (nth cs i))
(recur (+ i 1))))))))
(define lexeme ::
(forall [A e]
(-> (-> A ! <Parse Exn Choose ...e>) A ! <Parse Exn Choose ...e>))
(fn [p]
(let [x (p)]
(do (skip-ws) x))))
;; Forward-referenced via `skip-ws`; defined via a macro below once
;; `one-of`/`char-class` are in scope.
;; ============================================================================
;; Lean on pattern-template macros to turn the combinator surface
;; into something that reads like a grammar. Features used:
;; - rest patterns + rest splicing (`?xs ...`) for n-ary forms
;; - recursive expansion for unfolding n-ary into binary
;; - `:when` guards for class-keyword dispatch
;; - `id-join` for generative identifier construction
;; - hygiene: bare-name binders in `(fn [...] B)` are gensym'd
;; per expansion, so the helper macros below don't capture any
;; user-supplied symbol.
;;
;; Everything below this point is pure surface sugar; deleting all the
;; macros would leave a working (if ugly) parser that uses the
;; combinators directly. The point is to show how far the macro system
;; carries us without language-level extension.
;; ============================================================================
;; ---- n-ary <|> ------------------------------------------------------------
;;
;; `(alt P1 P2 P3)` → `(<|>-impl (fn [] P1) (<|>-impl (fn [] P2) (fn [] P3)))`.
;; Recursive expansion unfolds the n-ary form; the empty case is
;; deliberately a parse-error so `(alt)` fails fast at use.
(defmacro alt
[(alt) -> (raise :empty-alt)]
[(alt ?p) -> ?p]
[(alt ?p ?rest ...) -> (<|>-impl (fn [] ?p) (fn [] (alt ?rest ...)))])
;; ---- character-class predicates -------------------------------------------
;;
;; `(any-of? c #\a #\b #\c)` → `(or (char-eq c #\a) (or (char-eq c #\b)
;; (char-eq c #\c)))`. Used to build satisfy predicates from char-literal
;; sets without writing the chained `or` by hand. The variable threaded
;; through is captured by name and re-used at each leaf — hygiene means
;; `c` here doesn't collide with any `c` in user code.
(defmacro any-of?
[(any-of? ?x) -> false]
[(any-of? ?x ?c) -> (char-eq ?x ?c)]
[(any-of? ?x ?c ?rest ...) -> (or (char-eq ?x ?c) (any-of? ?x ?rest ...))])
;; `(char-class :digit)`, `(char-class :alpha)`, etc. — each yields a
;; `(-> char bool)` predicate. The `:any-of` and `:not` shapes compose;
;; pattern matching on the leading keyword keeps the macro readable.
(defmacro char-class
[(char-class :digit) -> char-digit?]
[(char-class :alpha) -> char-alpha?]
[(char-class :whitespace) -> char-whitespace?]
[(char-class :any-of ?cs ...)
-> (fn [c] (any-of? c ?cs ...))]
[(char-class :not ?cls)
-> (fn [c] (not ((char-class ?cls) c)))])
;; `(one-of #\[ #\])` → satisfy-disjunction parser. Eliminates the
;; "satisfy + or-chain of char-eq" pattern entirely.
(defmacro one-of
[(one-of ?cs ...) -> (satisfy (char-class :any-of ?cs ...))])
(defmacro none-of
[(none-of ?cs ...) -> (satisfy (char-class :not (:any-of ?cs ...)))])
;; ---- pure / =>: lift constants & map parser results -----------------------
;; `(pure V)` → a parser thunk that succeeds without consuming and returns V.
(defmacro pure
[(pure ?v) -> (fn [] ?v)])
;; `(=> P F)` → run P, apply F to the result. The function form `F` is
;; substituted as-is; if it's a constructor (`JArr`) or a let-bound fn,
;; the result is direct-style.
(defmacro =>
[(=> ?p ?f) -> (?f (?p))])
;; ---- lexeme + token literals ---------------------------------------------
;; `(token "...")` → match the literal, then skip trailing whitespace.
;; Wrapping in lexeme means tokens normalize whitespace as we go, so
;; the grammar doesn't have to think about it.
(defmacro token
[(token ?s) -> (lexeme (fn [] (string-literal ?s)))])
;; `(between! "[" body "]")` — wraps body between two string-literal
;; tokens. The `!` distinguishes from the underlying `between-impl`
;; combinator; the macro thunks each arg for us.
(defmacro between!
[(between! ?open ?body ?close)
-> (between-impl (fn [] (token ?open))
(fn [] ?body)
(fn [] (token ?close)))])
;; `(sep! p ",")` — sep-by with a literal-string separator.
(defmacro sep!
[(sep! ?p ?sep)
-> (sep-by (fn [] ?p) (fn [] (token ?sep)))])
;; ---- defparser: name + body → top-level parser define --------------------
;;
;; The signature is the canonical row `<Parse Exn Choose>`. Pinning the
;; row at the macro means rule bodies don't have to write the signature
;; per rule — but they're still elaborated against the full effect row,
;; so any effect mismatch surfaces at the body, not at the wrapper.
(defmacro defparser
[(defparser ?name ?ret ?body)
-> (define ?name ::
(forall [e] (-> ?ret ! <Parse Exn Choose ...e>))
(fn [] ?body))])
;; ---- deftoken: keyword literal → constant parser --------------------------
;;
;; `(deftoken null "null" (JNull))` generates:
;;
;; (define json-null :: (forall [e] (-> Json ! <Parse Exn Choose ...e>))
;; (fn [] (do (token "null") (JNull))))
;;
;; The generated identifier `json-null` is built via `id-join` from
;; the literal prefix `"json-"` and the user-supplied tag. This is
;; the most "ambitious" macro in the file — generative naming pulled
;; from a template argument.
(defmacro deftoken
[(deftoken ?tag ?lit ?val)
-> (define (id-join "json-" ?tag) ::
(forall [e] (-> Json ! <Parse Exn Choose ...e>))
(fn [] (do (token ?lit) ?val)))])
;; ---- skip-ws (now that we have one-of-style sugar) -----------------------
(define skip-ws :: (forall [e] (-> unit ! <Parse Choose ...e>))
(fn []
(let [_ (many (fn [] (satisfy (char-class :whitespace))))]
:unit)))
;; ============================================================================
;; This is the payoff. Compare to the pre-macro version: every rule
;; now reads as the grammar production it represents.
;;
;; Stretch goal (TODO(elab)): a single-form `(grammar ...)` macro that
;; emits multiple top-level `define`s in one shot. defmacro
;; expansion produces ONE surf form per macro call, so today a real
;; multi-emit `grammar` macro needs elaborator support for a
;; `(splice-defs (define ...) ...)` form at the top level. Without
;; that, we write one `defparser` / `deftoken` per rule. The
;; per-rule sugar already does most of the work.
;; ============================================================================
;; null / true / false — three rules, three lines.
(deftoken null "null" (JNull))
(deftoken true "true" (JBool true))
(deftoken false "false" (JBool false))
;; ---- numbers --------------------------------------------------------------
(define digit-chars ::
(forall [e] (-> (Vector char) ! <Parse Exn Choose ...e>))
(fn [] (many1 (fn [] (satisfy (char-class :digit))))))
(defparser sign-chars (Vector char)
(alt (let [c (char1 #\-)] (seq-cons c (empty-vec)))
(empty-vec)))
(defparser frac-chars (Vector char)
(alt (let [dot (char1 #\.)
ds (digit-chars)]
(seq-cons dot ds))
(empty-vec)))
(defparser exp-chars (Vector char)
(alt (let [e (alt (char1 #\e) (char1 #\E))
sg (alt (let [c (alt (char1 #\+) (char1 #\-))]
(seq-cons c (empty-vec)))
(empty-vec))
ds (digit-chars)]
(seq-append (seq-cons e sg) ds))
(empty-vec)))
(defparser json-number Json
(lexeme
(fn []
(let [s (sign-chars)
int (digit-chars)
fr (frac-chars)
ex (exp-chars)
raw (chars->string
(seq-append (seq-append s int) (seq-append fr ex)))]
(match (str->float raw)
((None) (raise :bad-number))
((Some f) (JNum f)))))))
;; ---- strings --------------------------------------------------------------
;;
;; The `(=> (any-char) escape-decode)` shape on the escape branch shows
;; the macro layer doing real work: the macro flips control flow to
;; the helper without obscuring intent.
(define escape-decode :: (-> char char)
(fn [c]
(cond
(char-eq c #\") #\"
(char-eq c #\\) #\\
(char-eq c #\/) #\/
(char-eq c #\n) #\newline
(char-eq c #\t) #\tab
(char-eq c #\r) #\return
(char-eq c #\b) (int->char 8)
(char-eq c #\f) (int->char 12)
;; TODO: \uXXXX — read 4 hex digits, int->char. Skipped here.
true (error "json: bad escape"))))
(defparser json-string-char char
(alt (do (char1 #\\) (=> any-char escape-decode))
(none-of #\" #\\)))
(defparser json-string-body string
(=> (fn [] (many json-string-char)) chars->string))
(defparser json-string Json
(lexeme (fn []
(=> (fn []
(between-impl
(fn [] (char1 #\"))
json-string-body
(fn [] (char1 #\"))))
JStr))))
;; ---- arrays & objects ----------------------------------------------------
(defparser json-array Json
(=> (fn [] (between! "[" (sep! (json-value) ",") "]")) JArr))
(defparser json-pair (Pair string Json)
(let [k (lexeme (fn []
(between-impl
(fn [] (char1 #\"))
json-string-body
(fn [] (char1 #\")))))
_ (token ":")
v (json-value)]
(Pair k v)))
(define pairs->map ::
(-> (Vector (Pair string Json)) (Map (Pair string Json)))
(fn [pairs]
(reduce (fn [m p] (map-assoc m (:k p) (:v p)))
${}
pairs)))
(defparser json-object Json
(=> (fn [] (=> (fn [] (between! "{" (sep! (json-pair) ",") "}"))
pairs->map))
JObj))
;; The whole grammar's dispatch — what a recursive-descent parser
;; usually buries in nested if-else, now one `alt` per rule.
(defparser json-value Json
(alt (json-null)
(json-true)
(json-false)
(json-number)
(json-string)
(json-array)
(json-object)))
;; ============================================================================
;; Runners
;; ============================================================================
(deftype Result [A E]
(Ok A)
(Err E))
(define run-first ::
(forall [A e]
(-> (-> A ! <Choose Exn ...e>) (Result A keyword) ! <Exn ...e>))
(fn [thunk]
((handler
(return v -> (Ok v))
((choose xs) k ->
(loop [xs xs]
(if (empty? xs)
(Err :no-alternative)
(let [r (exn-or-default (Err :branch-failed)
(fn [] (resume k (first xs))))]
(match r
((Ok _) r)
((Err _) (recur (rest xs))))))))
((fail-choice) -> (Err :no-alternative)))
thunk)))
(define run-parse ::
(forall [A e]
(-> string (-> A ! <Parse Choose Exn ...e>)
(Result A keyword) ! <...e>))
(fn [src thunk]
(let [buf (string->chars src)]
(with-state 0
(fn []
((handler
(return v -> v)
((peek) k ->
(let [i (get)]
(if (< i (count buf))
(resume k (Some (nth buf i)))
(resume k (None)))))
((advance) k -> (do (put (+ (get) 1)) (resume k :unit)))
((pos) k -> (resume k (get)))
((seek p) k -> (do (put p) (resume k :unit))))
(fn []
(exn-or-default (Err :parse-error)
(fn [] (run-first thunk))))))))))
(define parse-json :: (-> string (Result Json keyword))
(fn [src]
(run-parse src
(fn []
(do (skip-ws)
(let [v (json-value)]
(do (eof) v)))))))
;; ============================================================================
;; Demo
;; ============================================================================
(print "null =>") (print (parse-json "null"))
(print "true =>") (print (parse-json " true "))
(print "42 =>") (print (parse-json "42"))
(print "[1,2,3] =>") (print (parse-json "[1, 2, 3]"))
(print "nested =>") (print (parse-json "{\"a\": 1, \"b\": [true, null]}"))
(print "malformed =>") (print (parse-json "{\"a\": ,}"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment