Last active
May 30, 2022 18:55
-
-
Save sug0/71421d959c18d8a2b038fc0590096c06 to your computer and use it in GitHub Desktop.
Parser combinators in Chez Scheme
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; TODO: add backtracking point, to avoid parsing all the way from the beginning! | |
;; | |
;; e.g. | |
;; | |
;; (json-value (new-string-state "[\" \" 123]")) | |
;; ==> (#f "Exhausted all possible JSON value types: Failed to parse JSON number: Expected character '9', but found '[' instead") | |
;; | |
;; we know we are looking for a comma, so add a backtracking point after the string | |
;; return 1 >>= thing1 >>= thing2 >>= thing3 | |
;; (=<< thing3 (=<< thing2 (=<< thing1 (return 1)))) | |
;; parser outputs pair (#f error-msg) or (#t value next-state) | |
(define (new-string-state str) | |
(cons str 0)) | |
(define (string-state state) | |
(car state)) | |
(define (string-state-head state) | |
(string-ref (car state) (cdr state))) | |
(define (string-state-next state) | |
(cons (car state) (+ 1 (cdr state)))) | |
(define (string-state-eof? state) | |
(>= (cdr state) (string-length (car state)))) | |
(define (result-ok? result) | |
(car result)) | |
;; assumes (car result) is false | |
(define (result-error result) | |
(cadr result)) | |
;; assumes (car result) is true | |
(define (result-value result) | |
(cadr result)) | |
(define (result-next-state result) | |
(caddr result)) | |
(define (new-result-ok value next-state) | |
(list #t value next-state)) | |
(define (new-result-err msg) | |
(list #f msg)) | |
;; define this macro before being used | |
(define-syntax parser-do | |
(syntax-rules (<- := %=) | |
[(_ (:= var value) rest ...) | |
(let ([var value]) | |
(parser-do rest ...))] | |
[(_ (%= var value) rest ...) | |
(letrec ([var value]) | |
(parser-do rest ...))] | |
[(_ (<- var parser) rest ...) | |
(parser-bind | |
parser | |
(lambda (var) | |
(parser-do rest ...)))] | |
[(_ parser) parser] | |
[(_ parser rest ...) | |
(parser-bind | |
parser | |
(lambda (_) | |
(parser-do rest ...)))])) | |
(define-syntax parser-do-inspect | |
(syntax-rules () | |
[(_ . clauses) (_parser-do-syntax 'clauses)])) | |
(define-syntax parser-do-hygienic | |
(syntax-rules () | |
[(_ . clauses) (eval (_parser-do-syntax 'clauses))])) | |
(define (_parser-do-syntax clauses) | |
(let ([head (car clauses)] | |
[tail (cdr clauses)]) | |
(if (null? tail) | |
head | |
(cond | |
[(eq? (car head) ':=) | |
`(let ([,(cadr head) ,(caddr head)]) | |
,(_parser-do-syntax tail))] | |
[(eq? (car head) '%=) | |
`(letrec ([,(cadr head) ,(caddr head)]) | |
,(_parser-do-syntax tail))] | |
[(eq? (car head) '<-) | |
`(parser-bind | |
,(caddr head) | |
(lambda (,(cadr head)) | |
,(_parser-do-syntax tail)))] | |
[else | |
`(parser-bind | |
,head | |
(lambda (_) | |
,(_parser-do-syntax tail)))])))) | |
(define (parser-map func parser) | |
(parser-do | |
(<- x parser) | |
(parser-return (func x)))) | |
(define (parser-return x) | |
(lambda (state) | |
(new-result-ok x state))) | |
(define (parser-or p1 p2) | |
(lambda (state) | |
(let ([result (p1 state)]) | |
(if (result-ok? result) | |
result | |
(p2 state))))) | |
(define (tag error-msg parser) | |
(lambda (state) | |
(let ([result (parser state)]) | |
(if (result-ok? result) | |
result | |
(new-result-err (format "~a: ~a" | |
error-msg | |
(result-error result))))))) | |
(define (tag2 error-msg parser) | |
(parser-or parser | |
(parser-fail error-msg))) | |
(define (opt parser) | |
(parser-or parser | |
(parser-return #f))) | |
(define (parser-and p1 p2) | |
(parser-do | |
(<- x p1) | |
(<- y p2) | |
(parser-return (cons x y)))) | |
(define (parser-bind parser continuation) | |
(lambda (state) | |
(let ([result (parser state)]) | |
(if (result-ok? result) | |
;; continue computations | |
(let ([new-parser (continuation | |
(result-value result))]) | |
(new-parser (result-next-state result))) | |
;; short-circuit | |
result)))) | |
(define (=<< continuation parser) | |
(parser-bind parser continuation)) | |
(define (parser-fail error-msg) | |
(lambda (state) (new-result-err error-msg))) | |
(define (parser-then . parsers) | |
(define (parser-then-impl parsers) | |
(if (null? (cdr parsers)) | |
(parser-do | |
(<- value (car parsers)) | |
(parser-return value)) | |
(parser-do | |
(car parsers) | |
(parser-then-impl (cdr parsers))))) | |
(parser-then-impl parsers)) | |
(define anychar | |
(lambda (state) | |
(cond | |
[(string-state-eof? state) | |
(new-result-err "End of file")] | |
[else | |
(new-result-ok (string-state-head state) (string-state-next state))]))) | |
(define (char x) | |
(parser-do | |
(<- y anychar) | |
(if (eq? x y) | |
(parser-return x) | |
(parser-fail (format "Expected character '~a', but found '~a' instead" x y))))) | |
(define (any . parsers) | |
(fold-left parser-or (car parsers) (cdr parsers))) | |
(define space | |
(any (char #\space) | |
(char #\tab) | |
(char #\newline))) | |
(define digit | |
(any (char #\0) (char #\1) (char #\2) (char #\3) | |
(char #\4) (char #\5) (char #\6) (char #\7) | |
(char #\8) (char #\9))) | |
(define (pstring str) | |
(parser-map | |
list->string | |
(parser-sequence (map char (string->list str))))) | |
(define (parser-when cond parser) | |
(if cond parser (parser-return #f))) | |
(define (parser-sequence parsers) | |
(if (null? parsers) | |
(parser-return '()) | |
(parser-do | |
(:= parser (car parsers)) | |
(:= tail (cdr parsers)) | |
(<- value parser) | |
(<- tail (parser-sequence tail)) | |
(parser-return | |
(cons value tail))))) | |
(define (pmany parser) | |
(parser-or (pmany1 parser) | |
(parser-return '()))) | |
(define (pmany1 parser) | |
(parser-end parser | |
(parser-return '()))) | |
(define (parser-end parser last) | |
(parser-do | |
(<- head parser) | |
(<- tail (parser-or (parser-end parser last) | |
last)) | |
(parser-return (cons head tail)))) | |
(define (parser-between p1 p2 p3) | |
(parser-do | |
p1 | |
(<- value p2) | |
p3 | |
(parser-return value))) | |
(define json-null | |
(tag "Failed to parse JSON null" | |
(parser-then (pstring "null") | |
(parser-return 'null)))) | |
(define json-bool | |
(tag "Failed to parse JSON bool" | |
(parser-or (parser-then (pstring "true") (parser-return #t)) | |
(parser-then (pstring "false") (parser-return #f))))) | |
(define json-number | |
(tag "Failed to parse JSON number" | |
(parser-map (lambda (num) (string->number (list->string num))) | |
(pmany1 digit)))) | |
(define json-string | |
(tag "Failed to parse JSON string" | |
(parser-do | |
(:= not-quote | |
(parser-do | |
(<- ch anychar) | |
(parser-when (eq? ch #\") | |
(parser-fail #f)) | |
(parser-return ch))) | |
(char #\") | |
(<- s (pmany not-quote)) | |
(char #\") | |
(parser-map list->string (parser-return s))))) | |
(define json-dict | |
(tag "Failed to parse JSON dictionary" | |
(parser-do | |
(:= json-key-val | |
(parser-do | |
(<- k json-string) | |
(pmany space) | |
(char #\:) | |
(pmany space) | |
(<- v json-value) | |
(parser-return (cons k v)))) | |
(%= json-dict-parse | |
(parser-do | |
(pmany space) | |
(<- next (parser-or (char #\}) | |
(char #\,))) | |
(if (eq? next #\}) | |
(parser-return '()) | |
(parser-do | |
(pmany space) | |
(<- next-key-val json-key-val) | |
(<- tail json-dict-parse) | |
(parser-return (cons next-key-val tail)))))) | |
(:= json-dict-parse-begin | |
(parser-do | |
(char #\{) | |
(pmany space) | |
(<- next (parser-or (char #\}) | |
json-key-val)) | |
(if (eq? next #\}) | |
(parser-return '()) | |
(parser-do | |
(<- tail json-dict-parse) | |
(parser-return (cons next tail)))))) | |
json-dict-parse-begin))) | |
(define json-array | |
(tag "Failed to parse JSON array" | |
(parser-do | |
(%= json-array-parse | |
(parser-do | |
(pmany space) | |
(<- next (parser-or (char #\]) | |
(char #\,))) | |
(if (eq? next #\]) | |
(parser-return '()) | |
(parser-do | |
(pmany space) | |
(<- next json-value) | |
(<- tail json-array-parse) | |
(parser-return (cons next tail)))))) | |
(:= json-array-parse-begin | |
(parser-do | |
(char #\[) | |
(pmany space) | |
(<- next (parser-or (char #\]) | |
json-value)) | |
(if (eq? next #\]) | |
(parser-return '()) | |
(parser-do | |
(<- tail json-array-parse) | |
(parser-return (cons next tail)))))) | |
json-array-parse-begin))) | |
(define json-value | |
(tag "Exhausted all possible JSON value types" | |
(any json-null | |
json-bool | |
json-string | |
json-array | |
json-dict | |
json-number))) | |
;; (json-value (new-string-state "[1, 2, 3, 4, 5]")) | |
;; ==> (#t (1 2 3 4 5) ("[1, 2, 3, 4, 5]" . 15)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment