Skip to content

Instantly share code, notes, and snippets.

@sug0
Last active May 30, 2022 18:55
Show Gist options
  • Save sug0/71421d959c18d8a2b038fc0590096c06 to your computer and use it in GitHub Desktop.
Save sug0/71421d959c18d8a2b038fc0590096c06 to your computer and use it in GitHub Desktop.
Parser combinators in Chez Scheme
;; TODO: add backtracking point, to avoid parsing all the way from the beginning!
;;
;; e.g.
;;
;; (json-value (new-string-state "[\" \" 123]"))
;; ==> (#f "Exhausted all possible JSON value types: Failed to parse JSON number: Expected character '9', but found '[' instead")
;;
;; we know we are looking for a comma, so add a backtracking point after the string
;; return 1 >>= thing1 >>= thing2 >>= thing3
;; (=<< thing3 (=<< thing2 (=<< thing1 (return 1))))
;; parser outputs pair (#f error-msg) or (#t value next-state)
(define (new-string-state str)
(cons str 0))
(define (string-state state)
(car state))
(define (string-state-head state)
(string-ref (car state) (cdr state)))
(define (string-state-next state)
(cons (car state) (+ 1 (cdr state))))
(define (string-state-eof? state)
(>= (cdr state) (string-length (car state))))
(define (result-ok? result)
(car result))
;; assumes (car result) is false
(define (result-error result)
(cadr result))
;; assumes (car result) is true
(define (result-value result)
(cadr result))
(define (result-next-state result)
(caddr result))
(define (new-result-ok value next-state)
(list #t value next-state))
(define (new-result-err msg)
(list #f msg))
;; define this macro before being used
(define-syntax parser-do
(syntax-rules (<- := %=)
[(_ (:= var value) rest ...)
(let ([var value])
(parser-do rest ...))]
[(_ (%= var value) rest ...)
(letrec ([var value])
(parser-do rest ...))]
[(_ (<- var parser) rest ...)
(parser-bind
parser
(lambda (var)
(parser-do rest ...)))]
[(_ parser) parser]
[(_ parser rest ...)
(parser-bind
parser
(lambda (_)
(parser-do rest ...)))]))
(define-syntax parser-do-inspect
(syntax-rules ()
[(_ . clauses) (_parser-do-syntax 'clauses)]))
(define-syntax parser-do-hygienic
(syntax-rules ()
[(_ . clauses) (eval (_parser-do-syntax 'clauses))]))
(define (_parser-do-syntax clauses)
(let ([head (car clauses)]
[tail (cdr clauses)])
(if (null? tail)
head
(cond
[(eq? (car head) ':=)
`(let ([,(cadr head) ,(caddr head)])
,(_parser-do-syntax tail))]
[(eq? (car head) '%=)
`(letrec ([,(cadr head) ,(caddr head)])
,(_parser-do-syntax tail))]
[(eq? (car head) '<-)
`(parser-bind
,(caddr head)
(lambda (,(cadr head))
,(_parser-do-syntax tail)))]
[else
`(parser-bind
,head
(lambda (_)
,(_parser-do-syntax tail)))]))))
(define (parser-map func parser)
(parser-do
(<- x parser)
(parser-return (func x))))
(define (parser-return x)
(lambda (state)
(new-result-ok x state)))
(define (parser-or p1 p2)
(lambda (state)
(let ([result (p1 state)])
(if (result-ok? result)
result
(p2 state)))))
(define (tag error-msg parser)
(lambda (state)
(let ([result (parser state)])
(if (result-ok? result)
result
(new-result-err (format "~a: ~a"
error-msg
(result-error result)))))))
(define (tag2 error-msg parser)
(parser-or parser
(parser-fail error-msg)))
(define (opt parser)
(parser-or parser
(parser-return #f)))
(define (parser-and p1 p2)
(parser-do
(<- x p1)
(<- y p2)
(parser-return (cons x y))))
(define (parser-bind parser continuation)
(lambda (state)
(let ([result (parser state)])
(if (result-ok? result)
;; continue computations
(let ([new-parser (continuation
(result-value result))])
(new-parser (result-next-state result)))
;; short-circuit
result))))
(define (=<< continuation parser)
(parser-bind parser continuation))
(define (parser-fail error-msg)
(lambda (state) (new-result-err error-msg)))
(define (parser-then . parsers)
(define (parser-then-impl parsers)
(if (null? (cdr parsers))
(parser-do
(<- value (car parsers))
(parser-return value))
(parser-do
(car parsers)
(parser-then-impl (cdr parsers)))))
(parser-then-impl parsers))
(define anychar
(lambda (state)
(cond
[(string-state-eof? state)
(new-result-err "End of file")]
[else
(new-result-ok (string-state-head state) (string-state-next state))])))
(define (char x)
(parser-do
(<- y anychar)
(if (eq? x y)
(parser-return x)
(parser-fail (format "Expected character '~a', but found '~a' instead" x y)))))
(define (any . parsers)
(fold-left parser-or (car parsers) (cdr parsers)))
(define space
(any (char #\space)
(char #\tab)
(char #\newline)))
(define digit
(any (char #\0) (char #\1) (char #\2) (char #\3)
(char #\4) (char #\5) (char #\6) (char #\7)
(char #\8) (char #\9)))
(define (pstring str)
(parser-map
list->string
(parser-sequence (map char (string->list str)))))
(define (parser-when cond parser)
(if cond parser (parser-return #f)))
(define (parser-sequence parsers)
(if (null? parsers)
(parser-return '())
(parser-do
(:= parser (car parsers))
(:= tail (cdr parsers))
(<- value parser)
(<- tail (parser-sequence tail))
(parser-return
(cons value tail)))))
(define (pmany parser)
(parser-or (pmany1 parser)
(parser-return '())))
(define (pmany1 parser)
(parser-end parser
(parser-return '())))
(define (parser-end parser last)
(parser-do
(<- head parser)
(<- tail (parser-or (parser-end parser last)
last))
(parser-return (cons head tail))))
(define (parser-between p1 p2 p3)
(parser-do
p1
(<- value p2)
p3
(parser-return value)))
(define json-null
(tag "Failed to parse JSON null"
(parser-then (pstring "null")
(parser-return 'null))))
(define json-bool
(tag "Failed to parse JSON bool"
(parser-or (parser-then (pstring "true") (parser-return #t))
(parser-then (pstring "false") (parser-return #f)))))
(define json-number
(tag "Failed to parse JSON number"
(parser-map (lambda (num) (string->number (list->string num)))
(pmany1 digit))))
(define json-string
(tag "Failed to parse JSON string"
(parser-do
(:= not-quote
(parser-do
(<- ch anychar)
(parser-when (eq? ch #\")
(parser-fail #f))
(parser-return ch)))
(char #\")
(<- s (pmany not-quote))
(char #\")
(parser-map list->string (parser-return s)))))
(define json-dict
(tag "Failed to parse JSON dictionary"
(parser-do
(:= json-key-val
(parser-do
(<- k json-string)
(pmany space)
(char #\:)
(pmany space)
(<- v json-value)
(parser-return (cons k v))))
(%= json-dict-parse
(parser-do
(pmany space)
(<- next (parser-or (char #\})
(char #\,)))
(if (eq? next #\})
(parser-return '())
(parser-do
(pmany space)
(<- next-key-val json-key-val)
(<- tail json-dict-parse)
(parser-return (cons next-key-val tail))))))
(:= json-dict-parse-begin
(parser-do
(char #\{)
(pmany space)
(<- next (parser-or (char #\})
json-key-val))
(if (eq? next #\})
(parser-return '())
(parser-do
(<- tail json-dict-parse)
(parser-return (cons next tail))))))
json-dict-parse-begin)))
(define json-array
(tag "Failed to parse JSON array"
(parser-do
(%= json-array-parse
(parser-do
(pmany space)
(<- next (parser-or (char #\])
(char #\,)))
(if (eq? next #\])
(parser-return '())
(parser-do
(pmany space)
(<- next json-value)
(<- tail json-array-parse)
(parser-return (cons next tail))))))
(:= json-array-parse-begin
(parser-do
(char #\[)
(pmany space)
(<- next (parser-or (char #\])
json-value))
(if (eq? next #\])
(parser-return '())
(parser-do
(<- tail json-array-parse)
(parser-return (cons next tail))))))
json-array-parse-begin)))
(define json-value
(tag "Exhausted all possible JSON value types"
(any json-null
json-bool
json-string
json-array
json-dict
json-number)))
;; (json-value (new-string-state "[1, 2, 3, 4, 5]"))
;; ==> (#t (1 2 3 4 5) ("[1, 2, 3, 4, 5]" . 15))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment