Last active
July 31, 2018 03:48
-
-
Save akeep/cef4fcc5d2adb6d23a81741dccf93374 to your computer and use it in GitHub Desktop.
Simple little pattern matcher in a syntax-case macro
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;; match.ss: a simple pattern matcher in scheme | |
;;; | |
;;; Copyright Andy Keep | |
;;; Licensed under the CRAPL: http://matt.might.net/articles/crapl/ | |
;;; | |
;;; I've used or written variations on this kind of a match syntax | |
;;; for a long time now and finally decided to pull together one of | |
;;; my own. It matches some in syntax and probably inadvertantly | |
;;; steals some of the design pattern (in this case the success and | |
;;; failure continuations, but was written from scratch and could | |
;;; almost certainly use improvement. | |
;;; | |
;;; Syntax: | |
;;; (match <exp> <cl> ...) | |
;;; | |
;;; where <cl> is: | |
;;; | |
;;; <cl> => [<pat> (guard <exp> ... <exp>) <exp> ... <exp>] | |
;;; [<pat> <exp> ... <exp>] | |
;;; [else <exp> ... <exp>] | |
;;; | |
;;; where the "else" clause may only appear as the last clause. The guarded | |
;;; pattern matches when <pat> matches and all of the <exp> in | |
;;; (guard <exp> ... <exp>) evaluate to true (<exp> in guard are effectively | |
;;; treated as an and). The unguarded pattern matches when <pat> is matched, | |
;;; and the else clause matches when all else fails. Clauses are evaluated in | |
;;; order, from first to last, with the else clause executed when all other | |
;;; clauses are exhausted. If no else clause exists, match will raise an error | |
;;; to indicate it failed to find a suitable match. | |
;;; | |
;;; where <pat> is of the form: | |
;;; <pat> => sym -- matches symbol exactly | |
;;; (<pat>0 . <pat>1) -- matches a pair with <pat>0 as car and <pat>1 as cdr | |
;;; (<pat> ...) -- matches 0 or more <pat> | |
;;; (<pat>0 ... <pat>1) -- matches 0 or more <pat>0 followed by a <pat>1 | |
;;; ,id -- binds id to the current expression | |
;;; | |
;;; examples: | |
;;; | |
;;; (match e | |
;;; [(lambda (,x) ,body) (guard (symbol? x)) ---] | |
;;; [(,e0 ,e1) ---] | |
;;; [,x (guard (symbol? x)) ---]) | |
;;; | |
;;; matches the terms of the lambda calculus and | |
;;; | |
;;; (match e | |
;;; [(lambda (,x* ...) ,body* ... ,body) (guard (andmap symbol? x*)) ---] | |
;;; [(let ([,x* ,e*] ...) ,body* ... ,body) (guard (andmap symbol? x*)) ---] | |
;;; [(letrec ([,x* ,e*] ...) ,body* ... ,body) (guard (andmap symbol? x*)) ---] | |
;;; [(if ,e0 ,e1 ,e2) ---] | |
;;; [(,e ,e* ...) ---] | |
;;; [,x (guard (symbol? x)) ---] | |
;;; [else ---]) | |
;;; | |
;;; matches a subset of scheme. | |
;;; | |
(define-syntax match | |
(lambda (x) | |
(define (extract-bindings pat) | |
(let f ([pat pat] [bindings '()]) | |
(syntax-case pat (unquote) | |
[,bind (identifier? #'bind) (cons #'bind bindings)] | |
[(?a . ?d) (f #'?a (f #'?d bindings))] | |
[_ bindings]))) | |
(define (process-pattern id pat body fk) | |
(with-syntax ([id id] [fk fk]) | |
(syntax-case pat (unquote) | |
[,bind (identifier? #'bind) #`(let ([bind id]) #,body)] | |
[(?a dots) | |
(eq? (datum dots) '...) | |
(with-syntax ([(binding ...) (extract-bindings #'?a)] | |
[(t0 t1 loop) (generate-temporaries '(t0 t1 loop))]) | |
(with-syntax ([(tbinding ...) (generate-temporaries #'(binding ...))]) | |
#`(let loop ([t0 id] [tbinding '()] ...) | |
(cond | |
[(pair? t0) | |
(let ([t1 (car t0)] [t0 (cdr t0)]) | |
#,(process-pattern #'t1 #'?a | |
#'(loop t0 (cons binding tbinding) ...) | |
#'fk))] | |
[(null? t0) | |
(let ([binding (reverse tbinding)] ...) | |
#,body)] | |
[else (fk)]))))] | |
[(?a dots . ?d) | |
(eq? (datum dots) '...) | |
(with-syntax ([(binding ...) (extract-bindings #'?a)] | |
[(t0 t1 new-fk loop) (generate-temporaries '(t0 t1 new-fk loop))]) | |
(with-syntax ([(tbinding ...) (generate-temporaries #'(binding ...))]) | |
#`(let loop ([t0 id] [tbinding '()] ...) | |
(let ([new-fk (lambda () | |
(if (pair? t0) | |
(let ([t1 (car t0)] [t0 (cdr t0)]) | |
#,(process-pattern #'t1 #'?a | |
#'(loop t0 (cons binding tbinding) ...) | |
#'fk)) | |
(fk)))]) | |
#,(process-pattern #'t0 #'?d | |
#`(let ([binding (reverse tbinding)] ...) | |
#,body) | |
#'new-fk)))))] | |
[(?a . ?d) | |
(with-syntax ([(a d) (generate-temporaries '(a d))]) | |
#`(if (pair? id) | |
(let ([a (car id)] [d (cdr id)]) | |
#,(process-pattern #'a #'?a | |
(process-pattern #'d #'?d body #'fk) | |
#'fk)) | |
(fk)))] | |
[sym (identifier? #'sym) #`(if (eq? id 'sym) #,body (fk))] | |
[() #`(if (null? id) #,body (fk))]))) | |
(define (process-clause id cl fk) | |
(syntax-case cl (guard) | |
[[pat (guard e0 e1 ...) body0 body1 ...] | |
(process-pattern id #'pat | |
#`(if (and e0 e1 ...) | |
(begin body0 body1 ...) | |
(#,fk)) | |
fk)] | |
[[pat body0 body1 ...] | |
(process-pattern id #'pat #'(begin body0 body1 ...) fk)])) | |
(define (process-match id cl* else-body) | |
(let f ([cl* cl*]) | |
(if (null? cl*) | |
else-body | |
(let ([cl (car cl*)] [cl* (cdr cl*)]) | |
(with-syntax ([(fk) (generate-temporaries '(fk))]) | |
#`(let ([fk (lambda () #,(f cl*))]) | |
#,(process-clause id cl #'fk))))))) | |
(syntax-case x (else) | |
[(_ id cl ... [else ebody0 ebody1 ...]) | |
(identifier? #'id) | |
(process-match #'id #'(cl ...) #'(begin ebody0 ebody1 ...))] | |
[(_ id cl ...) | |
(identifier? #'id) | |
#'(match id | |
cl ... | |
[else (errorf 'match "~s does not match any clauses" id)])] | |
[(_ e cl ... [else ebody0 ebody1 ...]) | |
#'(let ([t e]) (match t cl ... [else ebody0 ebody1 ...]))] | |
[(_ e cl ...) | |
#'(let ([t e]) | |
(match t | |
cl ... | |
[else (errorf 'match "~s does not match any clauses" t)]))]))) |
It also looks like line 117 needs to be syntax quasiquote:
#`(if (and e0 e1 ...)
(begin body0 body1 ...)
(#,fk))
Ah, good catches! I was a little inconsistent in my use of the ?
prefix for pattern variables. This is a habit I'm trying to pick-up, but not one that I usually use. A good catch on the syntax quasi quote as well.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I think there may be a tiny typo on line 69. It seems the
(identifier? #'?bind)
call should be(identifier? #'bind)
. As it stands, the macro lets me write this:(match 'x [,1 1])
which generates the exception: Invalid bound variable 1.