Created
November 2, 2011 10:39
-
-
Save Lovesan/1333362 to your computer and use it in GitHub Desktop.
simple parser combinator library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(in-package :cl-user) | |
(defpackage #:peg-combinators | |
(:use #:cl) | |
(:nicknames #:pegc) | |
(:export | |
#:defparser | |
#:defrule | |
#:result | |
#:result-p | |
#:result-value | |
#:result-position | |
#:result-error | |
#:parsing-error | |
#:parsing-error-p | |
#:parsing-error-position | |
#:parsing-error-error | |
#:parsing-error-message | |
#:select-error | |
#:char-class | |
#:any-char | |
#:literal | |
#:seq | |
#:choice | |
#:optional | |
#:zero-or-more | |
#:one-or-more | |
#:not-p | |
#:and-p | |
#:enable-pegc-syntax | |
#:disable-pegc-syntax)) | |
(in-package #:peg-combinators) | |
(deftype index () '(integer 0 #.(1- array-total-size-limit))) | |
(defmacro with-gensyms ((&rest symbols) &body body) | |
`(let ,(mapcar (lambda (symbol) | |
`(,symbol (gensym ,(symbol-name symbol)))) | |
symbols) | |
,@body)) | |
(defmacro defparser (name (&rest args) (input start end &rest other) | |
&body body) | |
`(progn | |
(defmacro ,name ,args | |
(with-gensyms (,input ,start ,end ,@other) | |
`(lambda (,,input &key ((:start ,,start) 0) ((:end ,,end) nil)) | |
(declare (type string ,,input) | |
(type index ,,start) | |
(type (or null index) ,,end)) | |
(when (null ,,end) (setf ,,end (length ,,input))) | |
,,@body))) | |
,(when (null args) | |
`(define-symbol-macro ,name (,name))) | |
',name)) | |
(defstruct (result | |
(:constructor result (position value &optional error))) | |
(value nil :read-only t) | |
(position 0 :read-only t) | |
(error nil :read-only t)) | |
(defstruct (parsing-error | |
(:include result) | |
(:constructor parsing-error (position message &optional error))) | |
(message "Parsing error" :read-only t)) | |
(defgeneric select-error (e1 e2) | |
(:method ((e1 null) (e2 null)) | |
e1) | |
(:method ((e1 null) (e2 result)) | |
(result-error e2)) | |
(:method ((e1 null) (e2 parsing-error)) | |
e2) | |
(:method ((e1 result) (e2 result)) | |
(select-error (result-error e1) (result-error e2))) | |
(:method ((e1 result) (e2 parsing-error)) | |
(select-error (result-error e1) e2)) | |
(:method ((e1 result) (e2 null)) | |
(result-error e1)) | |
(:method ((e1 parsing-error) (e2 parsing-error)) | |
(if (<= (result-position e1) (result-position e2)) | |
e2 | |
e1)) | |
(:method ((e1 parsing-error) (e2 result)) | |
(select-error e1 (result-error e2))) | |
(:method ((e1 parsing-error) (e2 null)) | |
e1)) | |
(defparser any-char () (input start end) | |
`(if (< ,start ,end) | |
(result (1+ ,start) (char ,input ,start)) | |
(parsing-error ,start "Character expected."))) | |
(defparser char-class (&rest cases) (input start end char) | |
`(if (and (> (- ,end ,start) 0) | |
(let ((,char (char ,input ,start))) | |
,(intern-char-cases char cases))) | |
(result (1+ ,start) (char ,input ,start)) | |
(parsing-error ,start ,(make-char-error-message cases)))) | |
(eval-when (:compile-toplevel :load-toplevel :execute) | |
(defun intern-char-cases (char-var cases) | |
`(or ,@(mapcar (lambda (case) | |
(etypecase case | |
(character | |
`(char= ,char-var ,case)) | |
((cons character (cons character null)) | |
`(char<= ,(first case) ,char-var ,(second case))))) | |
cases))) | |
(defun make-char-error-message (cases) | |
(format nil "Expected character of class [~{~a~}]." | |
(mapcar (lambda (case) | |
(if (characterp case) | |
(escape-char case) | |
(concatenate 'string | |
(escape-char (first case)) | |
"-" | |
(escape-char (second case))))) | |
cases))) | |
(defun escape-char (char) | |
(case char | |
(#\Tab "\\t") | |
(#\Return "\\r") | |
(#\Newline "\\n") | |
(#\[ "\\[") | |
(#\] "\\]") | |
(T (string char))))) | |
(defparser literal (string-designator) (input start end end*) | |
(let* ((string (string string-designator)) | |
(length (length string))) | |
`(let ((,end* (+ ,start ,length))) | |
(if (or (< ,end ,end*) | |
(string/= ,input ,string | |
:start1 ,start | |
:end1 ,end*)) | |
(parsing-error ,start ,(format nil "~s expected." string)) | |
(result ,end* (copy-seq ,string)))))) | |
(defparser seq (&rest expressions) | |
(input start end block result error values position) | |
`(block ,block | |
(let (,result ,error ,values (,position ,start)) | |
(declare (ignorable ,result ,error)) | |
,@(loop :for expr :in expressions :collect | |
`(progn | |
(setf ,result (funcall ,expr ,input | |
:start ,position | |
:end ,end) | |
,error (select-error ,error ,result) | |
,position (result-position ,result)) | |
(when (parsing-error-p ,result) | |
(return-from ,block ,error)) | |
(push (result-value ,result) ,values))) | |
(result ,position (nreverse ,values) ,error)))) | |
(defparser choice (first &rest rest) | |
(input start end block result error) | |
`(block ,block | |
(let* (,result ,error) | |
,@(loop :for expr :in (cons first rest) :collect | |
`(progn | |
(setf ,result (funcall ,expr ,input | |
:start ,start | |
:end ,end) | |
,error (select-error ,error ,result)) | |
(unless (parsing-error-p ,result) | |
(return-from ,block (result (result-position ,result) | |
(result-value ,result) | |
,error))))) | |
,error))) | |
(defmacro optional (rule) | |
`(choice ,rule (seq))) | |
(defparser zero-or-more (rule) | |
(input start end result error values position) | |
`(let (,result ,error ,values (,position ,start)) | |
(loop (setf ,result (funcall ,rule ,input | |
:start ,position | |
:end ,end) | |
,error (select-error ,error ,result)) | |
(if (parsing-error-p ,result) | |
(return) | |
(setf ,position (result-position ,result) | |
,values (cons (result-value ,result) ,values)))) | |
(result ,position (nreverse ,values) ,error))) | |
(defparser one-or-more (rule) (input start end result func) | |
`(let* ((,func ,rule) | |
(,result (funcall (seq ,func (zero-or-more ,func)) ,input | |
:start ,start | |
:end ,end))) | |
(if (parsing-error-p ,result) | |
,result | |
(result (result-position ,result) | |
(apply #'cons (result-value ,result)) | |
(result-error ,result))))) | |
(defparser not-p (rule) (input start end result) | |
`(let ((,result (funcall ,rule ,input | |
:start ,start | |
:end ,end))) | |
(if (parsing-error-p ,result) | |
(result ,start nil (result-error ,result)) | |
(parsing-error ,start | |
,(format nil "Unexpected success of ~s" rule) | |
(result-error ,result))))) | |
(defparser and-p (rule) (input start end result) | |
`(let ((,result (funcall ,rule ,input | |
:start ,start | |
:end ,end))) | |
(if (parsing-error-p ,result) | |
,result | |
(result ,start nil (result-error ,result))))) | |
(defmacro defrule (name rule &optional (value-transformer | |
'(function value-identity))) | |
(with-gensyms (input start end result) | |
`(progn (defun ,name (,input &key ((:start ,start) 0) ((:end ,end) nil)) | |
(declare (type string ,input) | |
(type index ,start) | |
(type (or null index) ,end)) | |
(when (null ,end) (setf ,end (length ,input))) | |
(let ((,result (funcall ,rule ,input :start ,start :end ,end))) | |
(if (parsing-error-p ,result) | |
,result | |
(result (result-position ,result) | |
(funcall ,value-transformer | |
(result-value ,result) | |
,input | |
,start | |
,end) | |
(result-error ,result))))) | |
(define-symbol-macro ,name #',name)))) | |
(defun value-identity (value input start end) | |
(declare (ignore input start end)) | |
value) | |
(in-package #:cl-user) | |
(defpackage #:peg-combinators.numbers | |
(:use #:cl #:pegc) | |
(:nicknames #:pegc.numbers) | |
(:export #:unsigned-int | |
#:signed-int | |
#:rationum | |
#:flonum)) | |
(in-package #:pegc.numbers) | |
(defrule unsigned-int | |
(one-or-more (char-class (#\0 #\9))) | |
#'intern-unsigned-int) | |
(defun intern-unsigned-int (value input start end) | |
(declare (ignore input start end)) | |
(parse-integer (coerce value 'string))) | |
(defrule signed-int | |
(seq (optional (char-class #\+ #\-)) | |
unsigned-int) | |
#'intern-signed-int) | |
(defun intern-signed-int (value input start end) | |
(declare (ignore input start end)) | |
(destructuring-bind | |
(sign value) value | |
(* value (if (eql sign #\-) -1 1)))) | |
(defrule rationum | |
(seq signed-int (char-class #\/) unsigned-int) | |
#'intern-rationum) | |
(defun intern-rationum (value input start end) | |
(declare (ignore input start end)) | |
(destructuring-bind | |
(numerator slash denominator) value | |
(declare (ignore slash)) | |
(/ numerator denominator))) | |
(defrule expt-part | |
(seq (char-class #\s #\S #\e #\E #\d #\D #\l #\L) | |
signed-int) | |
#'intern-expt-part) | |
(defun intern-expt-part (value input start end) | |
(declare (ignore input start end)) | |
(float (expt 10 (second value)) | |
(ecase (character (first value)) | |
((#\s #\S) 1.0s0) | |
((#\e #\E) 1.0e0) | |
((#\d #\D) 1.0d0) | |
((#\l #\L) 1.0l0)))) | |
(defrule flonum | |
(choice (seq (optional signed-int) | |
(char-class #\.) | |
(one-or-more (char-class (#\0 #\9))) | |
(optional expt-part)) | |
(seq signed-int | |
(char-class #\.) | |
(zero-or-more (char-class (#\0 #\9))) | |
(optional expt-part))) | |
#'intern-flonum) | |
(defun intern-flonum (value input start end) | |
(declare (ignore input start end)) | |
(destructuring-bind | |
(int-part dot frac-part expt-part) value | |
(declare (ignore dot)) | |
(let ((frac-part (coerce frac-part 'string))) | |
(* (+ (or int-part 0) | |
(/ (if (string= "" frac-part) | |
0 | |
(parse-integer frac-part)) | |
(expt 10 (length frac-part)))) | |
(or expt-part 1.0e0))))) | |
(in-package #:cl-user) | |
(defpackage #:peg-combinators.syntax | |
(:use #:cl #:pegc) | |
(:nicknames #:pegc.syntax)) | |
(in-package #:pegc.syntax) | |
#| | |
Expression <- Spacing Sequence (SLASH Sequence)* | |
Sequence <- Prefix* | |
Prefix <- (AND / NOT)? Suffix | |
Suffix <- Primary (QUESTION / STAR / PLUS)? | |
Primary <- LispForm | |
/ Identifier !LEFTARROW | |
/ OPEN Expression CLOSE | |
/ Literal / Class / DOT | |
# Lexical syntax | |
Identifier <- IdentStart IdentCont* Spacing | |
IdentStart <- [a-zA-Z_] | |
IdentCont <- IdentStart / [0-9] | |
LispForm <- [`] (![`] Char)* [`] Spacing | |
Literal <- [’] (![’] Char)* [’] Spacing | |
/ ["] (!["] Char)* ["] Spacing | |
Class <- ’[’ (!’]’ Range)* ’]’ Spacing | |
Range <- Char ’-’ Char / Char | |
Char <- ’\\’ [nrt`’"\[\]\\] | |
/ ’\\’ [0-2][0-7][0-7] | |
/ ’\\’ [0-7][0-7]? | |
/ !’\\’ . | |
LEFTARROW <- ’<-’ Spacing | |
SLASH <- ’/’ Spacing | |
AND <- ’&’ Spacing | |
NOT <- ’!’ Spacing | |
QUESTION <- ’?’ Spacing | |
STAR <- ’*’ Spacing | |
PLUS <- ’+’ Spacing | |
LPAREN <- ’(’ Spacing | |
RPAREN <- ’)’ Spacing | |
DOT <- ’.’ Spacing | |
Spacing <- (Space / Comment)* | |
Comment <- ’#’ (!EndOfLine .)* EndOfLine | |
Spaces <- ’ ’ / ’\t’ / EndOfLine | |
EndOfLine <- ’\r\n’ / ’\n’ / ’\r’ | |
EndOfFile <- !. | |
|# | |
(defrule eof | |
(not-p any-char)) | |
(defrule eol | |
(choice (literal #.(coerce '(#\Return #\Newline) 'string)) | |
(char-class #\Newline #\Return))) | |
(defrule spaces | |
(choice (char-class #\Space #\Tab) | |
eol)) | |
(defrule comment | |
(seq (char-class #\#) | |
(zero-or-more (seq (not-p eol) any-char)) | |
eol)) | |
(defrule spacing | |
(zero-or-more (choice spaces comment))) | |
(defrule dot | |
(seq (literal #\.) spacing) | |
(lambda (value input start end) | |
(declare (ignore value input start end)) | |
'any-char)) | |
(defrule lparen | |
(seq (literal #\() spacing)) | |
(defrule rparen | |
(seq (literal #\)) spacing)) | |
(defrule question | |
(seq (literal #\?) spacing) | |
(lambda (value input start end) | |
(declare (ignore value input start end)) | |
#\?)) | |
(defrule star | |
(seq (literal #\*) spacing) | |
(lambda (value input start end) | |
(declare (ignore value input start end)) | |
#\*)) | |
(defrule plus | |
(seq (literal #\+) spacing) | |
(lambda (value input start end) | |
(declare (ignore value input start end)) | |
#\+)) | |
(defrule and-literal | |
(seq (literal #\&) spacing) | |
(lambda (value input start end) | |
(declare (ignore value input start end)) | |
#\&)) | |
(defrule not-literal | |
(seq (literal #\!) spacing) | |
(lambda (value input start end) | |
(declare (ignore value input start end)) | |
#\!)) | |
(defrule slash | |
(seq (literal #\/) spacing)) | |
(defrule left-arrow | |
(seq (literal "<-") spacing)) | |
(defrule hexdigit | |
(char-class (#\0 #\9) (#\A #\F) (#\a #\f))) | |
(defrule peg-char | |
(choice (seq (char-class #\\) | |
(char-class #\n #\r #\t #\` #\' #\" #\[ #\] #\\)) | |
(seq (char-class #\\) | |
(char-class #\u) | |
(one-or-more hexdigit)) | |
(seq (not-p (char-class #\\)) | |
any-char)) | |
#'intern-peg-char) | |
(defun intern-peg-char (value input start end) | |
(declare (ignore input start end)) | |
(if (eql #\\ (first value)) | |
(case (second value) | |
(#\n #\Newline) | |
(#\r #\Return) | |
(#\t #\Tab) | |
(#\u (let ((code (parse-integer (coerce (third value) 'string) | |
:radix 16))) | |
(if (< code char-code-limit) | |
(code-char code) | |
(error "\\u~{~a~} is bigger than CHAR-CODE-LIMIT" | |
(third value))))) | |
(T (second value))) | |
(second value))) | |
(defrule peg-range | |
(choice (seq peg-char (char-class #\-) peg-char) | |
peg-char) | |
#'intern-peg-range) | |
(defun intern-peg-range (value input start end) | |
(declare (ignore input start end)) | |
(if (characterp value) | |
value | |
(list (first value) (third value)))) | |
(defrule peg-class | |
(seq (char-class #\[) | |
(zero-or-more (seq (not-p (char-class #\])) peg-range)) | |
(char-class #\]) | |
spacing) | |
#'intern-peg-class) | |
(defun intern-peg-class (value input start end) | |
(declare (ignore input start end)) | |
`(char-class ,@(mapcar #'second | |
(second value)))) | |
(defrule peg-literal | |
(choice (seq (char-class #\') | |
(zero-or-more (seq (not-p (char-class #\')) peg-char)) | |
(char-class #\') | |
spacing) | |
(seq (char-class #\") | |
(zero-or-more (seq (not-p (char-class #\")) peg-char)) | |
(char-class #\") | |
spacing)) | |
#'intern-peg-literal) | |
(defun intern-peg-literal (value input start end) | |
(declare (ignore input start end)) | |
`(literal ,(coerce (mapcar #'second (second value)) 'string))) | |
(defrule lisp-form | |
(seq (char-class #\`) | |
(zero-or-more (seq (not-p (char-class #\`)) peg-char)) | |
(char-class #\`) | |
spacing) | |
#'intern-lisp-form) | |
(defun intern-lisp-form (value input start end) | |
(declare (ignore value)) | |
(with-input-from-string (in (subseq input (1+ start) (1- end))) | |
(loop :with eof = (gensym) | |
:for form = (read in nil eof) | |
:until (eq form eof) :collect form :into forms | |
:finally (return (if (endp forms) | |
'(seq) | |
`(progn ,@forms)))))) | |
(defrule ident-start | |
(char-class (#\a #\z) (#\A #\Z) #\-)) | |
(defrule ident-cont | |
(choice ident-start (char-class (#\0 #\9)))) | |
(defrule identifier | |
(seq ident-start (zero-or-more ident-cont) spacing) | |
#'intern-identifier) | |
(defun intern-identifier (value input start end) | |
(declare (ignore input start end)) | |
(intern (format nil "~a~{~a~}" (first value) (second value)))) | |
(defrule expr-in-parens | |
(seq lparen #'expression rparen) | |
(lambda (value input start end) | |
(declare (ignore input start end)) | |
(second value))) | |
(defrule id-not-defn | |
(seq identifier (not-p left-arrow)) | |
(lambda (value input start end) | |
(declare (ignore input start end)) | |
(first value))) | |
(defrule primary | |
(choice lisp-form | |
id-not-defn | |
expr-in-parens | |
peg-literal | |
peg-class | |
dot)) | |
(defrule suffix | |
(seq primary (optional (choice question star plus))) | |
#'intern-suffix) | |
(defun intern-suffix (value input start end) | |
(declare (ignore input start end)) | |
(case (second value) | |
(#\? `(optional ,(first value))) | |
(#\* `(zero-or-more ,(first value))) | |
(#\+ `(one-or-more ,(first value))) | |
(T (first value)))) | |
(defrule prefix | |
(seq (optional (choice and-literal not-literal)) suffix) | |
#'intern-prefix) | |
(defun intern-prefix (value input start end) | |
(declare (ignore input start end)) | |
(case (first value) | |
(#\& `(and-p ,(second value))) | |
(#\! `(not-p ,(second value))) | |
(T (second value)))) | |
(defrule expr-seq | |
(zero-or-more prefix) | |
(lambda (value input start end) | |
(declare (ignore input start end)) | |
(if (= 1 (length value)) | |
(first value) | |
`(seq ,@value)))) | |
(defrule expression | |
(seq expr-seq (zero-or-more (seq slash expr-seq))) | |
(lambda (value input start end) | |
(declare (ignore input start end)) | |
(if (endp (second value)) | |
(first value) | |
`(choice ,(first value) | |
,@(mapcar #'second (second value)))))) | |
(defrule rule | |
(seq spacing expression eof) | |
(lambda (value input start end) | |
(declare (ignore input start end)) | |
(second value))) | |
(defun read-lbracket (stream char) | |
(declare (ignore char)) | |
(let* ((string (with-output-to-string (out) | |
(loop :with level = 1 | |
:for c = (read-char stream) | |
:do (case c | |
(#\[ (incf level) (write-char c out)) | |
(#\] (unless (zerop (decf level)) | |
(write-char c out))) | |
(T (write-char c out))) | |
:while (> level 0)))) | |
(result (funcall rule string))) | |
(if (parsing-error-p result) | |
(error "Parsing error at ~a. Position ~a after `[': ~a" | |
stream | |
(result-position result) | |
(parsing-error-message result)) | |
(let ((input (gensym "INPUT")) | |
(start (gensym "START")) | |
(end (gensym "END"))) | |
`(lambda (,input &key ((:start ,start) 0) ((:end ,end) nil)) | |
(funcall ,(result-value result) | |
,input | |
:start ,start | |
:end ,end)))))) | |
(defun read-rbracket (stream char) | |
(declare (ignore stream char)) | |
(error "Unmatched close bracket.")) | |
(defvar *pegc-readtables* '()) | |
(defun %enable-pegc-syntax () | |
(push *readtable* *pegc-readtables*) | |
(setf *readtable* (copy-readtable)) | |
(set-macro-character #\[ #'read-lbracket) | |
(set-macro-character #\] #'read-rbracket) | |
(values)) | |
(defun %disable-pegc-syntax () | |
(setf *readtable* (if (null *pegc-readtables*) | |
(copy-readtable nil) | |
(pop *pegc-readtables*))) | |
(values)) | |
(defmacro pegc:enable-pegc-syntax () | |
`(eval-when (:compile-toplevel :load-toplevel :execute) | |
(%enable-pegc-syntax))) | |
(defmacro pegc:disable-pegc-syntax () | |
`(eval-when (:compile-toplevel :load-toplevel :execute) | |
(%disable-pegc-syntax))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment