Last active
October 14, 2015 03:00
-
-
Save hkoba/fbd50f5d61fff8110f4e to your computer and use it in GitHub Desktop.
Experimental ABNF (rfc5234) acceptor in Perl6 grammar
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# See also https://tools.ietf.org/html/rfc5234#section-4
# If you want to trace how this works, install https://github.com/jnthn/grammar-debugger/
# and uncomment the `use Grammar::Tracer;` line below.
#
# use Grammar::Tracer;
# Acceptor for ABNF rule lists, transcribed from the "ABNF definition of ABNF"
# in RFC 5234 section 4, followed by the core rules (ALPHA, DIGIT, ...).
#
# Every rule is a `token`, so nothing backtracks once a sub-match succeeds,
# and every alternation uses `||`, which tries its branches strictly left to
# right.  Where one branch is a prefix of another, the longer branch must
# therefore be listed first.
#
# Deliberate deviations from the RFC text:
#   * <NL> (CRLF or a bare LF) is used wherever the RFC requires CRLF, so
#     files with LF-only line endings are accepted.
#   * The two branches of <repeat> are swapped (see the note on that token).
#   * Literal letters (radix markers, hex digits) accept both cases, because
#     ABNF literal strings are case-insensitive.
grammar ABNF {
    # Entry point: skip any leading whitespace, then require a rule list.
    token TOP {
        <.ws> <rulelist>
    }

    # One or more rules, interleaved with blank or comment-only lines.
    token rulelist {
        (<rule> || (<c-wsp>* <c-nl>))+
    }

    # A single definition.  It continues onto the next line when that line
    # starts with whitespace (handled by <c-wsp>).
    token rule {
        <rulename> <defined-as> <elements> <c-nl>
    }

    token rulename {
        <ALPHA> (<ALPHA> || <DIGIT> || "-")*
    }

    # "=" is a basic definition, "=/" an incremental alternative.
    # "=/" has to be tried first: "=" is a prefix of it, and a token never
    # comes back to try the second branch once the first one has matched.
    token defined-as {
        <c-wsp>* ("=/" || "=") <c-wsp>*
    }

    token elements {
        <alternation> <c-wsp>*
    }

    # Whitespace, or a line break/comment followed by whitespace
    # (i.e. a continuation line).
    token c-wsp {
        <WSP> || (<c-nl> <WSP>)
    }

    # Comment or newline.
    token c-nl {
        <comment> || <NL>
    }

    # ";" up to and including the end of the line.
    token comment {
        ";" (<WSP> || <VCHAR>)* <NL>
    }

    token alternation {
        <concatenation>
        (<c-wsp>* "/" <c-wsp>* <concatenation>)*
    }

    token concatenation {
        <repetition> (<c-wsp>+ <repetition>)*
    }

    token repetition {
        <repeat>? <element>
    }

    #
    # In the original RFC 5234, <repeat> is 1*DIGIT / (*DIGIT "*" *DIGIT).
    # With ordered alternation that fails on "1*(...)": the first branch
    # consumes "1" and the "*" is then left over.  So the branches are swapped.
    #
    token repeat {
        (<DIGIT>* "*" <DIGIT>*) || <DIGIT>+
    }

    token element {
        <rulename> || <group> || <option> ||
        <char-val> || <num-val> || <prose-val>
    }

    token group {
        "(" <c-wsp>* <alternation> <c-wsp>* ")"
    }

    token option {
        "[" <c-wsp>* <alternation> <c-wsp>* "]"
    }

    # Quoted string of SP and VCHAR without DQUOTE.
    token char-val {
        <DQUOTE> (<[\x20 \x21]> || <[\x23..\x7E]>)* <DQUOTE>
    }

    token num-val {
        "%" (<bin-val> || <dec-val> || <hex-val>)
    }

    # For each radix: a series of "."-concatenated values, or a single
    # "-" range.  The radix letter is accepted in either case.
    token bin-val {
        <[bB]> <BIT>+
        ( ("." <BIT>+)+ || ("-" <BIT>+) )?
    }

    token dec-val {
        <[dD]> <DIGIT>+
        ( ("." <DIGIT>+)+ || ("-" <DIGIT>+) )?
    }

    token hex-val {
        <[xX]> <HEXDIG>+
        ( ("." <HEXDIG>+)+ || ("-" <HEXDIG>+) )?
    }

    # Bracketed string of SP and VCHAR without ">"; prose description,
    # to be used as a last resort.
    token prose-val {
        "<" (<[\x20..\x3D]> || <[\x3F..\x7E]>)* ">"
    }

    # Line terminator used by this grammar: CRLF, or a bare LF.
    token NL     { <CRLF> || <LF> }

    # Core rules.
    token ALPHA  { <[A..Z a..z]> }
    token BIT    { <[0 1]> }
    token CHAR   { <[\x01..\x7F]> }
    token LF     { "\x0a" }
    token CR     { "\x0d" }
    token CRLF   { "\x0d\x0a" }
    token DIGIT  { <[0..9]> }
    token DQUOTE { '"' }
    # Hex digits in either case ("A".."F" are case-insensitive literals).
    token HEXDIG { <DIGIT> || <[A..F a..f]> }
    token HTAB   { "\t" }
    token LWSP   { (<WSP> || <CRLF> <WSP>)* }
    token OCTET  { <[\x00..\xFF]> }
    token SP     { " " }
    token VCHAR  { <[\x21..\x7E]> }
    token WSP    { <SP> || <HTAB> }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rulelist = 1*( rule / (*c-wsp c-nl) ) | |
rule = rulename defined-as elements c-nl | |
; continues if next line starts | |
; with white space | |
rulename = ALPHA *(ALPHA / DIGIT / "-") | |
defined-as = *c-wsp ("=" / "=/") *c-wsp | |
; basic rules definition and | |
; incremental alternatives | |
elements = alternation *c-wsp | |
c-wsp = WSP / (c-nl WSP) | |
c-nl = comment / CRLF | |
; comment or newline | |
comment = ";" *(WSP / VCHAR) CRLF | |
alternation = concatenation | |
*(*c-wsp "/" *c-wsp concatenation) | |
concatenation = repetition *(1*c-wsp repetition) | |
repetition = [repeat] element | |
repeat = 1*DIGIT / (*DIGIT "*" *DIGIT) | |
element = rulename / group / option / | |
char-val / num-val / prose-val | |
group = "(" *c-wsp alternation *c-wsp ")" | |
option = "[" *c-wsp alternation *c-wsp "]" | |
char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE | |
; quoted string of SP and VCHAR | |
; without DQUOTE | |
num-val = "%" (bin-val / dec-val / hex-val) | |
bin-val = "b" 1*BIT | |
[ 1*("." 1*BIT) / ("-" 1*BIT) ] | |
; series of concatenated bit values | |
; or single ONEOF range | |
dec-val = "d" 1*DIGIT | |
[ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] | |
hex-val = "x" 1*HEXDIG | |
[ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] | |
prose-val = "<" *(%x20-3D / %x3F-7E) ">" | |
; bracketed string of SP and VCHAR | |
; without angles | |
; prose description, to be used as | |
; last resort |
To try this out, put the grammar module and rfc5234_abnf.txt (the rule list above) in the current directory and run:
perl6 -I. -MABNF -e 'say "Matched until: ", ABNF.new.subparse("-".IO.slurp).to' < rfc5234_abnf.txt
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See also https://tools.ietf.org/html/rfc5234#section-4