Skip to content

Instantly share code, notes, and snippets.

@hkoba
Last active October 14, 2015 03:00
Show Gist options
  • Save hkoba/fbd50f5d61fff8110f4e to your computer and use it in GitHub Desktop.
Save hkoba/fbd50f5d61fff8110f4e to your computer and use it in GitHub Desktop.
Experimental ABNF (rfc5234) acceptor in Perl6 grammar
#
# See also https://tools.ietf.org/html/rfc5234#section-4
# To trace how the grammar matches, install https://github.com/jnthn/grammar-debugger/
# and uncomment the `use Grammar::Tracer;` line below.
#
# use Grammar::Tracer;
# Acceptor for ABNF rule lists, following the ABNF definition of ABNF in
# RFC 5234 section 4 and the core rules of its appendix B.1.
#
# Every rule is a `token`, so matching ratchets (no backtracking into a
# sub-rule once it has matched) and `||` tries its alternatives strictly
# left to right. Alternative order therefore matters wherever one
# alternative is a prefix of another.
#
# Intentional deviations from the literal RFC text:
#   * Line ends may be CRLF or a bare LF (see NL); the RFC requires CRLF.
#   * The two alternatives of <repeat> are swapped (see the note there).
#   * In <defined-as>, "=/" is tried before "=" (see the note there).
#   * Hex digits and the b/d/x base letters are matched case-insensitively,
#     because RFC 5234 section 2.3 makes quoted literals case-insensitive.
grammar ABNF {
    # Entry point: optional leading whitespace, then the rule list.
    token TOP {
        <.ws> <rulelist>
    }

    # One or more rules, blank lines or comment-only lines.
    token rulelist {
        (<rule> || (<c-wsp>* <c-nl>))+
    }

    # name, "=" or "=/", right-hand side, end of line.
    # The rule continues onto the next line if that line starts with
    # white space (handled by <c-wsp>).
    token rule {
        <rulename> <defined-as> <elements> <c-nl>
    }

    token rulename {
        <ALPHA> (<ALPHA> || <DIGIT> || "-")*
    }

    # "=" defines a rule, "=/" adds incremental alternatives to one.
    # "=/" has to be tried first: with sequential alternation in a
    # ratcheting token, "=" would otherwise match the first character of
    # "=/" and leave the "/" behind, making <elements> fail.
    token defined-as {
        <c-wsp>* ("=/" || "=") <c-wsp>*
    }

    token elements {
        <alternation> <c-wsp>*
    }

    # White space, possibly preceded by a comment or a line break
    # (this is what lets a rule span several lines).
    token c-wsp {
        <WSP> || (<c-nl> <WSP>)
    }

    # Comment or newline.
    token c-nl {
        <comment> || <NL>
    }

    token comment {
        ";" (<WSP> || <VCHAR>)* <NL>
    }

    token alternation {
        <concatenation>
        (<c-wsp>* "/" <c-wsp>* <concatenation>)*
    }

    token concatenation {
        <repetition> (<c-wsp>+ <repetition>)*
    }

    token repetition {
        <repeat>? <element>
    }

    #
    # In the original RFC 5234, <repeat> is 1*DIGIT / (*DIGIT "*" *DIGIT).
    # Tried in that order, 1*DIGIT consumes the "1" of "1*(...)" and the
    # remaining "*" cannot start an element, so the alternatives are swapped.
    #
    token repeat {
        (<DIGIT>* "*" <DIGIT>*) || <DIGIT>+
    }

    token element {
        <rulename> || <group> || <option> ||
        <char-val> || <num-val> || <prose-val>
    }

    token group {
        "(" <c-wsp>* <alternation> <c-wsp>* ")"
    }

    token option {
        "[" <c-wsp>* <alternation> <c-wsp>* "]"
    }

    # Quoted string of SP and VCHAR without DQUOTE.
    token char-val {
        <DQUOTE> (<[\x20 \x21]> || <[\x23..\x7E]>)* <DQUOTE>
    }

    token num-val {
        "%" (<bin-val> || <dec-val> || <hex-val>)
    }

    # Each numeric form is either a series of concatenated values
    # ("." separated) or a single value range ("-" separated).
    # The base letter is accepted in either case.
    token bin-val {
        <[bB]> <BIT>+
        ( ("." <BIT>+)+ || ("-" <BIT>+) )?
    }

    token dec-val {
        <[dD]> <DIGIT>+
        ( ("." <DIGIT>+)+ || ("-" <DIGIT>+) )?
    }

    token hex-val {
        <[xX]> <HEXDIG>+
        ( ("." <HEXDIG>+)+ || ("-" <HEXDIG>+) )?
    }

    # Bracketed string of SP and VCHAR without ">".
    token prose-val {
        "<" (<[\x20..\x3D]> || <[\x3F..\x7E]>)* ">"
    }

    # Line terminator used by this grammar: CRLF, or a bare LF.
    token NL     { <CRLF> || <LF> }

    # Core rules (RFC 5234 appendix B.1).
    token ALPHA  { <[A..Z a..z]> }
    token BIT    { <[0 1]> }
    token CHAR   { <[\x01..\x7F]> }
    token LF     { "\x0a" }
    token CR     { "\x0d" }
    token CRLF   { "\x0d\x0a" }
    token DIGIT  { <[0..9]> }
    token DQUOTE { '"' }
    # The RFC spells HEXDIG with the literals "A".."F", which are
    # case-insensitive, so lower-case digits are accepted as well.
    token HEXDIG { <DIGIT> || <[A..F a..f]> }
    token HTAB   { "\t" }
    token LWSP   { (<WSP> || <CRLF> <WSP>)* }
    token OCTET  { <[\x00..\xFF]> }
    token SP     { " " }
    token VCHAR  { <[\x21..\x7E]> }
    token WSP    { <SP> || <HTAB> }
}
; ABNF definition of ABNF (RFC 5234, section 4).
; Continuation lines must begin with white space: a rule only continues
; onto the next line when that line starts with WSP (see c-wsp).
rulelist       =  1*( rule / (*c-wsp c-nl) )
rule           =  rulename defined-as elements c-nl
                       ; continues if next line starts
                       ;  with white space
rulename       =  ALPHA *(ALPHA / DIGIT / "-")
defined-as     =  *c-wsp ("=" / "=/") *c-wsp
                       ; basic rules definition and
                       ;  incremental alternatives
elements       =  alternation *c-wsp
c-wsp          =  WSP / (c-nl WSP)
c-nl           =  comment / CRLF
                       ; comment or newline
comment        =  ";" *(WSP / VCHAR) CRLF
alternation    =  concatenation
                  *(*c-wsp "/" *c-wsp concatenation)
concatenation  =  repetition *(1*c-wsp repetition)
repetition     =  [repeat] element
repeat         =  1*DIGIT / (*DIGIT "*" *DIGIT)
element        =  rulename / group / option /
                  char-val / num-val / prose-val
group          =  "(" *c-wsp alternation *c-wsp ")"
option         =  "[" *c-wsp alternation *c-wsp "]"
char-val       =  DQUOTE *(%x20-21 / %x23-7E) DQUOTE
                       ; quoted string of SP and VCHAR
                       ;  without DQUOTE
num-val        =  "%" (bin-val / dec-val / hex-val)
bin-val        =  "b" 1*BIT
                  [ 1*("." 1*BIT) / ("-" 1*BIT) ]
                       ; series of concatenated bit values
                       ;  or single ONEOF range
dec-val        =  "d" 1*DIGIT
                  [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
hex-val        =  "x" 1*HEXDIG
                  [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
prose-val      =  "<" *(%x20-3D / %x3F-7E) ">"
                       ; bracketed string of SP and VCHAR
                       ;  without angles
                       ; prose description, to be used as
                       ;  last resort
@hkoba
Copy link
Author

hkoba commented Oct 14, 2015

@hkoba
Copy link
Author

hkoba commented Oct 14, 2015

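To try this out, run the following in the directory that contains the ABNF module, feeding it the ABNF text (here rfc5234_abnf.txt) on standard input: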

perl6 -I. -MABNF -e 'say "Matched until: ", ABNF.new.subparse("-".IO.slurp).to' < rfc5234_abnf.txt

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment