hkoba · October 14, 2015 03:00 · hkoba · Oct 14, 2015 · hkoba · Oct 14, 2015
diff --git a/ABNF.pm6 b/ABNF.pm6
 #
 # See also https://tools.ietf.org/html/rfc5234#section-4
 
 # If you want to trace how this works, install https://github.com/jnthn/grammar-debugger/
 # and uncomment below.
 #
 # use Grammar::Tracer;

 # Grammar for ABNF rule lists, following the rule definitions of
 # RFC 5234 section 4 plus the core rules (ALPHA, DIGIT, WSP, ...).
 #
 # Every rule is declared as a `token`, so nothing backtracks. Wherever
 # `||` (ordered alternation) is used, an alternative that is a prefix
 # of another one must therefore be listed second.
 #
 # Deliberate differences from the RFC text:
 #   * line ends are <NL> (CRLF or a bare LF) instead of CRLF only;
 #   * <repeat> and <defined-as> try their longer alternative first;
 #   * HEXDIG and the "b"/"d"/"x" radix markers accept either letter
 #     case, because ABNF literal strings are case-insensitive.
 grammar ABNF {

     # Entry point: optional leading whitespace, then the rule list.
     token TOP {
         <.ws> <rulelist>
     }

     # One or more rules, interleaved with blank or comment-only lines.
     token rulelist {
         (<rule> || (<c-wsp>* <c-nl>))+
     }

     # A single definition: name, "=" or "=/", right-hand side, line end.
     token rule {
         <rulename> <defined-as> <elements> <c-nl>
     }

     token rulename {
         <ALPHA> (<ALPHA> || <DIGIT> || "-")*
     }

     # "=/" has to be tried before "=": with ordered alternation inside
     # a token, "=" would match first and the "/" of an incremental
     # alternative would be left unparsed, failing the whole rule.
     token defined-as {
         <c-wsp>* ("=/" || "=") <c-wsp>*
     }

     token elements {
         <alternation> <c-wsp>*
     }

     # White space, or a line end followed by white space (continuation).
     token c-wsp {
         <WSP> || (<c-nl> <WSP>)
     }

     # Comment or newline.
     token c-nl {
         <comment> || <NL>
     }

     token comment {
         ";" (<WSP> || <VCHAR>)* <NL>
     }

     token alternation {
         <concatenation>
         (<c-wsp>* "/" <c-wsp>* <concatenation>)*
     }

     token concatenation {
         <repetition> (<c-wsp>+ <repetition>)*
     }

     token repetition {
         <repeat>? <element>
     }

     #
     # In original rfc5234, <repeat> was  1*DIGIT / (*DIGIT "*" *DIGIT)
     # But it failed to match against "1*(...)". So I swapped precedence.
     #
     token repeat {
         (<DIGIT>* "*" <DIGIT>*) || <DIGIT>+
     }

     token element {
         <rulename> || <group> || <option> ||
         <char-val> || <num-val> || <prose-val>
     }

     token group {
         "(" <c-wsp>* <alternation> <c-wsp>* ")"
     }

     token option {
         "[" <c-wsp>* <alternation> <c-wsp>* "]"
     }

     # Quoted string of SP and VCHAR without DQUOTE.
     token char-val {
         <DQUOTE> (<[\x20 \x21]> || <[\x23..\x7E]>)* <DQUOTE>
     }

     token num-val {
         "%" (<bin-val> || <dec-val> || <hex-val>)
     }

     # For bin-val, dec-val and hex-val: a radix marker, one value, then
     # optionally either a "."-separated series of further values or a
     # single "-" range end. The radix marker is matched in either case.
     token bin-val {
         <[bB]> <BIT>+
         ( ("." <BIT>+)+ || ("-" <BIT>+) )?
     }

     token dec-val {
         <[dD]> <DIGIT>+
         ( ("." <DIGIT>+)+ || ("-" <DIGIT>+) )?
     }

     token hex-val {
         <[xX]> <HEXDIG>+
         ( ("." <HEXDIG>+)+ || ("-" <HEXDIG>+) )?
     }

     # Bracketed string of SP and VCHAR without angles.
     token prose-val {
         "<" (<[\x20..\x3D]> || <[\x3F..\x7E]>)* ">"
     }

     # Line end used by <c-nl> and <comment>: CRLF or a bare LF.
     token NL     { <CRLF> || <LF> }

     # Core rules.
     token ALPHA  { <[A..Z a..z]>            }
     token BIT    { <[0 1]>                  }
     token CHAR   { <[\x01..\x7F]>           }
     token LF     { "\x0a"                   }
     token CR     { "\x0d"                   }
     token CRLF   { "\x0d\x0a"               }
     token DIGIT  { <[0..9]>                 }
     token DQUOTE { '"'                      }
     # Lower-case a..f is accepted too, so that "%x0d.0a" parses.
     token HEXDIG { <DIGIT> || <[A..F a..f]> }
     token HTAB   { "\t"                     }
     token LWSP   { (<WSP> || <CRLF> <WSP>)* }
     token OCTET  { <[\x00..\xFF]>           }
     token SP     { " "                      }
     token VCHAR  { <[\x21..\x7E]>           }
     token WSP    { <SP> || <HTAB>           }
 }
diff --git a/rfc5234_abnf.txt b/rfc5234_abnf.txt
 rulelist       =  1*( rule / (*c-wsp c-nl) )

 rule           =  rulename defined-as elements c-nl
                       ; continues if next line starts
                       ;  with white space

 rulename       =  ALPHA *(ALPHA / DIGIT / "-")


 defined-as     =  *c-wsp ("=" / "=/") *c-wsp
                       ; basic rules definition and
                       ;  incremental alternatives

 elements       =  alternation *c-wsp

 c-wsp          =  WSP / (c-nl WSP)

 c-nl           =  comment / CRLF
                       ; comment or newline

 comment        =  ";" *(WSP / VCHAR) CRLF

 alternation    =  concatenation
                  *(*c-wsp "/" *c-wsp concatenation)

 concatenation  =  repetition *(1*c-wsp repetition)

 repetition     =  [repeat] element

 repeat         =  1*DIGIT / (*DIGIT "*" *DIGIT)

 element        =  rulename / group / option /
                  char-val / num-val / prose-val

 group          =  "(" *c-wsp alternation *c-wsp ")"

 option         =  "[" *c-wsp alternation *c-wsp "]"

 char-val       =  DQUOTE *(%x20-21 / %x23-7E) DQUOTE
                       ; quoted string of SP and VCHAR
                       ;  without DQUOTE

 num-val        =  "%" (bin-val / dec-val / hex-val)

 bin-val        =  "b" 1*BIT
                  [ 1*("." 1*BIT) / ("-" 1*BIT) ]
                       ; series of concatenated bit values
                       ;  or single ONEOF range

 dec-val        =  "d" 1*DIGIT
                  [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]

 hex-val        =  "x" 1*HEXDIG
                  [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]

 prose-val      =  "<" *(%x20-3D / %x3F-7E) ">"
                       ; bracketed string of SP and VCHAR
                       ;  without angles
                       ; prose description, to be used as
                       ;  last resort
	#
	# See also https://tools.ietf.org/html/rfc5234#section-4

	# If you want to trace how this works, install https://github.com/jnthn/grammar-debugger/
	# and uncomment below.
	#
	# use Grammar::Tracer;

	# Grammar for ABNF rule lists, following the rule definitions of
	# RFC 5234 section 4 plus the core rules (ALPHA, DIGIT, WSP, ...).
	#
	# All rules are `token`s (no backtracking), so with `||` (ordered
	# alternation) an alternative that is a prefix of another one has
	# to be listed second.
	#
	# Differences from the RFC text: line ends may be CRLF or a bare LF,
	# <repeat> and <defined-as> try their longer alternative first, and
	# HEXDIG / the radix markers are matched case-insensitively.
	grammar ABNF {

	    # Entry point: optional leading whitespace, then the rule list.
	    token TOP {
	        <.ws> <rulelist>
	    }

	    # One or more rules, interleaved with blank or comment-only lines.
	    token rulelist {
	        (<rule> || (<c-wsp>* <c-nl>))+
	    }

	    token rule {
	        <rulename> <defined-as> <elements> <c-nl>
	    }

	    token rulename {
	        <ALPHA> (<ALPHA> || <DIGIT> || "-")*
	    }

	    # "=/" first: otherwise "=" matches and the "/" is left unparsed.
	    token defined-as {
	        <c-wsp>* ("=/" || "=") <c-wsp>*
	    }

	    token elements {
	        <alternation> <c-wsp>*
	    }

	    # White space, or a line end followed by white space.
	    token c-wsp {
	        <WSP> || (<c-nl> <WSP>)
	    }

	    token c-nl {
	        <comment> || <NL>
	    }

	    token comment {
	        ";" (<WSP> || <VCHAR>)* <NL>
	    }

	    token alternation {
	        <concatenation>
	        (<c-wsp>* "/" <c-wsp>* <concatenation>)*
	    }

	    token concatenation {
	        <repetition> (<c-wsp>+ <repetition>)*
	    }

	    token repetition {
	        <repeat>? <element>
	    }

	    #
	    # In original rfc5234, <repeat> was  1*DIGIT / (*DIGIT "*" *DIGIT)
	    # But it failed to match against "1*(...)". So I swapped precedence.
	    #
	    token repeat {
	        (<DIGIT>* "*" <DIGIT>*) || <DIGIT>+
	    }

	    token element {
	        <rulename> || <group> || <option> ||
	        <char-val> || <num-val> || <prose-val>
	    }

	    token group {
	        "(" <c-wsp>* <alternation> <c-wsp>* ")"
	    }

	    token option {
	        "[" <c-wsp>* <alternation> <c-wsp>* "]"
	    }

	    token char-val {
	        <DQUOTE> (<[\x20 \x21]> || <[\x23..\x7E]>)* <DQUOTE>
	    }

	    token num-val {
	        "%" (<bin-val> || <dec-val> || <hex-val>)
	    }

	    # Radix marker (either case), one value, then optionally a
	    # "."-separated series of values or a single "-" range end.
	    token bin-val {
	        <[bB]> <BIT>+
	        ( ("." <BIT>+)+ || ("-" <BIT>+) )?
	    }

	    token dec-val {
	        <[dD]> <DIGIT>+
	        ( ("." <DIGIT>+)+ || ("-" <DIGIT>+) )?
	    }

	    token hex-val {
	        <[xX]> <HEXDIG>+
	        ( ("." <HEXDIG>+)+ || ("-" <HEXDIG>+) )?
	    }

	    token prose-val {
	        "<" (<[\x20..\x3D]> || <[\x3F..\x7E]>)* ">"
	    }

	    # Line end used by <c-nl> and <comment>: CRLF or a bare LF.
	    token NL { <CRLF> || <LF> }

	    # Core rules.
	    token ALPHA { <[A..Z a..z]> }
	    token BIT { <[0 1]> }
	    token CHAR { <[\x01..\x7F]> }
	    token LF { "\x0a" }
	    token CR { "\x0d" }
	    token CRLF { "\x0d\x0a" }
	    token DIGIT { <[0..9]> }
	    token DQUOTE { '"' }
	    # Lower-case a..f is accepted too, so that "%x0d.0a" parses.
	    token HEXDIG { <DIGIT> || <[A..F a..f]> }
	    token HTAB { "\t" }
	    token LWSP { (<WSP> || <CRLF> <WSP>)* }
	    token OCTET { <[\x00..\xFF]> }
	    token SP { " " }
	    token VCHAR { <[\x21..\x7E]> }
	    token WSP { <SP> || <HTAB> }
	}
	rulelist = 1*( rule / (*c-wsp c-nl) )

	rule = rulename defined-as elements c-nl
	; continues if next line starts
	; with white space

	rulename = ALPHA *(ALPHA / DIGIT / "-")


	defined-as = *c-wsp ("=" / "=/") *c-wsp
	; basic rules definition and
	; incremental alternatives

	elements = alternation *c-wsp

	c-wsp = WSP / (c-nl WSP)

	c-nl = comment / CRLF
	; comment or newline

	comment = ";" *(WSP / VCHAR) CRLF

	alternation = concatenation
	*(*c-wsp "/" *c-wsp concatenation)

	concatenation = repetition *(1*c-wsp repetition)

	repetition = [repeat] element

	repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)

	element = rulename / group / option /
	char-val / num-val / prose-val

	group = "(" *c-wsp alternation *c-wsp ")"

	option = "[" *c-wsp alternation *c-wsp "]"

	char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
	; quoted string of SP and VCHAR
	; without DQUOTE

	num-val = "%" (bin-val / dec-val / hex-val)

	bin-val = "b" 1*BIT
	[ 1*("." 1*BIT) / ("-" 1*BIT) ]
	; series of concatenated bit values
	; or single ONEOF range

	dec-val = "d" 1*DIGIT
	[ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]

	hex-val = "x" 1*HEXDIG
	[ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]

	prose-val = "<" *(%x20-3D / %x3F-7E) ">"
	; bracketed string of SP and VCHAR
	; without angles
	; prose description, to be used as
	; last resort