Skip to content

Instantly share code, notes, and snippets.

@peteroupc
Last active October 12, 2019 21:18
Show Gist options
  • Save peteroupc/177d89b387b609daaa44e0d498f46181 to your computer and use it in GitHub Desktop.
Save peteroupc/177d89b387b609daaa44e0d498f46181 to your computer and use it in GitHub Desktop.
ABNF parser and reader
class TokenSymbolParser
def initialize(token,prod)
@token=token
@prod=prod
end
def parse(str)
return _parseOne(@token,str,0,str.length)
end
private
def _parseOne(token,str,index,endIndex)
minOccurs=[token.minOccurs,0].max
maxOccurs=token.maxOccurs
count=0
while maxOccurs<0 || count<maxOccurs
idx=_parseOneInst(token,str,index,endIndex)
if idx<0
return -1 if count<minOccurs
return index
end
count+=1
index=idx
end
return index
end
def _parseOneInst(token,str,index,endIndex)
case token.kind
when :HEX
return index<endIndex && str[index].ord==token.value ? index+1 : -1
when :HEXRANGE
if index<endIndex && str[index].ord>=token.value[0] && str[index].ord<=token.value[1]
return index+1
end
return -1
when :WORD
return _parseOne(@prod[token.value],str,index,endIndex)
when :STRING
token.value.each_char{|x|
return -1 if index>=endIndex
iord=str[index].ord
xord=x[0].ord
index+=1
if (xord>=0x41 && xord<=0x5a)
return -1 if iord!=xord && iord!=xord+0x20
elsif (xord>=0x61 && xord<=0x7a)
return -1 if iord!=xord && iord!=xord-0x20
else
return -1 if iord!=xord
end
}
return index
when :SEQUENCE
token.each{|tok|
idx=_parseOne(tok,str,index,endIndex)
return -1 if idx<0
index=idx
}
return index
when :CHOICE
token.each{|tok|
idx=_parseOne(tok,str,index,endIndex)
return idx if idx>=0
}
return -1
end
end
end
#
# Represents a single node in an ABNF grammar.
#
class TokenSymbol
include Enumerable
attr_accessor :kind
attr_accessor :value
attr_reader :tokens
attr_accessor :parent
attr_accessor :minOccurs
attr_accessor :name
attr_accessor :maxOccurs
def initialize(kind=nil,value=nil)
@tokens=[]
@kind=kind
@value=value
@minOccurs=1
@maxOccurs=1
end
def each
@tokens.each {|item| yield item }
end
def isDelegate?
return true if self.kind==:SEQUENCE &&
self.minOccurs==1 && self.maxOccurs==1 &&
@tokens.length==1 && @tokens[0].kind==:WORD &&
@tokens[0].minOccurs==1 && @tokens[0].maxOccurs==1
return true if self.kind==:WORD &&
self.minOccurs==1 && self.maxOccurs==1
return false
end
def selfOrLast
if @kind==:CHOICE
return @tokens[@tokens.length-1]
else
return self
end
end
def length
return @tokens.length
end
def [](idx)
return @tokens[idx]
end
def minOccurs=(val)
val=-1 if val<=0
@minOccurs=val
end
def maxOccurs=(val)
val=-1 if val<=0
@maxOccurs=val
end
def setMinMax(mn,mx)
self.minOccurs=mn
self.maxOccurs=mx
return self
end
def clear
@tokens.clear
end
def copy
ret=TokenSymbol.new(@kind,@value)
ret.minOccurs=@minOccurs
ret.maxOccurs=@maxOccurs
for tok in @tokens
ret.addRaw(tok)
end
return ret
end
def add(tok)
sym=TokenSymbol.new(tok[0],tok[1])
sym.parent=self
@tokens.push(sym)
return sym
end
def addChild(child)
child.parent=self
@tokens.push(child)
return child
end
def addRaw(child)
@tokens.push(child)
return child
end
def insertChild(child,index)
child.parent=self
@tokens.insert(index,child)
return child
end
def inspectSimple
return "#{kind} #{value||name} #{minOccurs} #{maxOccurs}"
end
def inspectDepth(depth)
ret=""
for i in 0...depth
ret+=" "
end
ret+="#{kind} #{value||name} #{minOccurs} #{maxOccurs}\n"
for tok in @tokens
ret+=(tok ? tok.inspectDepth(depth+1) : (" "*(depth+1))+"?\n")
end
return ret
end
def toNamedStr
ret=to_s
ret2=ret.gsub(/^\(|\)$/,"")
ret=ret2 if !ret2.index("(") && !ret2.index(")")
return ret if !name
return name+" = "+ret
end
def minLength
return 0 if self.minOccurs<=0
case @kind
when :STRING, :HEX, :HEXRANGE
return self.hexLength*self.minOccurs
when :SEQUENCE
if @tokens.length==1
return @tokens[0].minLength*self.minOccurs
end
return self.choiceLength
when :CHOICE
if @tokens.length==1
return @tokens[0].minLength*self.minOccurs
end
return self.choiceLength
when :WORD
raise "Not supported"
end
return 0
end
def to_s
extra=""
ret=""
optional=false
if minOccurs<=0
if maxOccurs==1
optional=true
else
extra="*"+(maxOccurs<0 ? "" : maxOccurs.to_s)
end
elsif maxOccurs<=0
extra=minOccurs.to_s+"*"
elsif minOccurs==0 && maxOccurs==1
optional=true
elsif minOccurs!=maxOccurs
extra=minOccurs.to_s+"*"+maxOccurs.to_s
elsif minOccurs!=1
extra=minOccurs.to_s
end
case @kind
when :STRING
if value[/[\r\n]/]
rr=[]
for b in value.bytes
rr.push(sprintf("%%x%02x",b))
end
return rr.join(" ")
else
data=value.split("\"")
for i in 0...data.length
data[i]="\""+data[i]+"\""
end
ret=data.join(" %x22 ")
end
when :WORD
ret=@value
when :HEXRANGE
ret=sprintf("%%x%02x-%02x",@value[0],@value[1])
when :HEX
ret=sprintf("%%x%02x",@value)
when :SEQUENCE
ret=((optional || @tokens.length==1) ? "" : "(")[email protected]{|t| t.to_s }.join(" ")+
((optional || @tokens.length==1) ? "" : ")")
when :CHOICE
ret=((optional || @tokens.length==1) ? "" : "(")[email protected]{|t| t.to_s }.join(" / ")+
((optional || @tokens.length==1) ? "" : ")")
end
if optional
return "["+ret+"]"
else
return extra+ret
end
end
private
def _appendUtf8(str,codepoint)
return str if codepoint<0
if codepoint<=0x7F
str+=codepoint.chr
elsif codepoint<=0x7FF
str+=(0xC0|((codepoint>>6)&0x1F)).chr
str+=(0x80|(codepoint &0x3F)).chr
elsif codepoint<=0xFFFF
str+=(0xE0|((codepoint>>12)&0x0F)).chr
str+=(0x80|((codepoint>>6)&0x3F)).chr
str+=(0x80|(codepoint &0x3F)).chr
elsif codepoint<=0x10FFFF
str+=(0xF0|((codepoint>>18)&0x07)).chr
str+=(0x80|((codepoint>>12)&0x3F)).chr
str+=(0x80|((codepoint>>6)&0x3F)).chr
str+=(0x80|(codepoint &0x3F)).chr
end
return str
end
public
# Length of a string token
def stringLength
return (self.kind==:STRING) ? self.value.length : 0
end
# Minimum length of a hex token
def hexLength
return self.value.length if self.kind==:STRING
return self.minOccurs if (self.kind==:HEX || self.kind==:HEXRANGE)
return 0
end
# Recursively calculates the minimum length of a choice token
def choiceLength
if self.kind==:CHOICE
return self.map{|tok| tok.hexLength()*self.minOccurs }.min
end
if self.kind==:SEQUENCE && self.length==2
h=self[0].hexLength()
if self[1].kind==:HEX || self[1].kind==:HEXRANGE || self[1].kind==:STRING
h+=self[1].hexLength()
end
if self[1].kind==:CHOICE
h+=self[1].choiceLength()
end
return h*self.minOccurs
end
return 0
end
def isNonWhitespace?
if self.kind==:HEX
return @value!=0x20 && @value!=0x09 &&
@value!=0x0a && @value!=0x0d
elsif self.kind==:STRING
for b in @value.bytes
return true if b!=0x20 && b!=0x09 &&
b!=0x0a && b!=0x0d
end
return false
elsif self.kind==:HEXRANGE
return true if @value[0]>0x20
chars=[]
c=@value[0];while c<=[0x21,@value[1]].min
chars.push(c)
c+=1;end
chars.delete(0x20)
chars.delete(0x09)
chars.delete(0x0a)
chars.delete(0x0d)
return chars.length>0
end
return false
end
def productionContainsWhitespaceOnly?(prod)
s=self
while s.kind==:WORD
raise "Can't find production '#{s.value}'" if !prod[s.value]
s=prod[s.value]
end
if s.kind==:CHOICE || s.kind==:SEQUENCE
s.each{|t|
return false if !t.productionContainsWhitespaceOnly?(prod)
}
return true
end
return !s.isNonWhitespace?
end
def isMatch?(str,productions)
parser=TokenSymbolParser.new(self,productions)
return parser.parse(str)==str.length
end
#
# Generates a random UTF-8 string that fits this node's grammar.
# The 'productions' parameter is a hash of productions (nodes) to be used if
# this node refers to another ABNF production (node) by name, either
# directly or indirectly.
#
def randomString(productions, stack=nil)
isws=self.productionContainsWhitespaceOnly?(productions)
mn=@minOccurs<=0 ? 0 : @minOccurs
mx=@maxOccurs<0 ? ((1+rand(12))+(1+rand(12)))/2 : @maxOccurs
if mn<=0 && mx==1 && rand(100)<(isws ? 90 : 10)
# Optional production
return ""
end
if mn<=0 && mx > 0
mn=1
end
stack=[] if !stack
occur=(mx==mn) ? mn : mn+rand(mx-mn+1)
if isws
occur/=2
occur=mn if occur<mn
end
str=""
if mn==0 && isws && rand(100)<80
occur=0
end
if mn==1 && isws && rand(100)<80
occur=0
end
for i in 0...occur
case @kind
when :STRING
for b in self.value.bytes
if (b>=0x41 && b<=0x5a) || (b>=0x61 && b<=0x7a)
newletter=rand(2)==0 ? (b|0x20) : (b&0xDF)
raise if !((newletter>=0x41 && newletter<=0x5a) ||
(newletter>=0x61 && newletter<=0x7a))
str=_appendUtf8(str,newletter);
else
str=_appendUtf8(str,b)
end
end
when :WORD
stack.push(@value)
pr=productions[@value]
raise "Can't find production '#{@value}'" if !pr
str+=pr.randomString(productions, stack)
stack.pop
when :HEX
str=_appendUtf8(str,@value)
when :HEXRANGE
includesUpperLowerA=@value[0]<=0x41 && @value[1]>=0x41 &&
@value[0]<=0x61 && @value[1]>=0x61
rnd=0
while true
rnd=@value[0]==@value[1] ? @value[0] :
@value[0]+rand(@value[1]-@value[0]+1)
if includesUpperLowerA && !(rnd>=0x41 && rnd<=0x5a) &&
!(rnd>=0x61 && rnd<=0x7a) &&
!(rnd>=0x30 && rnd<=0x39) &&
rand(100)<90
next
end
if rnd<0x20 && rand(100)<90
next
end
break
end
str+=_appendUtf8(str,rnd)
when :SEQUENCE
self.each{|t|
str+=t.randomString(productions, stack)
}
when :CHOICE
while true
t=@tokens[rand(@tokens.length)]
if t.productionContainsWhitespaceOnly?(productions) &&
rand(100)<95
next
end
if t.kind==:WORD
recursing=stack.include?(t.value)
if recursing && (stack.length>50 || rand(100)<90)
next
end
end
str+=t.randomString(productions, stack)
break
end
end
end
return str
end
def allHexChoice
return false if @kind!=:CHOICE
for tok in @tokens
return false if tok.kind!=:HEX && tok.kind!=:HEXRANGE
end
return true
end
def isAsciiLetter(x)
return (x>=0x41 && x<=0x5a) || (x>=0x61 && x<=0x7a)
end
def allHexChoiceRecursiveAnyOccurs
return false if @kind!=:CHOICE
for tok in @tokens
return false if tok.kind!=:HEX && tok.kind!=:HEXRANGE && !tok.allHexChoiceRecursiveAnyOccurs()
end
return true
end
def _hexChoiceTokens(hextokens)
raise "Internal error" if @kind!=:CHOICE
for tok in @tokens
raise "Internal error" if tok.minOccurs!=1 || tok.maxOccurs!=1
if tok.kind==:CHOICE
tok._hexChoiceTokens(hextokens)
elsif tok.kind==:HEX || tok.kind==:HEXRANGE
hextokens.push(tok)
else
raise "Internal error"
end
end
end
def allHexChoiceRecursive
return false if @kind!=:CHOICE
for tok in @tokens
return false if tok.minOccurs!=1 || tok.maxOccurs!=1
return false if tok.kind!=:HEX && tok.kind!=:HEXRANGE && !tok.allHexChoiceRecursive()
end
return true
end
# Simplifies an ABNF tree
def simplify
return false if @kind==:HEX ||@kind==:HEXRANGE
changed=true
iter=0
while changed
changed=false
iter+=1; raise self.inspect if iter>10
if @kind==:SEQUENCE && @tokens.length>1
i=0;while i<@tokens.length
tok=@tokens[i]
if tok.kind==:SEQUENCE && tok.minOccurs==1 && tok.maxOccurs==1
newtokens=@tokens[0,i]
endtokens=@tokens[i+1,@tokens.length]
self.clear
for t in newtokens;self.addChild(t);end
for j in 0...tok.length; self.addChild(tok[j]); end
for t in endtokens;self.addChild(t);end
i-=1
changed=true
end
i+=1
end
end
if @kind==:SEQUENCE && @tokens.length==1
tok=@tokens[0]
if (tok.kind==:WORD || tok.kind==:STRING || tok.kind==:HEX || tok.kind==:HEXRANGE) &&
((tok.minOccurs==1 && tok.maxOccurs==1) || (self.minOccurs==1 && self.maxOccurs==1))
self.kind=tok.kind
self.value=tok.value
if (self.minOccurs==1 && self.maxOccurs==1)
self.minOccurs=tok.minOccurs
self.maxOccurs=tok.maxOccurs
end
self.name=tok.name
self.clear
changed=true
elsif (tok.kind==:CHOICE || tok.kind==:SEQUENCE) &&
((tok.minOccurs==1 && tok.maxOccurs==1) || (self.minOccurs==1 && self.maxOccurs==1))
self.clear
for i in 0...tok.length
self.addChild(tok[i])
end
self.kind=tok.kind
self.value=tok.value
if (self.minOccurs==1 && self.maxOccurs==1)
self.minOccurs=tok.minOccurs
self.maxOccurs=tok.maxOccurs
end
self.name=tok.name
changed=true
elsif (tok.kind==:CHOICE || tok.kind==:SEQUENCE) &&
self.minOccurs==0 && self.maxOccurs==1 && tok.minOccurs==1 && tok.maxOccurs==-1
self.clear
for i in 0...tok.length
self.addChild(tok[i])
end
self.kind=tok.kind
self.value=tok.value
self.minOccurs=0
self.maxOccurs=-1
self.name=tok.name
changed=true
end
end
if @kind==:CHOICE
done=false
while !done
done=true
# isHexRangeRec = Choice of hex values and/or ranges of hex
# values, each of which can occur only once
isHexRangeRec = self.allHexChoiceRecursive()
if isHexRangeRec
hextokens=[]
self._hexChoiceTokens(hextokens)
@tokens=hextokens
end
i=0;while i<@tokens.length
tok=@tokens[i]
if !tok
i+=1; next
end
if (tok.kind==:HEX || tok.kind==:HEXRANGE) && !isHexRangeRec
# Simplify runs of hex/hexrange
runsize=1
[email protected]
j=i+1;while j<@tokens.length
next if @tokens[j]==nil
if (@tokens[j].kind!=:HEX && @tokens[j].kind!=:HEXRANGE)
tokenend=j;break
end
runsize+=1
j+=1;end
if runsize>1 && runsize<@tokens.length
tempchoice=TokenSymbol.new(:CHOICE)
for k in 0...runsize
tempchoice.addRaw(@tokens[i+k])
end
tempchoice.simplify
newrunsize=runsize
if tempchoice.kind==:CHOICE && tempchoice.tokens.length<runsize
for k in 0...tempchoice.tokens.length
@tokens[i+k]=tempchoice[k]
end
newrunsize=tempchoice.tokens.length; done=false; changed=true
for k in (i+newrunsize)...tokenend
@tokens[k]=nil
end
elsif tempchoice.kind==:HEX || tempchoice.kind==:HEXRANGE
@tokens[i]=tempchoice
newrunsize=1; done=false; changed=true
for k in (i+newrunsize)...tokenend
@tokens[k]=nil
end
end
tok=@tokens[i]
end
i=tokenend
else
i+=1
end
end
@tokens.compact!
i=0;while i<@tokens.length
tok=@tokens[i]
if !isHexRangeRec && !(tok.kind==:HEX || tok.kind==:HEXRANGE)
# don't combine past non-hex/hexrange
break
end
if (tok.kind==:HEX || tok.kind==:HEXRANGE)
found=false
tokMin=(tok.kind==:HEX) ? tok.value : tok.value[0]
tokMax=(tok.kind==:HEX) ? tok.value : tok.value[1]
j=i+1;while j<@tokens.length
tok2=@tokens[j]
j+=1
next if !tok2 || tok==tok2
if !isHexRangeRec && !(tok.kind==:HEX || tok.kind==:HEXRANGE)
# don't combine past non-hex/hexrange
break
end
if tok2.kind==:HEXRANGE && (
(isHexRangeRec && tok2.minOccurs==tok.minOccurs && tok2.maxOccurs==tok.maxOccurs) ||
(tok2.minOccurs==1 && tok.minOccurs==1 && tok2.maxOccurs==1 && tok.maxOccurs==1))
if tok2.value[0]==tokMax+1
tok2.value[0]=tokMin
tok2.name=nil
changed=true
found=true; break
elsif tok2.value[1]==tokMin-1
tok2.value[1]=tokMax
tok2.name=nil
changed=true
found=true; break
end
elsif tok2.kind==:HEX && (
(isHexRangeRec && tok2.minOccurs==tok.minOccurs &&
tok2.maxOccurs==tok.maxOccurs) ||
(tok2.minOccurs==1 && tok.minOccurs==1 &&
tok2.maxOccurs==1 && tok.maxOccurs==1))
if tok2.value==tokMax+1
tok2.kind=:HEXRANGE
tok2.value=[tokMin,tok2.value]
tok2.name=nil
changed=true
found=true; break
elsif tok2.value==tokMin-1
tok2.kind=:HEXRANGE
tok2.value=[tok2.value,tokMax]
changed=true
tok2.name=nil
found=true; break
end
end
end
if found
@tokens.delete_at(i)
i-=1
done=false
end
end
i+=1
end
end
end
if @kind==:CHOICE && ((self.minOccurs==1 && self.maxOccurs==1) ||
self.allHexChoiceRecursive())
done=false
while !done
done=true
i=0;while i<@tokens.length
tok=@tokens[i]
if tok.kind==:CHOICE && tok.length>1 && tok.minOccurs==1 &&
tok.maxOccurs==1
oldPosition=i
@tokens.delete_at(oldPosition)
i-=1
changed=true
done=false
for j in 0...tok.length
self.insertChild(tok[tok.length-1-j],oldPosition)
end
end
i+=1
end
end
end
if @kind==:STRING && @value.length==1 && !value[/[A-Za-z]/]
self.kind=:HEX
[email protected][0]
changed=true
end
if @kind==:SEQUENCE
for i in [email protected]
tok=@tokens[i]
nexttok=@tokens[i+1]
if nexttok && tok && tok.minOccurs==1 && tok.maxOccurs==1 &&
nexttok.minOccurs==1 && nexttok.maxOccurs==1
if tok.kind==:STRING && nexttok.kind==:STRING
tok.value+=nexttok.value
changed=true
@tokens[i+1]=nil
next
elsif tok.kind==:HEX && nexttok.kind==:HEX &&
!self.isAsciiLetter(tok.value) &&
!self.isAsciiLetter(nexttok.value)
tok.value=tok.value.chr
tok.value+=nexttok.value.chr
tok.kind=:STRING
changed=true
@tokens[i+1]=nil
next
end
end
end
end
@tokens.compact!
if (@kind==:CHOICE || @kind==:SEQUENCE) && @tokens.length==1 &&
@tokens[0].minOccurs==1 &&
@tokens[0].maxOccurs==1
self.kind=@tokens[0].kind
self.value=@tokens[0].value
@name=@tokens[0].name if !@name || @name.length==0
@tokens=@tokens[0].tokens
changed=true
end
for tok in @tokens
if tok.kind!=:HEX && tok.kind!=:HEXRANGE
oldkind=tok.kind
oldtok=tok.inspect
tok.simplify
if oldkind!=tok.kind
changed=true
else
changed|=(oldtok!=tok.inspect)
end
end
end
end
return self
end
def inspect
inspectDepth(0)
end
end
class ABNF
attr_reader :indexedTokens
attr_reader :maxLengthTokens
def initialize
@indexedTokens={}
@maxLengthTokens={}
end
#
# Parses a string containing ABNF productions.
# Returns a hash whose keys are the names and the
# values are ABNF grammar trees. There are six kinds of tokens:
# :SEQUENCE - matches all of the grammar nodes in order
# :CHOICE - matches one of the grammar nodes, to be
# checked in order
# :HEX - matches a single byte or character
# :HEXRANGE - matches a single byte or character within
# a range of values
# :STRING - matches an ASCII case-insensitive string
# :WORD - refers to another ABNF production
#
def parse(abnf)
abnflist={}
for a in splitAbnfLines(abnf)
parseAbnf(a,abnflist)
end
return abnflist
end
private
def isTokenStart(tok)
return false if !tok
case tok[0]
when :WORD,:STRING,:HEX,:HEXRANGE,:PARENOPEN
return true
else
return false
end
end
def parseAbnf(abnf,abnflist)
abnftest=[abnf]
tokens=[]
while true
token=nextToken(abnftest)
raise "Invalid ABNF: "+abnf if !token
break if token[0]==:END
tokens.push(token)
end
index=0
while index<tokens.length
if tokens[index][0]==:TOKENVALUE
@indexedTokens[tokens[index][1][0]]=tokens[index][1][1]
index += 1
elsif tokens[index][0]==:MAXLENGTH
@maxLengthTokens[tokens[index][1][0]]=tokens[index][1][1]
index += 1
else
break
end
end
if tokens.length-index>2
if tokens[index][0]==:WORD && tokens[index+1][0]==:EQUALS
name=tokens[index][1]
root=TokenSymbol.new(:SEQUENCE)
node=root
nextMinOccurs=nil
nextMaxOccurs=nil
for i in index+2...tokens.length
tok=tokens[i]
# Includes internal token :CSSTRING, for case-sensitive strings,
# converting them to :SEQUENCE tokens
case tok[0]
when :WORD, :STRING, :CSSTRING, :HEX, :HEXRANGE
if nextMinOccurs
if tok[0]==:STRING && tok[1].length==1 && tok[1][/[A-Za-z]/]
newtok=node.add([:CHOICE,nil])
newtok.add([:HEX,tok[1].upcase.bytes[0]])
newtok.add([:HEX,tok[1].downcase.bytes[0]])
newtok.setMinMax(nextMinOccurs,nextMaxOccurs)
nextMinOccurs=nil
else
newtok=nil
if tok[0]==:CSSTRING
newtok=node.add([:SEQUENCE,nil])
for b in tok[1].bytes
newtok.add([:HEX,b])
end
p newtok
else
newtok=node.add(tok)
end
newtok.setMinMax(nextMinOccurs,nextMaxOccurs)
nextMinOccurs=nil
end
else
if tok[0]==:STRING && tok[1].length==1 && tok[1][/[A-Za-z]/]
newtok=node.add([:CHOICE,nil])
newtok.add([:HEX,tok[1].upcase.bytes[0]])
newtok.add([:HEX,tok[1].downcase.bytes[0]])
else
newtok=nil
if tok[0]==:CSSTRING
newtok=node.add([:SEQUENCE,nil])
for b in tok[1].bytes
newtok.add([:HEX,b])
end
else
node.add(tok)
end
end
end
when :SLASH
if node.kind==:SEQUENCE
if node.parent && node.parent.kind==:CHOICE
node=node.parent.addChild(TokenSymbol.new(:SEQUENCE))
else
node.kind=:CHOICE
node2=TokenSymbol.new(:SEQUENCE)
for i in 0...node.length
node2.addChild(node[i])
end
node.clear
node.addChild(node2)
node=node.addChild(TokenSymbol.new(:SEQUENCE))
end
else
node=node.addChild(TokenSymbol.new(:SEQUENCE))
end
when :PARENOPEN
node=node.addChild(TokenSymbol.new(:SEQUENCE))
if nextMinOccurs
node.setMinMax(nextMinOccurs,nextMaxOccurs)
nextMinOccurs=nil
end
when :PARENCLOSE, :BRACKETCLOSE
node=node.parent
if node && node.kind==:CHOICE
node=node.parent
end
when :BRACKETOPEN
node=node.addChild(TokenSymbol.new(:SEQUENCE))
node.minOccurs=0
when :ATLEASTNUMTIMES
raise "Invalid ABNF" if !isTokenStart(tokens[i+1])
nextMinOccurs=tokens[i][1]
nextMaxOccurs=-1
when :NUMTIMES
raise "Invalid ABNF" if !isTokenStart(tokens[i+1])
nextMinOccurs=tokens[i][1]
nextMaxOccurs=tokens[i][1]
when :MINMAXTIMES
raise "Invalid ABNF" if !isTokenStart(tokens[i+1])
nextMinOccurs=tokens[i][1][0]
nextMaxOccurs=tokens[i][1][1]
when :ANYNUMTIMES
raise "Invalid ABNF" if !isTokenStart(tokens[i+1])
nextMinOccurs=-1
nextMaxOccurs=-1
when :TOKENVALUE
@indexedTokens[tokens[i][1][0]]=tokens[i][1][1]
when :MAXLENGTH
@maxLengthTokens[tokens[i][1][0]]=tokens[i][1][1]
else
raise "Invalid ABNF: "+abnf
end
end
root.simplify
root.name=name
if abnflist[name]
p "Warning: "+name+" defined already"
end
abnflist[name]=root
end
end
end
def chop(str,y)
return str[y.length,str.length]
end
def nextToken(xa)
x=xa[0]
x=x.gsub(/\A\s+/,"")
if x.length==0
return [:END,nil]
end
if x[/\A;(.*)/]
# NOTE: This is an extension of ABNF, in
# order to allow numbers to be assigned
# to ABNF productions
xa[0]=chop(x,$&)
cmt=$1
if cmt[/\A\s*TOKEN\:\s*(\S+)\s+(\d+)/]
return [:TOKENVALUE,[$1,$2.to_i]]
end
# NOTE: This is an extension of ABNF, in
# order to assign a maximum string length for
# ABNF productions
if cmt[/\A\s*MAXLENGTH\:\s*(\S+)\s+(\d+)/]
return [:MAXLENGTH,[$1,$2.to_i]]
end
# comment
return nextToken(xa)
end
if x[/\A\(\s*/]
xa[0]=chop(x,$&)
return [:PARENOPEN,nil]
elsif x[/\A\)\s*/]
xa[0]=chop(x,$&)
return [:PARENCLOSE,nil]
elsif x[/\A\[\s*/]
xa[0]=chop(x,$&)
return [:BRACKETOPEN,nil]
elsif x[/\A\]\s*/]
xa[0]=chop(x,$&)
return [:BRACKETCLOSE,nil]
elsif x[/\A\/\s*/]
xa[0]=chop(x,$&)
return [:SLASH,nil]
elsif x[/\A([0-9]+)\*([0-9]+)/]
xa[0]=chop(x,$&)
return [:MINMAXTIMES,[$1.to_i,$2.to_i]]
elsif x[/\A\*([0-9]+)/]
xa[0]=chop(x,$&)
return [:MINMAXTIMES,[0,$1.to_i]]
elsif x[/\A([0-9]+)\*/]
xa[0]=chop(x,$&)
return [:ATLEASTNUMTIMES,$&.to_i]
elsif x[/\A\*/]
xa[0]=chop(x,$&)
return [:ANYNUMTIMES,nil]
elsif x[/\A<\">/]
xa[0]=chop(x,$&)
return [:STRING,"\""]
elsif x[/\A(?:\%i)?\"([^\"]+)\"/]
# Case-insensitive string
xa[0]=chop(x,$&)
return [:STRING,$1]
elsif x[/\A\%s\"([^\"]+)\"/]
# Case-sensitive string
xa[0]=chop(x,$&)
return [:CSSTRING,$1]
elsif x[/\A\%x([0-9A-Fa-f]+)-([0-9A-Fa-f]+)/]
xa[0]=chop(x,$&)
return [:HEXRANGE,[$1.to_i(16),$2.to_i(16)]]
elsif x[/\A\%x([0-9A-Fa-f]+)/]
xa[0]=chop(x,$&)
hexval=$1.to_i(16)
x=xa[0]
if x[/\A\.([0-9A-Fa-f]+)/]
str=hexval.chr
while x[/\A\.([0-9A-Fa-f]+)/]
xa[0]=chop(x,$&)
str+=($1.to_i(16)).chr
x=xa[0]
end
return [:CSSTRING,str]
else
return [:HEX,hexval]
end
elsif x[/\A\%d([0-9]+)-([0-9]+)/]
xa[0]=chop(x,$&)
return [:HEXRANGE,[$1.to_i(10),$2.to_i(10)]]
elsif x[/\A\%d([0-9]+)/]
xa[0]=chop(x,$&)
hexval=$1.to_i(10)
x=xa[0]
if x[/\A\.([0-9]+)/]
str=hexval.chr
while x[/\A\.([0-9]+)/]
xa[0]=chop(x,$&)
str+=($1.to_i(10)).chr
x=xa[0]
end
return [:CSSTRING,str]
else
return [:HEX,hexval]
end
elsif x[/\A\:?\=/]
xa[0]=chop(x,$&)
return [:EQUALS,nil]
elsif x[/\A([0-9]+)/]
xa[0]=chop(x,$&)
return [:NUMTIMES,$&.to_i]
elsif x[/\A([A-Za-z0-9_-]+)/]
xa[0]=chop(x,$&)
return [:WORD,$&]
end
return nil
end
def splitAbnfLines(x)
lines=x.split(/\r?\n/)
ret=[]
curline=nil
filebegin=""
for line in lines
if line[/^\s*([A-Za-z0-9_-]+)\s*\:?\=/]
ret.push(curline) if curline && curline.length>0
curline=filebegin+line+"\r\n"
filebegin=""
elsif curline && curline.length>0
curline+=line+"\r\n"
else
filebegin+=line+"\r\n"
end
end
ret.push(curline||filebegin)
return ret
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment