Created
September 3, 2016 13:22
-
-
Save huydx/7255082437ca00250cec1e14f163fe46 to your computer and use it in GitHub Desktop.
descent_parser.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import "fmt" | |
// <regex> ::= <term> '|' <regex> | <term> | |
// <term> ::= { <factor> } | |
// <factor> ::= <base> { '*' } | |
// <base> ::= <char> | |
// | '\' <char> | |
// | '(' <regex> ')' | |
type Tree struct { | |
Name string | |
lex *Lexer | |
} | |
type Lexer struct { | |
input string | |
} | |
func (l *Lexer) peek() string { | |
return string(l.input[0]) | |
} | |
func (l *Lexer) eat(c string) { | |
if l.peek() == c { | |
l.input = l.input[1:] | |
} else { | |
panic(fmt.Errorf("illegal state")) | |
} | |
} | |
func (l *Lexer) next() string { | |
var c = l.peek() | |
l.eat(c) | |
return c | |
} | |
func (l *Lexer) more() bool { | |
return len(l.input) > 0 | |
} | |
type Node interface { | |
Type() NodeType | |
Tree() *Tree | |
String() string | |
} | |
type NodeType int | |
// Type returns itself and provides an easy default implementation | |
// for embedding in a Node. Embedded in all non-trivial Nodes. | |
func (t NodeType) Type() NodeType { | |
return t | |
} | |
const ( | |
NodeChoice NodeType = iota | |
NodeSequence | |
NodeRepetition | |
NodePrimitive | |
NodeBlank | |
) | |
func nodeType2String(t NodeType) string { | |
switch t { | |
case NodeChoice: return "NodeChoice" | |
case NodeSequence: return "NodeSequence" | |
case NodeRepetition: return "NodeRepetition" | |
case NodePrimitive: return "NodePrimitive" | |
case NodeBlank: return "NodeBlank" | |
default: | |
panic("unrecornize node type") | |
} | |
} | |
type ChoiceNode struct { | |
NodeType | |
tree *Tree | |
right Node | |
left Node | |
} | |
func (c *ChoiceNode) Tree() *Tree { | |
return c.tree | |
} | |
func (c *ChoiceNode) String() string { | |
return fmt.Sprintf("(%s, right :%s, left: %s)", nodeType2String(c.Type()), c.right.String(), c.left.String()) | |
} | |
type BlankNode struct { | |
NodeType | |
tree *Tree | |
} | |
func (c *BlankNode) String() string { | |
return fmt.Sprintf("%d", c.Type()) | |
} | |
func (c *BlankNode) Tree() *Tree { | |
return c.tree | |
} | |
type SequenceNode struct { | |
NodeType | |
tree *Tree | |
first Node | |
left Node | |
} | |
func (c *SequenceNode) Tree() *Tree { | |
return c.tree | |
} | |
func (c *SequenceNode) String() string { | |
return fmt.Sprintf("(%s %s %s)", nodeType2String(c.Type()), c.first.String(), c.left.String()) | |
} | |
type RepetitionNode struct { | |
NodeType | |
tree *Tree | |
internal Node | |
} | |
func (c *RepetitionNode) String() string { | |
return fmt.Sprintf("(%s %s)", nodeType2String(c.Type()), c.internal.String()) | |
} | |
func (c *RepetitionNode) Tree() *Tree { | |
return c.tree | |
} | |
type PrimitiveNode struct { | |
NodeType | |
tree *Tree | |
value string | |
} | |
func (c *PrimitiveNode) String() string { | |
return fmt.Sprintf("(%s %s)", nodeType2String(c.Type()), c.value) | |
} | |
func (c *PrimitiveNode) Tree() *Tree { | |
return c.tree | |
} | |
func (t *Tree) regex() Node { | |
var term = t.term() | |
if (t.lex.more() && t.lex.peek() == "|") { | |
t.lex.eat("|") | |
var regex = t.regex() | |
return &ChoiceNode{ | |
NodeChoice, | |
t, | |
term, | |
regex, | |
} | |
} else { | |
return term | |
} | |
} | |
func (t *Tree) term() Node { | |
var factor Node | |
factor = &BlankNode{NodeBlank, t} | |
for (t.lex.more() && t.lex.peek() != ")") && t.lex.peek() != "|" { | |
var nextfactor = t.factor() | |
factor = &SequenceNode{ | |
NodeSequence, | |
t, | |
factor, | |
nextfactor, | |
} | |
} | |
return factor | |
} | |
func (t *Tree) factor() Node { | |
var base = t.base() | |
for t.lex.more() && t.lex.peek() == "*" { | |
t.lex.eat("*") | |
base = &RepetitionNode{ | |
NodeRepetition, | |
t, | |
base, | |
} | |
} | |
return base | |
} | |
func (t *Tree) base() Node { | |
var p = t.lex.peek() | |
switch p { | |
case "(": | |
t.lex.eat("(") | |
var regex = t.regex() | |
t.lex.eat(")") | |
return regex | |
case "\\": | |
t.lex.eat("\\") | |
var esc = t.lex.next() | |
return &PrimitiveNode{ | |
NodePrimitive, | |
t, | |
esc, | |
} | |
default: | |
var esc = t.lex.next() | |
return &PrimitiveNode{ | |
NodePrimitive, | |
t, | |
esc, | |
} | |
} | |
} | |
func matcher(input string, node Node) bool { | |
switch node.Type() { | |
case NodeChoice: | |
n := node.(*ChoiceNode) | |
nodeLeft := n.left | |
nodeRight := n.right | |
return matcher(input, nodeLeft) || matcher(input, nodeRight) | |
case NodeRepetition: | |
//n := node.(*PrimitiveNode) | |
case NodePrimitive: | |
n := node.(*PrimitiveNode) | |
return input == n.value | |
case NodeSequence: | |
//n := node.(*SequenceNode) | |
case NodeBlank: | |
return true | |
default: | |
panic("invalid node type") | |
} | |
return false | |
} | |
func main() { | |
lex := &Lexer{input: "a*b"} | |
tree := &Tree{"tree", lex} | |
fmt.Println(tree.regex()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment