Last active
December 6, 2019 11:06
-
-
Save achun/5730664 to your computer and use it in GitHub Desktop.
Go 四则运算词法解析相关 由中缀表达式构建AST 逆波兰序列生成
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//ReversePolishnotation,RPN | |
//逆波兰表达式相关 | |
//lexer 是一个中缀数学四则计算表达式词法解析器 | |
package rpn | |
import ( | |
"errors" | |
) | |
type Token int | |
func (p Token) String() string { | |
return tokens[p] | |
} | |
const eof = -1 | |
const ( | |
TokenNil Token = iota | |
paren_beg //paren_beg | |
TokenLeftParen //"(" | |
TokenRightParen //")" | |
paren_end | |
literal_beg //literal_beg | |
TokenNumber //0..9 | |
TokenDot //"." | |
literal_end | |
operator_beg //operator_beg | |
TokenAdd //"+" | |
TokenSub //"-" | |
TokenDiv //"/" | |
TokenMul //"*" | |
operator_end | |
//TokenAssign //"=" | |
//词义token | |
TokenInt //整数 | |
TokenDecimal //十进制小数 | |
TokenFloat //浮点数 | |
TokenOperator //+-*/ | |
TokenValue | |
TokenExp //表达式 | |
TokenWhiteSpace | |
TokenError | |
) | |
var tokens = [...]string{ | |
TokenNil: "Nil", | |
TokenError: "Error", | |
TokenDot: "Dot", | |
TokenNumber: "Number", | |
TokenDecimal: "Decimal", | |
TokenAdd: "Add", | |
TokenSub: "Sub", | |
TokenMul: "Mul", | |
TokenDiv: "Div", | |
TokenWhiteSpace: "WhiteSpace", | |
TokenLeftParen: "LeftParen", | |
TokenRightParen: "RightParen", | |
} | |
// token变化关系,设置上一个token允许的类型 | |
var lexerwish = [...][]Token{ | |
TokenNumber: []Token{TokenNil, TokenNumber, TokenDecimal, TokenAdd, TokenSub, TokenMul, TokenDiv, TokenLeftParen}, | |
TokenDot: []Token{TokenNil, TokenNumber, TokenAdd, TokenSub, TokenMul, TokenDiv, TokenLeftParen}, | |
TokenDecimal: []Token{TokenDot, TokenAdd, TokenSub, TokenMul, TokenDiv, TokenLeftParen}, | |
TokenAdd: []Token{TokenNumber, TokenDecimal, TokenRightParen}, | |
TokenSub: []Token{TokenNumber, TokenDecimal, TokenRightParen}, | |
TokenMul: []Token{TokenNumber, TokenDecimal, TokenRightParen}, | |
TokenDiv: []Token{TokenNumber, TokenDecimal, TokenRightParen}, | |
TokenLeftParen: []Token{TokenNil, TokenAdd, TokenSub, TokenMul, TokenDiv, TokenLeftParen}, | |
TokenRightParen: []Token{TokenNumber, TokenDecimal, TokenRightParen}, | |
} | |
type stateFn func() stateFn | |
type lexer struct { | |
closed bool | |
input string | |
pos int //pos | |
runer rune //当前pos的字符 | |
token Token //上一个已经确定的 Token | |
value []rune //上一个token 对应的value | |
t Token //当前可变动的 token | |
v []rune //当前可变动的 token 对应的value | |
leftParenCounter int // (计数器 | |
rightParenCounter int // )计数器 | |
errMsg string //错误信息 | |
cha chan bool | |
} | |
//根据输入文本新建词法解析实例 | |
func NewLexer(input string) (p *lexer) { | |
p = new(lexer) | |
p.cha = make(chan bool) | |
p.input = input | |
go p.run() | |
return | |
} | |
func (p *lexer) run() { | |
fn := p.walk | |
for pos, r := range p.input { | |
p.pos = pos | |
p.runer = r | |
if p.closed { | |
print("closed ", pos, string(r)) | |
return | |
} | |
fn = fn() | |
if fn == nil { | |
print("fn == nil ") | |
p.Close() | |
p.token = TokenError | |
if p.errMsg == "" { | |
p.error("lexer error") | |
} | |
return | |
} | |
} | |
p.save() | |
p.Close() | |
if p.leftParenCounter != p.rightParenCounter { | |
p.error("rightParen Not paired") | |
} else { | |
p.token = TokenNil | |
p.v = []rune{} | |
} | |
} | |
func (p *lexer) Error() error { | |
if len(p.errMsg) == 0 { | |
return nil | |
} | |
return errors.New(p.errMsg) | |
} | |
//单步词法解析 | |
//返回: 词法 token 序号和字面值 | |
// 以下两种情况解析过程将自动终止 | |
// TokenError 表示发生了词法错误 | |
// TokenNil 表示解析结束 | |
func (p *lexer) Next() (t Token, v []rune) { | |
<-p.cha | |
//println("Next()", p.pos) | |
t = p.token | |
v = p.value | |
<-p.cha | |
return | |
} | |
//终止解析过程 | |
//因内部使用了 gorountine.如果需要中途终止解析过程,Close()提供结束 gorountine 的方法 | |
func (p *lexer) Close() { | |
if p.closed { | |
return | |
} | |
p.closed = true | |
close(p.cha) | |
} | |
//保存 p.t ,返回 p.walk 继续执行 | |
func (p *lexer) save() { | |
if p.closed || p.t == TokenNil { | |
return | |
} | |
p.token = p.t | |
p.value = p.v | |
p.cha <- true | |
p.cha <- true | |
p.v = []rune{} | |
} | |
//设置 p.t | |
func (p *lexer) emit(token Token) { | |
p.t = token | |
} | |
// append(p.v, p.runer) 返回 p.walk 继续执行 | |
func (p *lexer) append() { | |
p.v = append(p.v, p.runer) | |
} | |
//设置错误信息 | |
func (p *lexer) error(str string) { | |
p.token = TokenError | |
p.errMsg = "lexer error: " + str | |
} | |
func (p *lexer) walk() stateFn { | |
r := p.runer | |
var fn stateFn | |
var token Token | |
if r >= '0' && r <= '9' { | |
token = TokenNumber | |
fn = p.number | |
} else { | |
switch r { | |
case '\t', '\n', ' ': | |
token = TokenWhiteSpace | |
fn = p.whiteSpace | |
case '.': | |
token = TokenDot | |
fn = p.dot | |
case '+': | |
token = TokenAdd | |
fn = p.add | |
case '-': | |
token = TokenSub | |
fn = p.sub | |
case '*': | |
token = TokenMul | |
fn = p.mul | |
case '/': | |
token = TokenDiv | |
fn = p.div | |
case '(': | |
token = TokenLeftParen | |
fn = p.leftParen | |
case ')': | |
token = TokenRightParen | |
fn = p.rightParen | |
} | |
} | |
if fn == nil { | |
p.error("unknow char " + " \"" + string(r) + "\"") | |
return nil | |
} | |
for _, i := range lexerwish[token] { | |
if p.t == i { | |
return fn() | |
} | |
} | |
p.error("token " + token.String() + " \"" + string(r) + "\"") | |
return nil | |
} | |
func (p *lexer) eof() stateFn { | |
return nil | |
} | |
func (p *lexer) number() stateFn { | |
switch p.t { | |
default: | |
p.error("number") | |
return nil | |
case TokenLeftParen: | |
p.save() | |
p.emit(TokenNumber) | |
case TokenNil, TokenNumber: | |
p.emit(TokenNumber) | |
case TokenDot, TokenDecimal: | |
p.emit(TokenDecimal) | |
case TokenAdd, TokenSub, TokenMul, TokenDiv: | |
p.save() | |
p.emit(TokenNumber) | |
} | |
p.append() | |
return p.walk | |
} | |
func (p *lexer) dot() stateFn { | |
switch p.t { | |
default: | |
p.error("dot") | |
return nil | |
case TokenNil: | |
case TokenNumber: | |
p.emit(TokenDecimal) | |
} | |
p.append() | |
return p.walk | |
} | |
func (p *lexer) add() stateFn { | |
p.save() | |
p.emit(TokenAdd) | |
p.append() | |
return p.walk | |
} | |
func (p *lexer) sub() stateFn { | |
p.save() | |
p.emit(TokenSub) | |
p.append() | |
return p.walk | |
} | |
func (p *lexer) mul() stateFn { | |
p.save() | |
p.emit(TokenMul) | |
p.append() | |
return p.walk | |
} | |
func (p *lexer) div() stateFn { | |
p.save() | |
p.emit(TokenDiv) | |
p.append() | |
return p.walk | |
} | |
func (p *lexer) leftParen() stateFn { | |
p.leftParenCounter++ | |
p.save() | |
p.emit(TokenLeftParen) | |
p.append() | |
return p.walk | |
} | |
func (p *lexer) rightParen() stateFn { | |
if p.rightParenCounter >= p.leftParenCounter { | |
p.error("rightParen Not paired") | |
return nil | |
} | |
p.rightParenCounter++ | |
p.save() | |
p.emit(TokenRightParen) | |
p.append() | |
return p.walk | |
} | |
// 待完善 | |
func (p *lexer) whiteSpace() stateFn { | |
p.error("whiteSpace") | |
return nil | |
} | |
type node struct { | |
token Token | |
v []rune | |
as Token // 分类token | |
} | |
//ast树 | |
type tree struct { | |
node *node | |
left *tree | |
right *tree | |
parent *tree | |
errMsg string | |
} | |
//要求node已经保障了合法性 | |
//树节点次序由左至右 | |
func (t *tree) push(n *node) *tree { | |
switch n.as { | |
case TokenValue: | |
if t.left == nil { | |
t.left = &tree{node: n, parent: t} | |
} else if t.right == nil { | |
t.right = &tree{node: n, parent: t} | |
} else { | |
t.errMsg = "tree:not nil node of left and right " + string(n.v) | |
return nil | |
} | |
return t | |
case TokenOperator: | |
if t.node == nil { | |
t.node = n | |
return t | |
} | |
if t.right != nil { | |
x := opLeve[n.token] - opLeve[t.node.token] | |
if x > 0 { | |
t.right = &tree{node: n, left: t.right, parent: t} | |
t.right.left.parent = t.right | |
return t.right | |
} | |
if x < 0 && t.parent != nil { | |
return t.parent.push(n) | |
} | |
nt := &tree{node: n, left: t, parent: t.parent} | |
if t.parent != nil { | |
if t.parent.left == t { | |
t.parent.left = nt | |
} else { | |
t.parent.right = nt | |
} | |
} | |
t.parent = nt | |
return nt | |
} | |
t.errMsg = "tree:expect not nil node of right" | |
return nil | |
case TokenLeftParen: | |
if t.left == nil { | |
t.left = &tree{parent: t} | |
return t.left | |
} | |
if t.right == nil { | |
t.right = &tree{parent: t} | |
return t.right | |
} | |
t.errMsg = "tree:not nil node of left and right when TokenLeftParen" | |
return nil | |
case TokenRightParen: | |
return t.parent | |
} | |
t.errMsg = "tree:unknow node as " + n.token.String() | |
return nil | |
} | |
func (t *tree) root() *tree { | |
ret := t | |
for ret.parent != nil { | |
ret = ret.parent | |
} | |
return ret | |
} | |
func (t *tree) String(deep int) string { | |
prefix := "" | |
for i := deep; i > 0; i-- { | |
prefix += "\t" | |
} | |
str := "" | |
if t.node != nil { | |
str += t.node.token.String() + ":" + string(t.node.v) + "\n" | |
} else { | |
str += "NULL:\n" | |
} | |
if t.left != nil { | |
str += prefix + "\tLeft " + t.left.String(deep+1) | |
} | |
if t.right != nil { | |
str += prefix + "\tRight " + t.right.String(deep+1) | |
} | |
return str | |
} | |
//返回左叶子节点 | |
func (t *tree) leaf(stop *tree) *tree { | |
if t == stop { | |
return t.parent | |
} | |
if t.left == nil { | |
return t | |
} | |
return t.left.leaf(stop) | |
} | |
//消除有侧多余的空节点 | |
func (t *tree) less() { | |
leaf := t | |
for leaf != nil { | |
if leaf.node != nil && leaf.node.as == TokenValue { | |
break | |
} | |
if leaf.parent == nil { | |
leaf = leaf.right | |
continue | |
} | |
if leaf.node == nil { | |
if leaf.left == nil { | |
leaf.parent.right = leaf.right | |
leaf.right.parent = leaf.parent | |
} else if leaf.right == nil { | |
leaf.parent.right = leaf.left | |
leaf.left.parent = leaf.parent | |
} | |
} | |
leaf = leaf.right | |
} | |
leaf = t | |
for leaf != nil { | |
if leaf.node != nil && leaf.node.as == TokenValue { | |
break | |
} | |
if leaf.parent == nil { | |
leaf = leaf.left | |
continue | |
} | |
if leaf.node == nil { | |
if leaf.left == nil { | |
leaf.parent.left = leaf.right | |
leaf.right.parent = leaf.parent | |
} else if leaf.right == nil { | |
leaf.parent.left = leaf.left | |
leaf.left.parent = leaf.parent | |
} | |
} | |
leaf = leaf.left | |
} | |
} | |
//遍历树 | |
//fn 为遍历回调函数,他返回一般bool值,表示是否终止遍历 | |
func (t *tree) Walk(fn func(*tree) bool) { | |
t.walk(t, fn) | |
} | |
func (t *tree) walk(stop *tree, fn func(*tree) bool) bool { | |
if t.left != nil { | |
if t.left.walk(stop, fn) { | |
return true | |
} | |
} | |
if t.right != nil { | |
if t.right.walk(stop, fn) { | |
return true | |
} | |
} | |
if fn(t) { | |
return true | |
} | |
return t == stop | |
} | |
func (t *tree) Rpn() (ret []string) { | |
fn := func(tr *tree) bool { | |
ret = append(ret, string(tr.node.v)) | |
return false | |
} | |
t.Walk(fn) | |
return | |
} | |
var opLeve = [...]int{ | |
TokenAdd: 0, | |
TokenSub: 0, | |
TokenMul: 1, | |
TokenDiv: 1, | |
} | |
// 解析中缀表达式到tree | |
func Parse(input string) (*tree, error) { | |
l := NewLexer(input) | |
t := new(tree) | |
token, v := l.Next() | |
paren := 0 | |
for { | |
if token == TokenError { | |
return t, l.Error() | |
} | |
if token == TokenNil { | |
break | |
} | |
switch token { | |
default: | |
l.Close() | |
return t, errors.New("tree:unknow token " + token.String()) | |
case TokenDecimal, TokenNumber: | |
t = t.push(&node{token: token, v: v, as: TokenValue}) | |
case TokenAdd, TokenSub, TokenMul, TokenDiv: | |
t = t.push(&node{token: token, v: v, as: TokenOperator}) | |
case TokenLeftParen: | |
paren++ | |
t = t.push(&node{token: token, v: v, as: TokenLeftParen}) | |
case TokenRightParen: | |
paren-- | |
t = t.push(&node{token: token, v: v, as: TokenRightParen}) | |
//消除多余的()配对 | |
if paren == 0 { | |
//println("paren == 0");println(t.String(0));println("less") | |
t.less() | |
//println(t.String(0));println("===") | |
} | |
} | |
if t == nil { | |
l.Close() | |
return t, errors.New("tree:AST Error") | |
} | |
//println(t.root().String(0)) | |
token, v = l.Next() | |
} | |
return t.root(), nil | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package rpn | |
import ( | |
"fmt" | |
"testing" | |
) | |
func TestArithmetic(t *testing.T) { | |
fmt.Println("TestArithmetic: ") | |
ins := []string{ | |
"1+(2-3)", | |
"1+(2-3)*4", | |
"1+4*(2-3)", | |
"(0)-1+(1.5+1)+2*3/4+(((1+2)*4))", | |
} | |
for _, in := range ins { | |
ast, err := Parse(in) | |
if err != nil { | |
t.Error(err) | |
return | |
} | |
fmt.Println(in, "\n\t", ast.Rpn()) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment