Skip to content

Instantly share code, notes, and snippets.

@momchil-velikov
Last active January 1, 2016 07:19
Show Gist options
  • Save momchil-velikov/8111004 to your computer and use it in GitHub Desktop.
Save momchil-velikov/8111004 to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"io"
"strings"
)
const (
None = iota
Cat
Alt
ZeroOrMore
OneOrMore
ZeroOrOne
CharClass
CharSeq
)
type operands [2]*ast
type ast struct {
op byte
txt string
operand operands
}
type parse_error struct {
nested error
msg string
}
func (e *parse_error) Error() string {
if e.nested == nil {
return e.msg
} else {
return fmt.Sprintf("parse error: %s: %s", e.msg, e.nested.Error())
}
}
func (t *ast) dot() {
if t == nil {
return
}
fmt.Printf("n%p [label=\"", t)
switch t.op {
case Cat:
fmt.Printf("concat")
case Alt:
fmt.Printf("alter")
case ZeroOrMore:
fmt.Printf("zero-or-more")
case OneOrMore:
fmt.Printf("one-or-more")
case ZeroOrOne:
fmt.Printf("zero-or-one")
case CharClass:
fmt.Printf("char-class: %s", t.txt)
case CharSeq:
fmt.Printf("char-seq: %s", t.txt)
}
fmt.Printf("\" ]\n")
if t.operand[0] != nil {
fmt.Printf("n%p -> n%p\n", t, t.operand[0])
t.operand[0].dot()
}
if t.operand[1] != nil {
fmt.Printf("n%p -> n%p\n", t, t.operand[1])
t.operand[1].dot()
}
}
func (t *ast) Dot() {
fmt.Println("digraph AST {\n")
t.dot()
fmt.Println("}\n")
}
func (t *ast) compact() *ast {
op0, op1 := t.operand[0], t.operand[1]
if op0 != nil {
op0 = op0.compact()
}
if op1 != nil {
op1 = op1.compact()
}
t.operand[0] = op0
t.operand[1] = op1
if op0 == nil || op1 == nil {
return t
}
if t.op == Cat {
if op0.op == CharSeq && op1.op == CharSeq {
return &ast{CharSeq, op0.txt + op1.txt, operands{nil, nil}}
} else if op0.op == CharSeq && op1.op == Cat && op1.operand[0].op == CharSeq {
p := &ast{CharSeq, op0.txt + op1.operand[0].txt, operands{nil, nil}}
return &ast{Cat, "", operands{p, op1.operand[1]}}
} else if op0.op == Cat && op0.operand[1].op == CharSeq && op1.op == CharSeq {
p := &ast{CharSeq, op0.operand[1].txt + op1.txt, operands{nil, nil}}
return &ast{Cat, "", operands{op0.operand[0], p}}
}
}
return t
}
// re := concat '|' concat
// re := concat
// concat := closure concat
// concat := closure
// closure := term '*'
// closure := term '?'
// closure := term '+'
// closure := term
// term := '(' re ')'
// term := '[' char-seq ']'
// term := char
func match(ch byte, r io.ByteScanner) error {
c, e := r.ReadByte()
if e == nil {
if c == ch {
return nil
} else {
r.UnreadByte()
return &parse_error{nil, fmt.Sprintf("expected '%c', got '%c'", ch, c)}
}
} else {
return e
}
}
func is_reserved(ch byte) bool {
return ch == '|' || ch == '*' || ch == '+' || ch == '?' || ch == '[' ||
ch == ']' || ch == '(' || ch == ')'
}
func parse_char_class(r io.ByteScanner) (*ast, error) {
txt := []byte{}
ch, e := r.ReadByte()
for e == nil {
if is_reserved(ch) {
r.UnreadByte()
break
}
txt = append(txt, ch)
ch, e = r.ReadByte()
}
if len(txt) == 0 {
return nil, &parse_error{nil, "Empty character class"}
} else {
return &ast{CharClass, string(txt), operands{nil, nil}}, nil
}
}
func parse_term(r io.ByteScanner) (*ast, error) {
ch, e := r.ReadByte()
if e != nil {
return nil, e
}
switch ch {
case '(':
if t, e := parse_re(r); e != nil {
return nil, e
} else if e = match(')', r); e != nil {
return nil, e
} else {
return t, nil
}
case '[':
if t, e := parse_char_class(r); e != nil {
return nil, e
} else if e = match(']', r); e != nil {
return nil, e
} else {
return t, nil
}
default:
if is_reserved(ch) {
return nil, &parse_error{nil,
fmt.Sprintf("Unexpected character: '%c'", ch)}
} else {
return &ast{CharSeq, string(ch), operands{nil, nil}}, nil
}
}
}
func parse_closure(r io.ByteScanner) (*ast, error) {
t, e := parse_term(r)
if e != nil {
return t, e
}
ch, e := r.ReadByte()
if e != nil {
return t, e
}
switch ch {
case '*':
return &ast{ZeroOrMore, "", operands{t, nil}}, nil
case '+':
return &ast{OneOrMore, "", operands{t, nil}}, nil
case '?':
return &ast{ZeroOrOne, "", operands{t, nil}}, nil
default:
r.UnreadByte()
return t, nil
}
}
func parse_concat(r io.ByteScanner) (*ast, error) {
t, e := parse_closure(r)
if e != nil {
return t, e
}
var ch byte
for ch, e = r.ReadByte(); e == nil; ch, e = r.ReadByte() {
r.UnreadByte()
if ch != '(' && ch != '[' && is_reserved(ch) {
return t, nil
} else {
p, e := parse_closure(r)
if e == nil || e == io.EOF {
t = &ast{Cat, "", operands{t, p}}
} else {
return nil, e
}
}
}
return t, e
}
func parse_re(r io.ByteScanner) (*ast, error) {
t, e := parse_concat(r)
if e != nil {
return t, e
}
var ch byte
for ch, e = r.ReadByte(); e == nil; ch, e = r.ReadByte() {
if ch == '|' {
p, e := parse_concat(r)
if e == nil || e == io.EOF {
t = &ast{Alt, "", operands{t, p}}
} else {
return nil, e
}
} else {
r.UnreadByte()
return t, nil
}
}
return t, e
}
func Parse(r io.ByteScanner) (*ast, error) {
t, e := parse_re(r)
if e == io.EOF {
return t.compact(), nil
} else {
return nil, e
}
}
type nfa_edge struct {
label string
dst int
}
type nfa_state struct {
succ []nfa_edge
}
type nfa struct {
start, end int
states []nfa_state
}
func (a *nfa) add_edge(from, to int, label string) {
s := append(a.states[from].succ, nfa_edge{label, to})
a.states[from].succ = s
}
func (a *nfa) add_state() int {
s := len(a.states)
a.states = append(a.states, nfa_state{[]nfa_edge(nil)})
return s
}
func (a *nfa) build_nfa(t *ast, start, end int) {
switch t.op {
case Cat:
m := a.add_state()
a.build_nfa(t.operand[0], start, m)
a.build_nfa(t.operand[1], m, end)
case Alt:
s0, e0 := a.add_state(), a.add_state()
a.build_nfa(t.operand[0], s0, e0)
s1, e1 := a.add_state(), a.add_state()
a.build_nfa(t.operand[1], s1, e1)
a.add_edge(start, s0, "")
a.add_edge(start, s1, "")
a.add_edge(e0, end, "")
a.add_edge(e1, end, "")
case ZeroOrMore:
s, e := a.add_state(), a.add_state()
a.build_nfa(t.operand[0], s, e)
a.add_edge(e, s, "")
a.add_edge(start, s, "")
a.add_edge(e, end, "")
a.add_edge(start, end, "")
case OneOrMore:
a.build_nfa(t.operand[0], start, end)
a.add_edge(end, start, "")
case ZeroOrOne:
s, e := a.add_state(), a.add_state()
a.build_nfa(t.operand[0], s, e)
a.add_edge(start, s, "")
a.add_edge(e, end, "")
a.add_edge(start, end, "")
case CharClass:
a.add_edge(start, end, "["+t.txt+"]")
case CharSeq:
a.add_edge(start, end, t.txt)
}
}
func (t *ast) BuildNFA() *nfa {
a := &nfa{}
a.start, a.end = a.add_state(), a.add_state()
a.build_nfa(t, a.start, a.end)
return a
}
func (s *nfa_state) dot(n int, a *nfa) {
fmt.Printf("n%d", n)
if n == a.start {
fmt.Printf("[color=green]")
} else if n == a.end {
fmt.Printf("[color=red]")
}
fmt.Println("")
for _, e := range s.succ {
fmt.Printf("n%d -> n%d [label=\"%s\"]\n", n, e.dst, e.label)
}
}
func (a *nfa) Dot() {
fmt.Println("digraph NFA { rankdir=LR;")
for i, s := range a.states {
s.dot(i, a)
}
fmt.Println("}")
}
func main() {
// t, err := Parse(strings.NewReader("(a|b)+ab"))
t, err := Parse(strings.NewReader("((ab|b*)+cd)+"))
// t, err := Parse(strings.NewReader("((a*b)c)+cd"))
// t, err := Parse(strings.NewReader("d+(.d*)?(E(p|m)?d+)?"))
// t, err := Parse(strings.NewReader("[0123456789]+(.[0123456789]*)?(E-?[0123456789]+)?"))
if err == nil {
t.Dot()
a := t.BuildNFA()
a.Dot()
} else {
fmt.Printf("error: %s\n", err.Error())
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment