huydx · August 30, 2016 01:18
diff --git a/simple_regex.go b/simple_regex.go
 package main

 import (
 	"fmt"
 )

 func match(regex string, text string) bool {
 	if string(regex[0]) == "^" {
 		return matchhere(regex[1:], text)
 	}
 	for {
 		if matchhere(regex, text) {
 			return true
 		} else {
 			if end(text) {
 				return false
 			}
 			text = text[1:]
 		}
 	}
 	return false
 }

 func matchhere(regex string, text string) bool {
 	fmt.Printf("function %s called params %s %s\n", "matchhere", regex, text)
 	if end(regex) {
 		return false
 	}
 	if string(regex[0]) == "$" && len(regex) == 1 {
 		return len(text) == 0
 	}

 	if len(regex) > 2 && string(regex[1]) == "*" {
 		return matchstar(string(regex[0]), regex[2:], text)
 	}

 	if len(text) > 0 && (string(regex[0]) == "." || regex[0] == text[0]) {
 		return matchhere(regex[1:], text[1:])
 	}
 	return false
 }

 func end(text string) bool {
 	return len(text) == 0
 }

 func matchstar(c string, regex string, text string) bool {
 	fmt.Printf("function %s called params %s %s %s\n", "matchstar", c, regex, text)
 	for {
 		fmt.Printf("  loop inside matchstar %s\n", text)
 		if len(text) > 0 && string(text[0]) == c {
 			text = text[1:]
 			continue
 		}
 		return matchhere(regex, text)
 	}
 }

 func main() {
 	fmt.Println(match("^fo*$", "foo"))
 	fmt.Println(match("^fo*bar$", "foo"))
 	fmt.Println(match("^fo*bar$", "foooooooooooobar"))
 }

 ////////////////////////
 Some note about parsing technique
 Reference link:
 - http://math.hws.edu/javanotes/c9/s5.html
 - http://perl.plover.com/Regex/article.html
 - http://matt.might.net/articles/parsing-regex-with-recursive-descent/
 - https://swtch.com/~rsc/regexp/regexp1.html
 - http://matt.might.net/articles/nonblocking-lexing-toolkit-based-on-regex-derivatives/
 - http://genius.cat-v.org/brian-kernighan/articles/beautiful

 lexer : recognize token
 parser : from token to build structure tree

 3 types of regex:
 - basic regular expression (BRE)
 - extended regular expression (ERE)
 - Perl compatible regular expression (PCRE)

 regex symbol has math equivalent meaning

 http://web.cse.ohio-state.edu/software/2231/web-sw2/extras/slides/27.Recursive-Descent-Parsing.pdf
 a recursive descent parser for regex. NOT all grammar suit for recursive descent
 - idea is to construct procedure for each kind of grammar
 - peek(), next(), eat(item)
 - LL(k) type : determistic by examining next k tokens

 Context free grammar
 - production rules describe "all" possible strings
 - production rules == simple replacement

 EBNF, ABNF: http://matt.might.net/articles/grammars-bnf-ebnf/

 - NFA:
     - special kind of finite machine
     - include of:
          - set of symbols
          - set of states
          - next-state function
          - subset of states which accept state
          - initial state s0
     - NFA represent: table or graph
     - epsilon transition: transition state on an empty string??

 - DFA:
     - NO epsilon transition

 - Problem: convert regex string to a datastructure to easy manipulate and pattern matching
     - convert regex to NFA
          - using thompson algorithm
          - it's like arithmetic evaluation

 YACC
 %{
     header
 }
 %union
 %token
 %type

 %%
 rules
 %%

 user setting

 http://loveruby.net/ja/rhg/book/yacc.html
 http://qiita.com/k0kubun/items/1b641dfd186fe46feb65
	package main

	import (
	"fmt"
	)

	func match(regex string, text string) bool {
	if string(regex[0]) == "^" {
	return matchhere(regex[1:], text)
	}
	for {
	if matchhere(regex, text) {
	return true
	} else {
	if end(text) {
	return false
	}
	text = text[1:]
	}
	}
	return false
	}

	func matchhere(regex string, text string) bool {
	fmt.Printf("function %s called params %s %s\n", "matchhere", regex, text)
	if end(regex) {
	return false
	}
	if string(regex[0]) == "$" && len(regex) == 1 {
	return len(text) == 0
	}

	if len(regex) > 2 && string(regex[1]) == "*" {
	return matchstar(string(regex[0]), regex[2:], text)
	}

	if len(text) > 0 && (string(regex[0]) == "." \|\| regex[0] == text[0]) {
	return matchhere(regex[1:], text[1:])
	}
	return false
	}

	func end(text string) bool {
	return len(text) == 0
	}

	func matchstar(c string, regex string, text string) bool {
	fmt.Printf("function %s called params %s %s %s\n", "matchstar", c, regex, text)
	for {
	fmt.Printf(" loop inside matchstar %s\n", text)
	if len(text) > 0 && string(text[0]) == c {
	text = text[1:]
	continue
	}
	return matchhere(regex, text)
	}
	}

	func main() {
	fmt.Println(match("^fo*$", "foo"))
	fmt.Println(match("^fo*bar$", "foo"))
	fmt.Println(match("^fo*bar$", "foooooooooooobar"))
	}

	////////////////////////
	Some note about parsing technique
	Reference link:
	- http://math.hws.edu/javanotes/c9/s5.html
	- http://perl.plover.com/Regex/article.html
	- http://matt.might.net/articles/parsing-regex-with-recursive-descent/
	- https://swtch.com/~rsc/regexp/regexp1.html
	- http://matt.might.net/articles/nonblocking-lexing-toolkit-based-on-regex-derivatives/
	- http://genius.cat-v.org/brian-kernighan/articles/beautiful

	lexer : recognize token
	parser : from token to build structure tree

	3 types of regex:
	- basic regular expression (BRE)
	- extended regular expression (ERE)
	- Perl compatible regular expression (PCRE)

	regex symbol has math equivalent meaning

	http://web.cse.ohio-state.edu/software/2231/web-sw2/extras/slides/27.Recursive-Descent-Parsing.pdf
	a recursive descent parser for regex. NOT all grammar suit for recursive descent
	- idea is to construct procedure for each kind of grammar
	- peek(), next(), eat(item)
	- LL(k) type : determistic by examining next k tokens

	Context free grammar
	- production rules describe "all" possible strings
	- production rules == simple replacement

	EBNF, ABNF: http://matt.might.net/articles/grammars-bnf-ebnf/

	- NFA:
	- special kind of finite machine
	- include of:
	- set of symbols
	- set of states
	- next-state function
	- subset of states which accept state
	- initial state s0
	- NFA represent: table or graph
	- epsilon transition: transition state on an empty string??

	- DFA:
	- NO epsilon transition

	- Problem: convert regex string to a datastructure to easy manipulate and pattern matching
	- convert regex to NFA
	- using thompson algorithm
	- it's like arithmetic evaluation

	YACC
	%{
	header
	}
	%union
	%token
	%type

	%%
	rules
	%%

	user setting

	http://loveruby.net/ja/rhg/book/yacc.html
	http://qiita.com/k0kubun/items/1b641dfd186fe46feb65