Last active
August 30, 2016 01:18
-
-
Save huydx/6a15f9a311c0ff3a2c1b1948b6c03582 to your computer and use it in GitHub Desktop.
simple_regex.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
) | |
func match(regex string, text string) bool { | |
if string(regex[0]) == "^" { | |
return matchhere(regex[1:], text) | |
} | |
for { | |
if matchhere(regex, text) { | |
return true | |
} else { | |
if end(text) { | |
return false | |
} | |
text = text[1:] | |
} | |
} | |
return false | |
} | |
func matchhere(regex string, text string) bool { | |
fmt.Printf("function %s called params %s %s\n", "matchhere", regex, text) | |
if end(regex) { | |
return false | |
} | |
if string(regex[0]) == "$" && len(regex) == 1 { | |
return len(text) == 0 | |
} | |
if len(regex) > 2 && string(regex[1]) == "*" { | |
return matchstar(string(regex[0]), regex[2:], text) | |
} | |
if len(text) > 0 && (string(regex[0]) == "." || regex[0] == text[0]) { | |
return matchhere(regex[1:], text[1:]) | |
} | |
return false | |
} | |
func end(text string) bool { | |
return len(text) == 0 | |
} | |
func matchstar(c string, regex string, text string) bool { | |
fmt.Printf("function %s called params %s %s %s\n", "matchstar", c, regex, text) | |
for { | |
fmt.Printf(" loop inside matchstar %s\n", text) | |
if len(text) > 0 && string(text[0]) == c { | |
text = text[1:] | |
continue | |
} | |
return matchhere(regex, text) | |
} | |
} | |
func main() { | |
fmt.Println(match("^fo*$", "foo")) | |
fmt.Println(match("^fo*bar$", "foo")) | |
fmt.Println(match("^fo*bar$", "foooooooooooobar")) | |
} | |
//////////////////////// | |
Some note about parsing technique | |
Reference link: | |
- http://math.hws.edu/javanotes/c9/s5.html | |
- http://perl.plover.com/Regex/article.html | |
- http://matt.might.net/articles/parsing-regex-with-recursive-descent/ | |
- https://swtch.com/~rsc/regexp/regexp1.html | |
- http://matt.might.net/articles/nonblocking-lexing-toolkit-based-on-regex-derivatives/ | |
- http://genius.cat-v.org/brian-kernighan/articles/beautiful | |
lexer : recognize token | |
parser : from token to build structure tree | |
3 types of regex: | |
- basic regular expression (BRE) | |
- extended regular expression (ERE) | |
- Perl compatible regular expression (PCRE) | |
regex symbol has math equivalent meaning | |
http://web.cse.ohio-state.edu/software/2231/web-sw2/extras/slides/27.Recursive-Descent-Parsing.pdf | |
a recursive descent parser for regex. NOT all grammar suit for recursive descent | |
- idea is to construct procedure for each kind of grammar | |
- peek(), next(), eat(item) | |
- LL(k) type : determistic by examining next k tokens | |
Context free grammar | |
- production rules describe "all" possible strings | |
- production rules == simple replacement | |
EBNF, ABNF: http://matt.might.net/articles/grammars-bnf-ebnf/ | |
- NFA: | |
- special kind of finite machine | |
- include of: | |
- set of symbols | |
- set of states | |
- next-state function | |
- subset of states which accept state | |
- initial state s0 | |
- NFA represent: table or graph | |
- epsilon transition: transition state on an empty string?? | |
- DFA: | |
- NO epsilon transition | |
- Problem: convert regex string to a datastructure to easy manipulate and pattern matching | |
- convert regex to NFA | |
- using thompson algorithm | |
- it's like arithmetic evaluation | |
YACC | |
%{ | |
header | |
} | |
%union | |
%token | |
%type | |
%% | |
rules | |
%% | |
user setting | |
http://loveruby.net/ja/rhg/book/yacc.html | |
http://qiita.com/k0kubun/items/1b641dfd186fe46feb65 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment