Skip to content

Instantly share code, notes, and snippets.

@cabloo
Created June 16, 2021 00:18
Show Gist options
  • Save cabloo/b36380da14bb71b1adc5df1bd21fe37a to your computer and use it in GitHub Desktop.
Save cabloo/b36380da14bb71b1adc5df1bd21fe37a to your computer and use it in GitHub Desktop.
import "fmt"
import "unicode/utf8"
type Regexp struct {
matchers []regexpMatch
}
type regexpMatch interface {
matches(in rune) bool
maxLength() int
minLength() int
}
type singleCharMatch struct {
matches1Char
char rune
}
type wildcardMatch struct {
matches1Char
}
type anyLengthMatch struct {
matchesAnyLength
matcher regexpMatch
}
type matchesAnyLength struct {}
type matches1Char struct {}
func (m anyLengthMatch) matches(in rune) bool {
return m.matcher.matches(in)
}
func (m singleCharMatch) matches(in rune) bool {
return in == m.char
}
func (m wildcardMatch) matches(in rune) bool {
return true
}
func (m matchesAnyLength) maxLength() int {
return -1
}
func (m matchesAnyLength) minLength() int {
return 0
}
func (m matches1Char) maxLength() int {
return 1
}
func (m matches1Char) minLength() int {
return 1
}
const wildcard rune = rune('.')
const anyLength rune = rune('*')
func removeNextRuneInString(in string) (char rune, length int, out string) {
char, length = utf8.DecodeRuneInString(in)
out = in[length:]
return
}
func removeNextRegexpMatcher(patternIn string) (matcher regexpMatch, pattern string) {
pattern = patternIn
count := 0
for char, _, nextP := removeNextRuneInString(pattern); char != utf8.RuneError; char, _, nextP = removeNextRuneInString(pattern) {
if count > 1 || (count == 1 && char != anyLength) {
break
}
count++
pattern = nextP
switch char {
case wildcard:
matcher = wildcardMatch{}
case anyLength:
if matcher == nil {
panic(fmt.Sprintf("any length (*) not immediately after matchable character in %s", patternIn))
}
matcher = anyLengthMatch{matcher: matcher}
default:
matcher = singleCharMatch{char: char}
}
}
return
}
func (r Regexp) prepareMatchesState(in string) (matches [][]bool, countChars int) {
countChars = utf8.RuneCountInString(in)
matches = make([][]bool, len(r.matchers)+1)
matches[0] = make([]bool, countChars+1)
for i := range r.matchers {
matches[i+1] = make([]bool, countChars+1)
}
matches[0][0] = true
return
}
func (r Regexp) Matches(in string) bool {
matches, countChars := r.prepareMatchesState(in)
for matcherIndex, matcher := range r.matchers {
matches[matcherIndex+1][0] = matches[matcherIndex][0] && matcher.minLength() <= 0
for charIndex, char := range in {
matched := r.charMatches(matches, matcher, matcherIndex, charIndex, char)
matches[matcherIndex+1][charIndex+1] = matched
}
}
// printState(matches)
return matches[len(r.matchers)][countChars]
}
func boolToShortString(in bool) (out string) {
if in {
return "x"
}
return "_"
}
func printState(state [][]bool) {
for i, r := range state {
fmt.Println()
fmt.Print(i, " ")
for _, c := range r {
fmt.Print(boolToShortString(c), " ")
}
}
}
func (r Regexp) charMatches(matches [][]bool, matcher regexpMatch, matcherIndex, charIndex int, char rune) bool {
if matcher.minLength() <= 0 && matches[matcherIndex][charIndex+1] {
return true
}
prevMatch := matches[matcherIndex][charIndex]
if matcher.maxLength() < 0 {
prevMatch = prevMatch || matches[matcherIndex+1][charIndex]
}
return prevMatch && matcher.matches(char)
}
func New(pattern string) Regexp {
tokens := []regexpMatch{}
for len(pattern) > 0 {
var matcher regexpMatch
matcher, pattern = removeNextRegexpMatcher(pattern)
tokens = append(tokens, matcher)
}
return Regexp{tokens}
}
func isMatch(s, p string) bool {
return New(p).Matches(s)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment