Created
May 2, 2025 02:15
-
-
Save wchargin/08b4d504ef1d285f7b76a4969aa56b3e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Package realg is an algebraic regular expression library.
package realg | |
import ( | |
"fmt" | |
"regexp" | |
"strings" | |
) | |
// Expr is a fragment of regular expression source text together with a flag
// recording whether that text can be used as an operand without grouping.
type Expr struct {
	// source is the regular expression text for this fragment.
	source string

	// atomic reports whether source may be emitted as-is by atom; when it is
	// false, atom wraps source in a non-capturing group.
	atomic bool
}
func (e Expr) String() string { | |
return e.source | |
} | |
func (e Expr) atom() string { | |
if e.atomic { | |
return e.source | |
} | |
return fmt.Sprintf(`(?:%s)`, e.source) | |
} | |
func Literal(s string) Expr { | |
return Expr{source: regexp.QuoteMeta(s), atomic: len(s) == 1} | |
} | |
func Raw(source string) Expr { | |
return Expr{source: source} | |
} | |
func Union(cases ...Expr) Expr { | |
groupedCases := make([]string, len(cases)) | |
for i, e := range cases { | |
groupedCases[i] = e.atom() | |
} | |
return Expr{source: strings.Join(groupedCases, "|")} | |
} | |
func LiteralUnion(cases ...string) Expr { | |
groupedCases := make([]string, len(cases)) | |
for i, s := range cases { | |
groupedCases[i] = regexp.QuoteMeta(s) | |
} | |
return Expr{source: strings.Join(groupedCases, "|")} | |
} | |
func Concat(exprs ...Expr) Expr { | |
groupedParts := make([]string, len(exprs)) | |
for i, e := range exprs { | |
groupedParts[i] = e.atom() | |
} | |
return Expr{source: strings.Join(groupedParts, "")} | |
} | |
func (e Expr) Capture() Expr { | |
return Expr{source: fmt.Sprintf(`(%s)`, e.source), atomic: true} | |
} | |
// reGroupName matches the capture-group names accepted by Named: one or more
// ASCII letters or digits, and nothing else.
var reGroupName = regexp.MustCompile("^[A-Za-z0-9]+$")
func (e Expr) Named(name string) (result Expr, ok bool) { | |
if !reGroupName.MatchString(name) { | |
return Expr{}, false | |
} | |
return Expr{source: fmt.Sprintf(`(?<%s>%s)`, name, e.source), atomic: true}, true | |
} | |
func (e Expr) MustNamed(name string) Expr { | |
e, ok := e.Named(name) | |
if !ok { | |
panic(fmt.Sprintf("invalid group name: %q", name)) | |
} | |
return e | |
} | |
func Many(e Expr) Expr { | |
return Expr{source: e.atom() + "*", atomic: true} | |
} | |
func AtLeastOne(e Expr) Expr { | |
return Expr{source: e.atom() + "+", atomic: true} | |
} | |
func (e Expr) Optional() Expr { | |
return Expr{source: e.atom() + "?", atomic: true} | |
} | |
func (e Expr) AtLeast(min int) Expr { | |
return Expr{source: fmt.Sprintf(`%s{%d,}`, e.atom(), min), atomic: true} | |
} | |
func (e Expr) AtMost(max int) Expr { | |
return Expr{source: fmt.Sprintf(`%s{0,%d}`, e.atom(), max), atomic: true} | |
} | |
func (e Expr) Repeat(min, max int) Expr { | |
return Expr{source: fmt.Sprintf(`%s{%d,%d}`, e.atom(), min, max), atomic: true} | |
} | |
func (e Expr) Anchored() Expr { | |
return Expr{source: fmt.Sprintf(`^%s$`, e.atom())} | |
} | |
func (e Expr) AtStart() Expr { | |
return Expr{source: fmt.Sprintf(`^%s`, e.atom())} | |
} | |
func (e Expr) AtEnd() Expr { | |
return Expr{source: fmt.Sprintf(`%s$`, e.atom())} | |
} | |
func (e Expr) Compile() (*regexp.Regexp, error) { | |
return regexp.Compile(e.source) | |
} | |
func (e Expr) MustCompile() *regexp.Regexp { | |
return regexp.MustCompile(e.source) | |
} | |
func Example() { | |
positiveInteger := Raw("[1-9][0-9]*") | |
sample := Concat( | |
LiteralUnion("apple", "orange").MustNamed("fruit"), | |
Concat( | |
LiteralUnion("cut", "diced").MustNamed("preparation"), | |
Raw("[a-z]{0,31}").MustNamed("description"), | |
).Optional(), | |
positiveInteger.MustNamed("sequence"), | |
Concat(Literal("-"), positiveInteger.MustNamed("count")).Optional(), | |
).Anchored() | |
sampleRE := sample.MustCompile() | |
fmt.Println(sampleRE) | |
for _, input := range []string{ | |
"apple1", | |
"orange114", | |
"apple52-1", | |
"orangetb1", | |
"orangecutonerow1", | |
"orangedicedchunky10-4", | |
"orangecutthisdescriptionisnottoolong1", | |
"orangecutthisdescriptionisjustabittoolong1", | |
"somethingelse6", | |
"badapple1", | |
} { | |
match := sampleRE.FindStringSubmatch(input) | |
if match == nil { | |
fmt.Printf("%s: invalid\n", input) | |
continue | |
} | |
fruit, prep, descr, seq, count := match[1], match[2], match[3], match[4], match[5] | |
fmt.Printf("%s: fruit=%q prep=%q descr=%q seq=%q count=%q\n", input, fruit, prep, descr, seq, count) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment