Skip to content

Instantly share code, notes, and snippets.

@wchargin
Created May 2, 2025 02:15
Show Gist options
  • Save wchargin/08b4d504ef1d285f7b76a4969aa56b3e to your computer and use it in GitHub Desktop.
Save wchargin/08b4d504ef1d285f7b76a4969aa56b3e to your computer and use it in GitHub Desktop.
// Package realg is an algebraic regular expression library: patterns are
// built by composing Expr values.
package realg
import (
"fmt"
"regexp"
"strings"
)
// Expr is a regular expression under construction. Values are created by the
// constructors in this package and combined by its functions and methods.
type Expr struct {
// source is the regular expression text built so far.
source string
// atomic reports whether source can be used as an operand as-is. When it is
// false, atom wraps source in a non-capturing group before it is combined.
atomic bool
}
// String returns the regular expression source text held by e, without any
// additional grouping.
func (e Expr) String() string {
	src := e.source
	return src
}
// atom returns the source of e in a form that is safe to use as an operand:
// non-atomic expressions are wrapped in a non-capturing group, atomic ones are
// returned unchanged.
func (e Expr) atom() string {
	if !e.atomic {
		return fmt.Sprintf(`(?:%s)`, e.source)
	}
	return e.source
}
// Literal returns an expression that matches the string s exactly. All regular
// expression metacharacters in s are escaped with regexp.QuoteMeta.
//
// The result is atomic only when s is a single character, since only then can
// an operator be appended without grouping.
func Literal(s string) Expr {
	// Count runes rather than bytes: a single non-ASCII character such as "é"
	// is several bytes long but is still one regular expression atom.
	singleRune := len([]rune(s)) == 1
	return Expr{source: regexp.QuoteMeta(s), atomic: singleRune}
}
// Raw wraps an already-written regular expression in an Expr. The text is used
// verbatim (no escaping), and the result is never treated as atomic, so it is
// grouped whenever it is used as an operand.
func Raw(source string) Expr {
	return Expr{
		source: source,
		atomic: false,
	}
}
// Union returns an expression that matches any one of cases. Each case is
// grouped as needed and the alternatives are separated by "|". The result is
// not atomic. With no cases the resulting source is empty.
func Union(cases ...Expr) Expr {
	var alternatives strings.Builder
	for i, alt := range cases {
		if i > 0 {
			alternatives.WriteString("|")
		}
		alternatives.WriteString(alt.atom())
	}
	return Expr{source: alternatives.String()}
}
// LiteralUnion returns an expression that matches any one of the given strings
// exactly. Each string is escaped with regexp.QuoteMeta and the alternatives
// are separated by "|" without further grouping. The result is not atomic.
func LiteralUnion(cases ...string) Expr {
	var alternatives strings.Builder
	for i, literal := range cases {
		if i > 0 {
			alternatives.WriteString("|")
		}
		alternatives.WriteString(regexp.QuoteMeta(literal))
	}
	return Expr{source: alternatives.String()}
}
// Concat returns an expression that matches exprs one after another, in order.
// Each part is grouped as needed before being appended. The result is not
// atomic. With no parts the resulting source is empty.
func Concat(exprs ...Expr) Expr {
	var sequence strings.Builder
	for _, part := range exprs {
		sequence.WriteString(part.atom())
	}
	return Expr{source: sequence.String()}
}
// Capture wraps e in an unnamed capturing group. The result is atomic.
func (e Expr) Capture() Expr {
	grouped := fmt.Sprintf(`(%s)`, e.source)
	return Expr{source: grouped, atomic: true}
}
// reGroupName matches the capture group names accepted by Named: one or more
// ASCII letters or digits.
var reGroupName = regexp.MustCompile(`^[A-Za-z0-9]+$`)

// Named wraps e in a capturing group called name. The result is atomic.
//
// ok is false, and result is the zero Expr, when name is not made up solely of
// one or more ASCII letters and digits.
func (e Expr) Named(name string) (result Expr, ok bool) {
	if !reGroupName.MatchString(name) {
		return Expr{}, false
	}
	// Emit the (?P<name>...) spelling: it is accepted by every release of Go's
	// regexp package, whereas (?<name>...) is only understood from Go 1.22.
	group := fmt.Sprintf(`(?P<%s>%s)`, name, e.source)
	return Expr{source: group, atomic: true}, true
}
// MustNamed is like Named but panics with an "invalid group name" message when
// name is rejected, instead of reporting failure through a second result.
func (e Expr) MustNamed(name string) Expr {
	named, ok := e.Named(name)
	if ok {
		return named
	}
	panic(fmt.Sprintf("invalid group name: %q", name))
}
// Many returns an expression matching zero or more consecutive repetitions of
// e (the "*" operator). e is grouped first when it is not atomic.
//
// The result is deliberately not atomic: appending another operator directly
// to "x*" would either be rejected as a nested repetition ("x**") or change
// its meaning (a trailing "?" makes the star non-greedy), so it must be
// grouped before it is quantified again.
func Many(e Expr) Expr {
	return Expr{source: e.atom() + "*"}
}
// AtLeastOne returns an expression matching one or more consecutive
// repetitions of e (the "+" operator). e is grouped first when it is not
// atomic.
//
// The result is deliberately not atomic: appending another operator directly
// to "x+" would either be rejected as a nested repetition or change its
// meaning (for example "x+?" is a non-greedy plus, not an optional "x+"), so
// it must be grouped before it is quantified again.
func AtLeastOne(e Expr) Expr {
	return Expr{source: e.atom() + "+"}
}
// Optional returns an expression matching zero or one occurrence of e (the "?"
// operator). e is grouped first when it is not atomic.
//
// The result is deliberately not atomic: appending another operator directly
// to "x?" would either be rejected as a nested repetition or change its
// meaning ("x??" is a non-greedy optional), so it must be grouped before it is
// quantified again.
func (e Expr) Optional() Expr {
	return Expr{source: e.atom() + "?"}
}
// AtLeast returns an expression matching min or more consecutive repetitions
// of e, written as "{min,}". e is grouped first when it is not atomic. min is
// inserted as given, without validation.
//
// The result is deliberately not atomic: a further operator appended directly
// after the "{min,}" would be a nested repetition or a non-greedy marker, so
// the expression must be grouped before it is quantified again.
func (e Expr) AtLeast(min int) Expr {
	return Expr{source: fmt.Sprintf(`%s{%d,}`, e.atom(), min)}
}
// AtMost returns an expression matching between zero and max consecutive
// repetitions of e, written as "{0,max}". e is grouped first when it is not
// atomic. max is inserted as given, without validation.
//
// The result is deliberately not atomic: a further operator appended directly
// after the "{0,max}" would be a nested repetition or a non-greedy marker, so
// the expression must be grouped before it is quantified again.
func (e Expr) AtMost(max int) Expr {
	return Expr{source: fmt.Sprintf(`%s{0,%d}`, e.atom(), max)}
}
// Repeat returns an expression matching between min and max consecutive
// repetitions of e, written as "{min,max}". e is grouped first when it is not
// atomic. min and max are inserted as given, without validation.
//
// The result is deliberately not atomic: a further operator appended directly
// after the "{min,max}" would be a nested repetition or a non-greedy marker,
// so the expression must be grouped before it is quantified again.
func (e Expr) Repeat(min, max int) Expr {
	return Expr{source: fmt.Sprintf(`%s{%d,%d}`, e.atom(), min, max)}
}
// Anchored returns e preceded by "^" and followed by "$". e is grouped first
// when it is not atomic, so the anchors apply to the whole expression. The
// result is not atomic.
func (e Expr) Anchored() Expr {
	body := e.atom()
	anchored := fmt.Sprintf(`^%s$`, body)
	return Expr{source: anchored}
}
// AtStart returns e preceded by "^". e is grouped first when it is not atomic,
// so the anchor applies to the whole expression. The result is not atomic.
func (e Expr) AtStart() Expr {
	body := e.atom()
	anchored := fmt.Sprintf(`^%s`, body)
	return Expr{source: anchored}
}
// AtEnd returns e followed by "$". e is grouped first when it is not atomic,
// so the anchor applies to the whole expression. The result is not atomic.
func (e Expr) AtEnd() Expr {
	body := e.atom()
	anchored := fmt.Sprintf(`%s$`, body)
	return Expr{source: anchored}
}
// Compile compiles the source of e with regexp.Compile and returns its result
// unchanged, including any compilation error.
func (e Expr) Compile() (*regexp.Regexp, error) {
	pattern := e.source
	return regexp.Compile(pattern)
}
// MustCompile compiles the source of e with regexp.MustCompile, which panics
// if the pattern cannot be compiled.
func (e Expr) MustCompile() *regexp.Regexp {
	pattern := e.source
	return regexp.MustCompile(pattern)
}
// Example demonstrates the library. It builds an anchored pattern out of four
// parts (a named fruit, an optional preparation with a description, a sequence
// number, and an optional "-count" suffix), prints the compiled pattern, and
// then prints, for each of a fixed list of inputs, either the five captured
// groups or "invalid" when the input does not match.
func Example() {
	positiveInteger := Raw("[1-9][0-9]*")

	fruit := LiteralUnion("apple", "orange").MustNamed("fruit")
	preparation := Concat(
		LiteralUnion("cut", "diced").MustNamed("preparation"),
		Raw("[a-z]{0,31}").MustNamed("description"),
	).Optional()
	sequence := positiveInteger.MustNamed("sequence")
	count := Concat(Literal("-"), positiveInteger.MustNamed("count")).Optional()

	re := Concat(fruit, preparation, sequence, count).Anchored().MustCompile()
	fmt.Println(re)

	inputs := []string{
		"apple1",
		"orange114",
		"apple52-1",
		"orangetb1",
		"orangecutonerow1",
		"orangedicedchunky10-4",
		"orangecutthisdescriptionisnottoolong1",
		"orangecutthisdescriptionisjustabittoolong1",
		"somethingelse6",
		"badapple1",
	}
	for _, in := range inputs {
		groups := re.FindStringSubmatch(in)
		if groups == nil {
			fmt.Printf("%s: invalid\n", in)
			continue
		}
		// Index 0 is the whole match; 1 through 5 are the groups in the order
		// they were added to the pattern above.
		fmt.Printf(
			"%s: fruit=%q prep=%q descr=%q seq=%q count=%q\n",
			in, groups[1], groups[2], groups[3], groups[4], groups[5],
		)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment