Skip to content

Instantly share code, notes, and snippets.

@CAFxX
Last active October 13, 2025 08:25
Show Gist options
  • Save CAFxX/da4bba90a5ee2539ad354a4d138766eb to your computer and use it in GitHub Desktop.
Save CAFxX/da4bba90a5ee2539ad354a4d138766eb to your computer and use it in GitHub Desktop.
unicode.RangeTable code generator
package main
import (
"fmt"
"math/bits"
"slices"
"sort"
"strings"
"unicode"
)
// Range is the generator's working representation of one case of the emitted
// switch statement.
type Range struct {
	// Lo and Hi are the inclusive bounds of the range; Stride is the distance
	// between consecutive members.
	Lo, Hi, Stride uint32
	// Neq lists values that lie on the Lo/Hi/Stride progression but are not
	// members (holes introduced when two ranges one step apart are merged).
	Neq []uint32
	// M, when non-zero, is a bitmap of the members: bit n set means that Lo+n
	// is a member. Stride and Neq are not used for bitmap ranges.
	M uint64
}

// GenerateRangetableCode1 returns the Go source of a function
//
//	func name(r rune) bool
//
// that reports whether r is a member of t. The ranges of t are first
// coalesced (adjacent ranges with the same stride, optionally one step apart),
// then clusters of ranges spanning at most 64 code points are folded into a
// single bitmap test, and finally one switch case is emitted per remaining
// range. An empty table yields a function that always returns false.
//
// Every emitted case ends in a return statement, so the output needs no
// labels and compiles regardless of which kinds of cases are present.
func GenerateRangetableCode1(t *unicode.RangeTable, name string) string {
	r := make([]Range, 0, len(t.R16)+len(t.R32))
	for _, v := range t.R16 {
		r = append(r, Range{Lo: uint32(v.Lo), Hi: uint32(v.Hi), Stride: uint32(v.Stride)})
	}
	for _, v := range t.R32 {
		r = append(r, Range{Lo: uint32(v.Lo), Hi: uint32(v.Hi), Stride: uint32(v.Stride)})
	}
	if len(r) == 0 {
		return fmt.Sprintf("func %s(_ rune) bool { return false }\n", name)
	}
	sort.Slice(r, func(i, j int) bool {
		return r[i].Lo < r[j].Lo
	})

	preRanges := len(r)
	r = packBitmaps(coalesceRanges(r))

	// Count the members by brute force; the figure is only reported in the
	// comment placed at the top of the generated function.
	runes := 0
	for c := rune(0); c <= unicode.MaxRune; c++ {
		if unicode.Is(t, c) {
			runes++
		}
	}

	var sb strings.Builder
	fmt.Fprintf(&sb, "func %s(r rune) bool {\n", name)
	fmt.Fprintf(&sb, "\t// %d ranges (%d before opts), %d runes\n", len(r), preRanges, runes)
	sb.WriteString("\tswitch {\n")
	for _, v := range r {
		writeRangeCase(&sb, v)
	}
	sb.WriteString("\tdefault:\n\t\treturn false\n\t}\n}\n")
	return sb.String()
}

// coalesceRanges merges, in place, consecutive ranges that share a stride and
// are either directly adjacent or separated by exactly one missing step (the
// missing value is recorded in Neq). r must be non-empty and sorted by Lo.
func coalesceRanges(r []Range) []Range {
	j := 0
	for _, next := range r[1:] {
		cur := &r[j]
		sameStride := cur.Stride == next.Stride
		switch {
		case sameStride && cur.Hi+cur.Stride == next.Lo:
			cur.Hi = next.Hi
		case sameStride && cur.Hi+2*cur.Stride == next.Lo:
			cur.Neq = append(cur.Neq, cur.Hi+cur.Stride)
			cur.Hi = next.Hi
		default:
			j++
			r[j] = next
		}
	}
	return r[:j+1]
}

// packBitmaps replaces clusters of two or more neighbouring ranges that fit in
// a 64-bit window with a single bitmap range.
func packBitmaps(r []Range) []Range {
	for i := 0; i < len(r); i++ {
		base := r[i].Lo
		// Extend the window while the highest member still maps to a valid bit
		// index (0..63) relative to base.
		last := i
		for j := i + 1; j < len(r) && r[j].Hi-base <= 63; j++ {
			last = j
		}
		if last == i {
			continue
		}
		var mask uint64
		for _, v := range r[i : last+1] {
			// Hi is inclusive, so it must be visited as well.
			for m := v.Lo; m <= v.Hi; m += v.Stride {
				if !slices.Contains(v.Neq, m) {
					mask |= uint64(1) << (m - base)
				}
			}
		}
		// Skip bitmaps that are too sparse or too regular to be worth it.
		if bits.OnesCount64(mask^(mask<<1)) < 2 || bits.OnesCount64(mask) < 3 {
			continue
		}
		r[i] = Range{Lo: base, Hi: r[last].Hi, M: mask}
		r = append(r[:i+1], r[last+1:]...)
	}
	return r
}

// writeRangeCase appends to sb the switch case that tests membership in v.
func writeRangeCase(sb *strings.Builder, v Range) {
	span := v.Hi - v.Lo
	// Enumerating the members one by one is only correct when the progression
	// has no holes; ranges with holes use the generic stride test instead.
	exact := len(v.Neq) == 0
	switch {
	case v.M != 0:
		fmt.Fprintf(sb, "\tcase 0x%X <= r && r <= 0x%X:\n\t\treturn uint64(0x%X)&(uint64(1)<<(r-0x%X)) != 0\n", v.Lo, v.Hi, v.M, v.Lo)
	case v.Lo == v.Hi:
		fmt.Fprintf(sb, "\tcase r == 0x%X:\n\t\treturn true\n", v.Lo)
	case v.Stride == 1 && span > 1:
		fmt.Fprintf(sb, "\tcase 0x%X <= r && r <= 0x%X:\n\t\treturn ", v.Lo, v.Hi)
		if exact {
			sb.WriteString("true")
		}
		for i, neq := range v.Neq {
			if i > 0 {
				sb.WriteString(" && ")
			}
			fmt.Fprintf(sb, "r != 0x%X", neq)
		}
		sb.WriteString("\n")
	case exact && span == v.Stride && bits.OnesCount32(v.Lo^v.Hi) == 1:
		// Lo and Hi differ in a single bit: one OR plus one comparison.
		fmt.Fprintf(sb, "\tcase r|0x%X == 0x%X:\n\t\treturn true\n", v.Lo^v.Hi, v.Lo|v.Hi)
	case exact && span == v.Stride:
		fmt.Fprintf(sb, "\tcase r == 0x%X || r == 0x%X:\n\t\treturn true\n", v.Lo, v.Hi)
	case exact && span == v.Stride*2:
		fmt.Fprintf(sb, "\tcase r == 0x%X || r == 0x%X || r == 0x%X:\n\t\treturn true\n", v.Lo, v.Lo+v.Stride, v.Hi)
	case exact && span == v.Stride*3:
		fmt.Fprintf(sb, "\tcase r == 0x%X || r == 0x%X || r == 0x%X || r == 0x%X:\n\t\treturn true\n", v.Lo, v.Lo+v.Stride, v.Lo+v.Stride*2, v.Hi)
	default:
		fmt.Fprintf(sb, "\tcase 0x%X <= r && r <= 0x%X:\n\t\treturn (r-0x%X)%%%d == 0", v.Lo, v.Hi, v.Lo, v.Stride)
		for _, neq := range v.Neq {
			fmt.Fprintf(sb, " && r != 0x%X", neq)
		}
		sb.WriteString("\n")
	}
}
// main writes the generated IsLetter predicate, built from unicode.Letter, to
// standard output.
func main() {
	code := GenerateRangetableCode1(unicode.Letter, "IsLetter")
	fmt.Println(code)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment