Last active
October 13, 2025 08:25
-
-
Save CAFxX/da4bba90a5ee2539ad354a4d138766eb to your computer and use it in GitHub Desktop.
unicode.RangeTable code generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"math/bits" | |
"slices" | |
"sort" | |
"strings" | |
"unicode" | |
) | |
// Range is one entry of the intermediate table that GenerateRangetableCode1
// builds while turning a unicode.RangeTable into a switch statement.
type Range struct {
	// Lo and Hi are the first and last member code points; both are inclusive.
	// Stride is the distance between consecutive candidate code points.
	Lo, Hi, Stride uint32
	// Neq lists code points that lie on the stride inside [Lo, Hi] but are
	// NOT members. It is filled when two runs separated by exactly one missing
	// element are fused into one.
	Neq []uint32
	// M, when non-zero, is a membership bitmask: bit n is set when Lo+n is a
	// member. Stride and Neq are not used for such entries.
	M uint64
}

// GenerateRangetableCode1 returns Go source for a function
//
//	func <name>(r rune) bool
//
// that reports whether r belongs to the range table t. The table is compiled
// into a single switch statement after two optimizations: runs that are
// adjacent (or separated by exactly one missing element) are fused, and groups
// of runs that fit in a 64 code point window are replaced by a bitmask test.
//
// A nil or empty table yields a function that always returns false. The table
// is expected to be well formed (non-overlapping ranges, Stride >= 1), as
// required by the unicode package itself.
func GenerateRangetableCode1(t *unicode.RangeTable, name string) string {
	if t == nil {
		return fmt.Sprintf("func %s(_ rune) bool { return false }\n", name)
	}

	r := make([]Range, 0, len(t.R16)+len(t.R32))
	for _, v := range t.R16 {
		r = append(r, Range{Lo: uint32(v.Lo), Hi: uint32(v.Hi), Stride: uint32(v.Stride)})
	}
	for _, v := range t.R32 {
		r = append(r, Range{Lo: uint32(v.Lo), Hi: uint32(v.Hi), Stride: uint32(v.Stride)})
	}
	if len(r) == 0 {
		return fmt.Sprintf("func %s(_ rune) bool { return false }\n", name)
	}
	sort.Slice(r, func(i, j int) bool {
		return r[i].Lo < r[j].Lo
	})

	preRanges := len(r)
	r = mergeAdjacentRanges(r)
	r = packBitmaskRanges(r)

	// Count the members only to annotate the generated code.
	runes := 0
	for c := rune(0); c <= unicode.MaxRune; c++ {
		if unicode.Is(t, c) {
			runes++
		}
	}

	var sb strings.Builder
	fmt.Fprintf(&sb, "func %s(r rune) bool {\n", name)
	fmt.Fprintf(&sb, "\t// %d ranges (%d before opts), %d runes\n", len(r), preRanges, runes)
	sb.WriteString("\tswitch {\n")
	for _, v := range r {
		writeRangeCase(&sb, v)
	}
	// Every case returns on its own; anything that matched no case is not a
	// member. (Jumping with goto into the default clause is not legal Go.)
	sb.WriteString("\t}\n\treturn false\n}\n")
	return sb.String()
}

// mergeAdjacentRanges fuses, in place, consecutive runs of r (sorted by Lo,
// non-empty) that share a stride and are either contiguous on that stride or
// separated by exactly one missing element, which is then recorded in Neq.
func mergeAdjacentRanges(r []Range) []Range {
	j := 0
	for i := 1; i < len(r); i++ {
		cur, next := &r[j], r[i]
		switch {
		case cur.Stride == next.Stride && cur.Hi+cur.Stride == next.Lo:
			cur.Hi = next.Hi
		case cur.Stride == next.Stride && cur.Hi+cur.Stride*2 == next.Lo:
			cur.Neq = append(cur.Neq, cur.Hi+cur.Stride)
			cur.Hi = next.Hi
		default:
			j++
			r[j] = next
		}
	}
	return r[:j+1]
}

// packBitmaskRanges replaces groups of neighbouring runs that fit in a window
// of 64 code points by a single bitmask entry (Range.M).
func packBitmaskRanges(r []Range) []Range {
	const maskBits = 64
	for i := 0; i < len(r); i++ {
		// Extend the window while the last member still maps to a bit < 64.
		last := i
		for j := i + 1; j < len(r) && r[j].Hi-r[i].Lo < maskBits; j++ {
			last = j
		}
		if last == i {
			continue
		}

		var mask uint64
		for n := i; n <= last; n++ {
			step := max(r[n].Stride, 1) // never loop forever on a malformed stride
			// Hi is inclusive, so it must be visited as well.
			for m := r[n].Lo; m <= r[n].Hi; m += step {
				if !slices.Contains(r[n].Neq, m) {
					mask |= 1 << (m - r[i].Lo)
				}
			}
		}
		// Not worth a mask: too few members or no internal gaps.
		if bits.OnesCount64(mask^(mask<<1)) < 2 || bits.OnesCount64(mask) < 3 {
			continue
		}
		r[i] = Range{Lo: r[i].Lo, Hi: r[last].Hi, M: mask}
		r = append(r[:i+1], r[last+1:]...)
	}
	return r
}

// writeRangeCase appends to sb the switch case (condition and return
// statement) that tests membership of r in v.
func writeRangeCase(sb *strings.Builder, v Range) {
	span := v.Hi - v.Lo
	// The short "r == a || r == b ..." forms enumerate every element on the
	// stride, so they are only valid when nothing is excluded.
	plain := len(v.Neq) == 0
	switch {
	case v.M != 0:
		fmt.Fprintf(sb, "\tcase 0x%X <= r && r <= 0x%X:\n\t\treturn uint64(0x%X)&(uint64(1)<<(r-0x%X)) != 0\n", v.Lo, v.Hi, v.M, v.Lo)
	case v.Lo == v.Hi:
		fmt.Fprintf(sb, "\tcase r == 0x%X:\n\t\treturn true\n", v.Lo)
	case v.Stride == 1 && span > 1:
		fmt.Fprintf(sb, "\tcase 0x%X <= r && r <= 0x%X:\n\t\treturn ", v.Lo, v.Hi)
		if plain {
			sb.WriteString("true")
		}
		for i, neq := range v.Neq {
			if i > 0 {
				sb.WriteString(" && ")
			}
			fmt.Fprintf(sb, "r != 0x%X", neq)
		}
		sb.WriteString("\n")
	case plain && span == v.Stride && bits.OnesCount32(v.Lo^v.Hi) == 1:
		// Lo and Hi differ in a single bit: force that bit and compare once.
		fmt.Fprintf(sb, "\tcase r | 0x%X == 0x%X:\n\t\treturn true\n", v.Lo^v.Hi, v.Lo|v.Hi)
	case plain && span == v.Stride:
		fmt.Fprintf(sb, "\tcase r == 0x%X || r == 0x%X:\n\t\treturn true\n", v.Lo, v.Hi)
	case plain && span == v.Stride*2:
		fmt.Fprintf(sb, "\tcase r == 0x%X || r == 0x%X || r == 0x%X:\n\t\treturn true\n", v.Lo, v.Lo+v.Stride, v.Hi)
	case plain && span == v.Stride*3:
		fmt.Fprintf(sb, "\tcase r == 0x%X || r == 0x%X || r == 0x%X || r == 0x%X:\n\t\treturn true\n", v.Lo, v.Lo+v.Stride, v.Lo+v.Stride*2, v.Hi)
	default:
		fmt.Fprintf(sb, "\tcase 0x%X <= r && r <= 0x%X:\n\t\treturn (r-0x%X)%%%d == 0", v.Lo, v.Hi, v.Lo, v.Stride)
		for _, neq := range v.Neq {
			fmt.Fprintf(sb, " && r != 0x%X", neq)
		}
		sb.WriteString("\n")
	}
}
func main() { | |
fmt.Println(GenerateRangetableCode1(unicode.Letter, "IsLetter")) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment