Skip to content

Instantly share code, notes, and snippets.

@VictoriqueMoe
Created January 29, 2026 19:28
Show Gist options
  • Select an option

  • Save VictoriqueMoe/b503c188ffea766af1b3cd7aba8fbc6f to your computer and use it in GitHub Desktop.

Select an option

Save VictoriqueMoe/b503c188ffea766af1b3cd7aba8fbc6f to your computer and use it in GitHub Desktop.
charaudit
package main
import (
"fmt"
"os"
"regexp"
"sort"
"strings"
)
// characters maps the two-digit character ID found in a voice tag to the
// display name used in the report. IDs missing from this table are rendered
// as "Unknown" by charName.
var characters = map[string]string{
	"00": "GroupVoices",
	"01": "Kinzo",
	"02": "Krauss",
	"03": "Natsuhi",
	"04": "Jessica",
	"05": "Eva",
	"06": "Hideyoshi",
	"07": "George",
	"08": "Rudolf",
	"09": "Kyrie",
	"10": "Battler",
	"11": "Ange",
	"12": "Rosa",
	"13": "Maria",
	"14": "Genji",
	"15": "Shannon",
	"16": "Kanon",
	"17": "Gohda",
	"18": "KumasawaChiyo",
	"19": "NanjoTerumasa",
	"20": "Amakusa",
	"21": "Okonogi",
	"22": "Kasumi",
	"23": "ProfessorOotsuki",
	"24": "CaptainKawabata",
	"25": "NanjoMasayuki",
	"26": "KumasawaSabakichi",
	"27": "Beatrice",
	"28": "Bernkastel",
	"29": "Lambdadelta",
	"30": "Virgilia",
	"31": "Ronove",
	"32": "Gaap",
	"33": "Sakutarou",
	"34": "Evatrice",
	"35": "Chiester45",
	"36": "Chiester410",
	"37": "Chiester00",
	"38": "Lucifer",
	"39": "Leviathan",
	"40": "Satan",
	"41": "Belphegor",
	"42": "Mammon",
	"43": "Beelzebub",
	"44": "Asmodeus",
	"45": "Goat",
	"46": "Erika",
	"47": "Dlanor",
	"48": "Gertrude",
	"49": "Cornelia",
	"50": "Featherine",
	"51": "Zepar",
	"52": "Furfur",
	"53": "Lion",
	"54": "Willard",
	"55": "Claire",
	"56": "Ikuko",
	"57": "Tohya",
	"58": "KinzoYoung",
	"59": "BiceChickBeato",
	"60": "BeatoElder",
	"99": "MiscVoices",
}
// knownVariants lists, per message-window suffix, the character IDs that main
// reports as intentional variants rather than suspicious mismatches when they
// appear under that window instead of the window's expected ID.
var knownVariants = map[string]map[string]bool{
	"kin": {
		"58": true,
	},
	"bea": {
		"32": true,
		"55": true,
	},
	"bu3": {
		"57": true,
	},
}
// msgwndRegex matches a line consisting solely of "msgwnd_<suffix>", with an
// optional trailing carriage return; the suffix is capture group 1.
var msgwndRegex = regexp.MustCompile(`^msgwnd_(\w+)\r?$`)

// voiceRegex matches a voice tag of the form [lv 0*"<digits>"*"<digits>"].
// main reads capture group 1 as the character ID and group 2 as the audio ID.
var voiceRegex = regexp.MustCompile(`\[lv 0\*"(\d+)"\*"(\d+)"\]`)
// charName returns the display name registered for id in the characters
// table, or "Unknown" when the table has no entry for it.
func charName(id string) string {
	name, known := characters[id]
	if !known {
		return "Unknown"
	}
	return name
}
// mismatchGroup aggregates every voiced line that shares the same message
// window, expected character ID and actual character ID.
type mismatchGroup struct {
	// window is the message-window suffix (the text after "msgwnd_");
	// expected is the character ID main chose for that window and got is the
	// ID that was actually found on the line.
	window, expected, got string

	// count is the number of lines folded into this group.
	count int

	// audioMin and audioMax bound the audio IDs seen in this group, as
	// tracked by main.
	audioMin, audioMax string
}
// printTable writes one report table to standard output.
//
// When groups is empty it prints "<title>: None" followed by a blank line and
// returns. Otherwise it prints the title, a header row, one row per group in
// the order given, and a "Subtotal" line holding the sum of the groups' line
// counts. Character IDs are rendered as "Name (ID)" using charName.
//
// Every column is at least as wide as the fixed layout 12/20/20/5/12/12 and
// grows to fit its longest cell, so a long label such as
// "KumasawaSabakichi (26)" keeps its row aligned with the header and the
// separator lines.
func printTable(title string, groups []*mismatchGroup) {
	if len(groups) == 0 {
		fmt.Printf("%s: None\n\n", title)
		return
	}

	const numCols = 6
	headers := [numCols]string{"Window", "Expected", "Got", "Lines", "Audio Min", "Audio Max"}
	// Minimum widths; they only ever grow in the loop below.
	widths := [numCols]int{12, 20, 20, 5, 12, 12}

	rows := make([][numCols]string, 0, len(groups))
	total := 0
	for _, g := range groups {
		total += g.count
		row := [numCols]string{
			"msgwnd_" + g.window,
			fmt.Sprintf("%s (%s)", charName(g.expected), g.expected),
			fmt.Sprintf("%s (%s)", charName(g.got), g.got),
			fmt.Sprint(g.count),
			g.audioMin,
			g.audioMax,
		}
		for i, cell := range row {
			// fmt pads by rune count, so measure cells the same way.
			if n := len([]rune(cell)); n > widths[i] {
				widths[i] = n
			}
		}
		rows = append(rows, row)
	}

	// formatRow pads each cell to its column width. The line-count column is
	// right-aligned; all other columns are left-aligned.
	formatRow := func(cells [numCols]string) string {
		return fmt.Sprintf("| %-*s | %-*s | %-*s | %*s | %-*s | %-*s |",
			widths[0], cells[0],
			widths[1], cells[1],
			widths[2], cells[2],
			widths[3], cells[3],
			widths[4], cells[4],
			widths[5], cells[5])
	}

	hdr := formatRow(headers)
	sep := strings.Repeat("-", len(hdr))

	fmt.Println(title)
	fmt.Println(sep)
	fmt.Println(hdr)
	fmt.Println(sep)
	for _, row := range rows {
		fmt.Println(formatRow(row))
	}
	fmt.Println(sep)
	fmt.Printf("Subtotal: %d lines\n\n", total)
}
// main audits the voice tags in internal/quote/data/english.txt against the
// message window that precedes them and prints two tables of mismatches.
//
// The file is scanned line by line. A line matching msgwndRegex selects the
// current window suffix. The next line that carries a voice tag is recorded
// against that suffix (only the first tag on the line is used) and the suffix
// is then cleared, so each window line contributes at most one entry. Lines
// under the "non" suffix are skipped.
//
// For every suffix the most frequent character ID becomes the expected one.
// Entries with a different ID are grouped by (window, expected, got) and split
// into "intentional" groups, listed in knownVariants, and "suspicious" groups.
//
// The process exits with status 1 if the file cannot be read.
func main() {
	data, err := os.ReadFile("internal/quote/data/english.txt")
	if err != nil {
		fmt.Fprintf(os.Stderr, "failed to read file: %v\n", err)
		os.Exit(1)
	}

	type entry struct {
		suffix  string
		charID  string
		audioID string
	}
	var all []entry
	suffixCounts := map[string]map[string]int{}

	currentSuffix := ""
	for _, raw := range strings.Split(string(data), "\n") {
		line := strings.TrimRight(raw, "\r")
		if m := msgwndRegex.FindStringSubmatch(line); m != nil {
			currentSuffix = m[1]
			continue
		}
		if currentSuffix == "" || currentSuffix == "non" {
			continue
		}
		// Only the leftmost voice tag on the line identifies the speaker.
		m := voiceRegex.FindStringSubmatch(line)
		if m == nil {
			continue
		}
		charID, audioID := m[1], m[2]
		all = append(all, entry{suffix: currentSuffix, charID: charID, audioID: audioID})
		if suffixCounts[currentSuffix] == nil {
			suffixCounts[currentSuffix] = map[string]int{}
		}
		suffixCounts[currentSuffix][charID]++
		// Consume the window so later voiced lines are not attributed to it.
		currentSuffix = ""
	}

	expectedChar := make(map[string]string, len(suffixCounts))
	for suffix, counts := range suffixCounts {
		expectedChar[suffix] = majorityChar(counts)
	}

	groups := map[string]*mismatchGroup{}
	for _, e := range all {
		expected := expectedChar[e.suffix]
		if e.charID == expected {
			continue
		}
		key := e.suffix + "|" + expected + "|" + e.charID
		g, ok := groups[key]
		if !ok {
			g = &mismatchGroup{
				window:   e.suffix,
				expected: expected,
				got:      e.charID,
				audioMin: e.audioID,
				audioMax: e.audioID,
			}
			groups[key] = g
		}
		g.count++
		if audioIDLess(e.audioID, g.audioMin) {
			g.audioMin = e.audioID
		}
		if audioIDLess(g.audioMax, e.audioID) {
			g.audioMax = e.audioID
		}
	}

	var intentional []*mismatchGroup
	var suspicious []*mismatchGroup
	for _, g := range groups {
		if variants, ok := knownVariants[g.window]; ok && variants[g.got] {
			intentional = append(intentional, g)
		} else {
			suspicious = append(suspicious, g)
		}
	}

	// Map iteration order is unspecified, so sort before printing to keep the
	// report stable: by window first, then by the ID that was found.
	sortGroups := func(s []*mismatchGroup) {
		sort.Slice(s, func(i, j int) bool {
			if s[i].window != s[j].window {
				return s[i].window < s[j].window
			}
			return s[i].got < s[j].got
		})
	}
	sortGroups(intentional)
	sortGroups(suspicious)

	printTable("INTENTIONAL VARIANTS (known character forms sharing a window)", intentional)
	printTable("SUSPICIOUS MISMATCHES (likely data entry bugs)", suspicious)
}

// majorityChar returns the character ID with the highest count. Ties are
// broken in favour of the smaller ID string so that the result does not
// depend on map iteration order. It returns "" for an empty map.
func majorityChar(counts map[string]int) string {
	best, bestCount := "", 0
	for id, n := range counts {
		if n > bestCount || (n == bestCount && id < best) {
			best, bestCount = id, n
		}
	}
	return best
}

// audioIDLess reports whether the decimal digit string a denotes a smaller
// number than b. Leading zeros are ignored and shorter numbers sort first, so
// "9" is less than "10", which plain string comparison would get wrong.
func audioIDLess(a, b string) bool {
	a = strings.TrimLeft(a, "0")
	b = strings.TrimLeft(b, "0")
	if len(a) != len(b) {
		return len(a) < len(b)
	}
	return a < b
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment