Created
January 29, 2026 19:28
-
-
Save VictoriqueMoe/b503c188ffea766af1b3cd7aba8fbc6f to your computer and use it in GitHub Desktop.
charaudit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "fmt" | |
| "os" | |
| "regexp" | |
| "sort" | |
| "strings" | |
| ) | |
// characters maps a character ID (the first quoted number of a voice tag, as
// read by main) to the display name used in the report tables.
var characters = map[string]string{
	"00": "GroupVoices",
	"01": "Kinzo",
	"02": "Krauss",
	"03": "Natsuhi",
	"04": "Jessica",
	"05": "Eva",
	"06": "Hideyoshi",
	"07": "George",
	"08": "Rudolf",
	"09": "Kyrie",
	"10": "Battler",
	"11": "Ange",
	"12": "Rosa",
	"13": "Maria",
	"14": "Genji",
	"15": "Shannon",
	"16": "Kanon",
	"17": "Gohda",
	"18": "KumasawaChiyo",
	"19": "NanjoTerumasa",
	"20": "Amakusa",
	"21": "Okonogi",
	"22": "Kasumi",
	"23": "ProfessorOotsuki",
	"24": "CaptainKawabata",
	"25": "NanjoMasayuki",
	"26": "KumasawaSabakichi",
	"27": "Beatrice",
	"28": "Bernkastel",
	"29": "Lambdadelta",
	"30": "Virgilia",
	"31": "Ronove",
	"32": "Gaap",
	"33": "Sakutarou",
	"34": "Evatrice",
	"35": "Chiester45",
	"36": "Chiester410",
	"37": "Chiester00",
	"38": "Lucifer",
	"39": "Leviathan",
	"40": "Satan",
	"41": "Belphegor",
	"42": "Mammon",
	"43": "Beelzebub",
	"44": "Asmodeus",
	"45": "Goat",
	"46": "Erika",
	"47": "Dlanor",
	"48": "Gertrude",
	"49": "Cornelia",
	"50": "Featherine",
	"51": "Zepar",
	"52": "Furfur",
	"53": "Lion",
	"54": "Willard",
	"55": "Claire",
	"56": "Ikuko",
	"57": "Tohya",
	"58": "KinzoYoung",
	"59": "BiceChickBeato",
	"60": "BeatoElder",
	"99": "MiscVoices",
}
// knownVariants lists, per message-window suffix, the character IDs that main
// reports as intentional variants instead of suspicious mismatches when they
// differ from the window's expected character.
var knownVariants = map[string]map[string]bool{
	"kin": {
		"58": true,
	},
	"bea": {
		"32": true,
		"55": true,
	},
	"bu3": {
		"57": true,
	},
}
// msgwndRegex matches a line that consists solely of a msgwnd_<suffix> marker,
// optionally followed by a carriage return, and captures the suffix.
var msgwndRegex = regexp.MustCompile(`^msgwnd_(\w+)\r?$`)

// voiceRegex matches a [lv 0*"<digits>"*"<digits>"] tag anywhere in a line and
// captures both digit runs; main reads the first as the character ID and the
// second as the audio ID.
var voiceRegex = regexp.MustCompile(`\[lv 0\*"(\d+)"\*"(\d+)"\]`)
| func charName(id string) string { | |
| if name, ok := characters[id]; ok { | |
| return name | |
| } | |
| return "Unknown" | |
| } | |
// mismatchGroup aggregates every voiced line that shares the same message
// window, expected character and actual character.
type mismatchGroup struct {
	// window is the message-window suffix (the part after "msgwnd_");
	// expected is the character ID chosen for that window and got is the
	// character ID actually found on the grouped lines.
	window, expected, got string

	// count is the number of lines folded into this group.
	count int

	// audioMin and audioMax bound the audio IDs seen in this group.
	audioMin, audioMax string
}
| func printTable(title string, groups []*mismatchGroup) { | |
| if len(groups) == 0 { | |
| fmt.Printf("%s: None\n\n", title) | |
| return | |
| } | |
| total := 0 | |
| for _, g := range groups { | |
| total += g.count | |
| } | |
| hdr := fmt.Sprintf("| %-12s | %-20s | %-20s | %5s | %-12s | %-12s |", | |
| "Window", "Expected", "Got", "Lines", "Audio Min", "Audio Max") | |
| sep := strings.Repeat("-", len(hdr)) | |
| fmt.Println(title) | |
| fmt.Println(sep) | |
| fmt.Println(hdr) | |
| fmt.Println(sep) | |
| for _, g := range groups { | |
| expectedLabel := fmt.Sprintf("%s (%s)", charName(g.expected), g.expected) | |
| gotLabel := fmt.Sprintf("%s (%s)", charName(g.got), g.got) | |
| fmt.Printf("| %-12s | %-20s | %-20s | %5d | %-12s | %-12s |\n", | |
| "msgwnd_"+g.window, expectedLabel, gotLabel, g.count, g.audioMin, g.audioMax) | |
| } | |
| fmt.Println(sep) | |
| fmt.Printf("Subtotal: %d lines\n\n", total) | |
| } | |
| func main() { | |
| data, err := os.ReadFile("internal/quote/data/english.txt") | |
| if err != nil { | |
| fmt.Fprintf(os.Stderr, "failed to read file: %v\n", err) | |
| os.Exit(1) | |
| } | |
| lines := strings.Split(string(data), "\n") | |
| suffixCounts := map[string]map[string]int{} | |
| type entry struct { | |
| suffix string | |
| charID string | |
| audioID string | |
| } | |
| var all []entry | |
| currentSuffix := "" | |
| for i := 0; i < len(lines); i++ { | |
| line := strings.TrimRight(lines[i], "\r") | |
| if m := msgwndRegex.FindStringSubmatch(line); m != nil { | |
| currentSuffix = m[1] | |
| continue | |
| } | |
| if currentSuffix == "" || currentSuffix == "non" { | |
| continue | |
| } | |
| matches := voiceRegex.FindAllStringSubmatch(line, -1) | |
| if len(matches) == 0 { | |
| continue | |
| } | |
| charID := matches[0][1] | |
| audioID := matches[0][2] | |
| all = append(all, entry{suffix: currentSuffix, charID: charID, audioID: audioID}) | |
| if suffixCounts[currentSuffix] == nil { | |
| suffixCounts[currentSuffix] = map[string]int{} | |
| } | |
| suffixCounts[currentSuffix][charID]++ | |
| currentSuffix = "" | |
| } | |
| expectedChar := map[string]string{} | |
| for suffix, counts := range suffixCounts { | |
| bestChar := "" | |
| bestCount := 0 | |
| for charID, count := range counts { | |
| if count > bestCount { | |
| bestCount = count | |
| bestChar = charID | |
| } | |
| } | |
| expectedChar[suffix] = bestChar | |
| } | |
| groups := map[string]*mismatchGroup{} | |
| for _, e := range all { | |
| expected := expectedChar[e.suffix] | |
| if e.charID == expected { | |
| continue | |
| } | |
| key := e.suffix + "|" + expected + "|" + e.charID | |
| g, ok := groups[key] | |
| if !ok { | |
| g = &mismatchGroup{ | |
| window: e.suffix, | |
| expected: expected, | |
| got: e.charID, | |
| audioMin: e.audioID, | |
| audioMax: e.audioID, | |
| } | |
| groups[key] = g | |
| } | |
| g.count++ | |
| if e.audioID < g.audioMin { | |
| g.audioMin = e.audioID | |
| } | |
| if e.audioID > g.audioMax { | |
| g.audioMax = e.audioID | |
| } | |
| } | |
| var intentional []*mismatchGroup | |
| var suspicious []*mismatchGroup | |
| for _, g := range groups { | |
| if variants, ok := knownVariants[g.window]; ok && variants[g.got] { | |
| intentional = append(intentional, g) | |
| } else { | |
| suspicious = append(suspicious, g) | |
| } | |
| } | |
| sortGroups := func(s []*mismatchGroup) { | |
| sort.Slice(s, func(i, j int) bool { | |
| if s[i].window != s[j].window { | |
| return s[i].window < s[j].window | |
| } | |
| return s[i].got < s[j].got | |
| }) | |
| } | |
| sortGroups(intentional) | |
| sortGroups(suspicious) | |
| printTable("INTENTIONAL VARIANTS (known character forms sharing a window)", intentional) | |
| printTable("SUSPICIOUS MISMATCHES (likely data entry bugs)", suspicious) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment