Created
November 17, 2024 07:24
-
-
Save DaseinPhaos/632066cc3a31dadfc521f2d4420379d4 to your computer and use it in GitHub Desktop.
Utility to fix garbled filenames in directories containing Japanese files from mishandled Shift-JIS encoded archives.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import "Basic"; | |
#import "Windows"; | |
#import "Windows_Utf8"; | |
#import "File"; | |
#import "File_Utilities"; | |
#import "String"; | |
// Program entry point.
// Expects one command-line argument: the directory whose entries should be
// renamed. Prints a usage line and returns when it is missing.
main :: () {
    // Fetch the arguments as UTF-16 straight from the OS.
    // The returned argv block is never released; the process ends right after the walk.
    argc: s32;
    argv := CommandLineToArgvW(GetCommandLineW(), *argc);

    // argv[1] is read below, so at least two entries are required
    // (argv[0] is the program name). Also guard against the API call failing.
    if argv == null || argc < 2 {
        log("Usage: jis_fix DIRECTORY_TO_FIX");
        return;
    }

    directory, converted := wide_to_utf8_new(argv[1]);
    if !converted {
        log("failed to convert the directory argument to utf8");
        return;
    }

    fs: FixStatistics;
    fs.acp = GetACP(); // TODO: or specify this from command line somehow.

    // let's be braindead and just do a recursive call...
    fix_directory_recursive(directory, *fs);
    log("done: total visited: %, total fixed: %", fs.visited, fs.fixed);
}
// Counters and settings threaded through the directory walk.
FixStatistics :: struct {
    visited: s32;  // bumped once for every entry handed to the visitor
    fixed:   s32;  // bumped after each successful file_move
    acp:     u32;  // code page given to wc2mb when re-encoding a name; main fills it from GetACP()
}
// Visits the direct children of `directoryToFix` (files and directories) and
// tries to repair each entry's name, then descends manually into every
// directory that was processed successfully.
//
// Repair pipeline for a single name:
//   utf8 bytes -> UTF-16 -> bytes in code page `statistics.acp`
//   -> reinterpret those bytes as Shift-JIS (CP 932) -> UTF-16 -> utf8
//
// `statistics.visited` counts every entry seen; `statistics.fixed` counts only
// entries that were actually renamed.
fix_directory_recursive :: (directoryToFix: string, statistics: *FixStatistics) {
    visitor :: (info: *File_Visit_Info, statistics: *FixStatistics) {
        statistics.visited += 1;
        auto_release_temp();

        dir, _, _, name := path_decomp(info.full_name);

        // `name` might contain shift_jis encoded stuff that has been incorrectly
        // interpreted as CP_65001. So we try to correct it here.
        // A LOT OF unnecessary encoding overhead! sigh
        CP_shift_jis :u32: 932;
        CP_utf8      :u32: 65001;

        // first recover the incorrectly decoded widechar sequence
        wc, wcOk := mb2wc(xx name, CP_utf8,, temp);
        if !wcOk {
            log("failed to recover the WideChar sequence...name: [%]", name);
            return;
        }

        // then encode it back into a multi-byte string using the code page
        // that was originally (wrongly) used to decode it
        jisBytes, jisOk := wc2mb(wc, statistics.acp,, temp);
        if !jisOk {
            log("failed to recover the ShiftJIS sequence...name: [%]", name);
            return;
        }

        // then decode those bytes as Shift-JIS into a correct widechar sequence
        wc, wcOk = mb2wc(jisBytes, CP_shift_jis,, temp);
        if !wcOk {
            log("failed to rencode into a valid WideChar sequence...name: [%]", name);
            return;
        }

        correctName, correctionOk := wide_to_utf8_new(wc.data, xx wc.count,, temp);
        if !correctionOk {
            log("failed to encode the WideChar sequence into utf8...name: [%]", name);
            return;
        }

        newPath := join(dir, correctName);

        // Only touch the file system when the name really changed. Names that
        // survive the round trip untouched (e.g. plain ASCII) used to be "moved"
        // onto themselves, which printed a bogus rename line and, when the move
        // succeeded, inflated the `fixed` counter.
        if correctName != name {
            print("renaming [%] to [%]", info.full_name, newPath);
            ok := file_move(info.full_name, newPath);
            print(ifx ok then "ok!\n" else "oops.. failed!\n");
            if !ok  return;
            statistics.fixed += 1;
        }

        // visit_files is called non-recursively below, so descend by hand,
        // using the path the directory has now.
        if info.is_directory {
            fix_directory_recursive(newPath, statistics);
        }
    }

    visit_files(directoryToFix, false, statistics, visitor, visit_directories=true);
}
// Converts the UTF-16 units in `wc` to bytes in code page `cp` using
// WideCharToMultiByte (flags 0, no default-char arguments).
// Returns the byte array and true on success; on failure logs the reason and
// returns false. The buffer comes from NewArray.
wc2mb :: (wc: []u16, cp: u32) -> []u8, bool {
    result: []u8;

    // Pass 1: no output buffer, so the call only reports how many bytes are needed.
    required := WideCharToMultiByte(cp, 0, wc.data, xx wc.count, null, 0, null, null);
    if required <= 0 {
        log("failed to decode, error code: %", GetLastError());
        return result, false;
    }

    result = NewArray(required, u8);
    if result.count != required {
        log("failed to allocate multi-bytes buffer, size needed: %, size got: %", required, result.count);
        return result, false;
    }

    // Pass 2: the actual conversion into the freshly sized buffer.
    written := WideCharToMultiByte(cp, 0, wc.data, xx wc.count, result.data, xx result.count, null, null);
    assert(written == required);

    return result, true;
}
// Decodes the bytes in `mb`, interpreted in code page `cp`, into UTF-16 units
// using MultiByteToWideChar (flags 0).
// Returns the UTF-16 array and true on success; on failure logs the reason and
// returns false. The buffer comes from NewArray.
mb2wc :: (mb: []u8, cp: u32) -> []u16, bool {
    result: []u16;

    // Pass 1: no output buffer, so the call only reports how many UTF-16 units are needed.
    required := MultiByteToWideChar(cp, 0, mb.data, xx mb.count, null, 0);
    if required <= 0 {
        log("failed to decode, error code: %", GetLastError());
        return result, false;
    }

    result = NewArray(required, u16);
    if result.count != required {
        log("failed to allocate wide char buffer, size needed: %, size got: %", required, result.count);
        return result, false;
    }

    // Pass 2: the actual conversion into the freshly sized buffer.
    written := MultiByteToWideChar(cp, 0, mb.data, xx mb.count, result.data, xx result.count);
    assert(written == required);

    return result, true;
}
// System libraries the foreign declarations below bind against.
Kernel32 :: #system_library "Kernel32";
Shell32  :: #system_library "Shell32";

// Win32 entry points declared by hand for this program.
GetACP             :: () -> u32 #foreign Kernel32;   // result is stored in FixStatistics.acp by main
GetCommandLineW    :: () -> *u16 #foreign Kernel32;  // result is passed to CommandLineToArgvW by main
CommandLineToArgvW :: (lpCmdLine: *u16, pNumArgs: *s32) -> **u16 #foreign Shell32;  // returns argv, writes argc through pNumArgs
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment