Skip to content

Instantly share code, notes, and snippets.

@DaseinPhaos
Created November 17, 2024 07:24
Show Gist options
  • Save DaseinPhaos/632066cc3a31dadfc521f2d4420379d4 to your computer and use it in GitHub Desktop.
Save DaseinPhaos/632066cc3a31dadfc521f2d4420379d4 to your computer and use it in GitHub Desktop.
Utility to fix garbled filenames in directories containing Japanese files from mishandled Shift-JIS encoded archives.
#import "Basic";
#import "Windows";
#import "Windows_Utf8";
#import "File";
#import "File_Utilities";
#import "String";
// Entry point: expects the directory to fix as the first command-line argument,
// repairs the names found under it and logs how many entries were visited/renamed.
main :: () {
    argc : s32;
    // Use the wide-character command line so a non-ASCII directory argument survives intact.
    argv := CommandLineToArgvW(GetCommandLineW(), *argc);

    // argv[0] is the program name, so the directory is argv[1]; we need at least
    // two entries before indexing it. CommandLineToArgvW returns null on failure.
    if argv == null || argc < 2 {
        log("Usage: jis_fix DIRECTORY_TO_FIX");
        return;
    }

    fs: FixStatistics;
    fs.acp = GetACP(); // TODO: or specify this from command line somehow.

    fix_directory_recursive(wide_to_utf8_new(argv[1]), *fs); // let's be braindead and just do a recursive call...
    log("done: total visited: %, total fixed: %", fs.visited, fs.fixed);
}
// Running totals and settings shared by every level of the directory walk.
FixStatistics :: struct {
    // Number of entries handed to the visitor so far.
    visited: s32 = 0;

    // Number of entries for which the rename call reported success.
    fixed: s32 = 0;

    // Code page used to turn the garbled wide characters back into raw bytes;
    // main fills this in from GetACP().
    acp: u32 = 0;
}
// Repairs the names of the direct children of `directoryToFix` and descends into
// every sub-directory by calling itself (visit_files is invoked non-recursively so
// that the descent can follow a directory to its *new* path after it was renamed).
//
// `statistics.visited` is bumped for every entry seen, `statistics.fixed` for every
// entry that was actually renamed. `statistics.acp` is the code page used to turn
// the garbled name back into its raw bytes.
fix_directory_recursive :: (directoryToFix: string, statistics: *FixStatistics) {
    //log("directoryToFix: [%]", directoryToFix);
    visitor :: (info: *File_Visit_Info, statistics: *FixStatistics) {
        statistics.visited += 1;
        auto_release_temp();

        //log("info.full_name: [%]", info.full_name);
        dir, _, _, name := path_decomp(info.full_name);
        //log("dir: [%], name: [%]", dir, name);

        // `name` might hold Shift-JIS bytes that were decoded with the wrong code page
        // (statistics.acp). To undo that we re-encode the name with that code page to get
        // the raw bytes back, then decode those bytes as Shift-JIS.
        // A LOT OF unnecessary encoding overhead! sigh
        CP_shift_jis :u32: 932;
        CP_utf8      :u32: 65001;

        // Step 1: the utf8 name as seen by us -> the wide chars as stored on disk.
        wc, wcOk := mb2wc(xx name, CP_utf8,, temp);
        if !wcOk {
            log("failed to recover the WideChar sequence...name: [%]", name);
            return;
        }

        // Step 2: wide chars -> the raw bytes, using the code page that mis-decoded them.
        jisBytes, jisOk := wc2mb(wc, statistics.acp,, temp);
        if !jisOk {
            log("failed to recover the ShiftJIS sequence...name: [%]", name);
            return;
        }

        // Step 3: raw bytes -> wide chars, this time decoding them as Shift-JIS.
        wc, wcOk = mb2wc(jisBytes, CP_shift_jis,, temp);
        if !wcOk {
            log("failed to rencode into a valid WideChar sequence...name: [%]", name);
            return;
        }

        // Step 4: back to utf8 so the result can be used with the file API.
        correctName, correctionOk := wide_to_utf8_new(wc.data, xx wc.count,, temp);
        if !correctionOk {
            log("failed to encode the WideChar sequence into utf8...name: [%]", name);
            return;
        }

        // The round trip leaves some names untouched (e.g. plain ASCII ones). Those need
        // no rename and must not be counted as fixed, but a directory may still contain
        // garbled children, so keep descending.
        if correctName == name {
            if info.is_directory  fix_directory_recursive(info.full_name, statistics);
            return;
        }

        // Temp storage is enough: the path is only needed until this visitor returns,
        // and nested calls only release what they allocated themselves.
        newPath := join(dir, correctName,, temp);
        print("renaming [%] to [%]", info.full_name, newPath);
        ok := file_move(info.full_name, newPath);
        print(ifx ok then "ok!\n" else "oops.. failed!\n");
        if !ok return;

        statistics.fixed += 1;
        // The directory now lives at newPath, so that is where its children are.
        if info.is_directory  fix_directory_recursive(newPath, statistics);
    }

    visit_files(directoryToFix, false, statistics, visitor, visit_directories=true);
}
// Encodes the wide-character sequence `wc` into bytes of code page `cp`.
//
// Returns the newly allocated byte array (from the context allocator) and `true`
// on success. On any failure the reason is logged and the second return value is `false`.
wc2mb :: (wc: []u16, cp: u32) -> []u8, bool {
    omb: []u8;

    // First pass with no output buffer: ask how many bytes the result needs.
    sizeNeeded := WideCharToMultiByte(cp, 0, wc.data, xx wc.count, null, 0, null, null);
    if sizeNeeded <= 0 {
        log("failed to decode, error code: %", GetLastError());
        return omb, false;
    }

    omb = NewArray(sizeNeeded, u8);
    if omb.count != sizeNeeded {
        log("failed to allocate multi-bytes buffer, size needed: %, size got: %", sizeNeeded, omb.count);
        return omb, false;
    }

    // Second pass: do the actual conversion into the buffer.
    sizeDecoded := WideCharToMultiByte(cp, 0, wc.data, xx wc.count, omb.data, xx omb.count, null, null);
    if sizeDecoded != sizeNeeded {
        // Report through the normal failure path instead of asserting, so one bad
        // conversion cannot abort the caller's whole run. Read the error code before
        // freeing, in case the free touches the thread's last-error value.
        log("failed to decode, error code: %", GetLastError());
        free(omb.data);
        failed: []u8;
        return failed, false;
    }

    return omb, true;
}
// Decodes the bytes `mb`, interpreted in code page `cp`, into wide characters.
//
// Returns the newly allocated u16 array (from the context allocator) and `true`
// on success. On any failure the reason is logged and the second return value is `false`.
mb2wc :: (mb: []u8, cp: u32) -> []u16, bool {
    owc: []u16;

    // First pass with no output buffer: ask how many wide chars the result needs.
    sizeNeeded := MultiByteToWideChar(cp, 0, mb.data, xx mb.count, null, 0);
    if sizeNeeded <= 0 {
        log("failed to decode, error code: %", GetLastError());
        return owc, false;
    }

    owc = NewArray(sizeNeeded, u16);
    if owc.count != sizeNeeded {
        log("failed to allocate wide char buffer, size needed: %, size got: %", sizeNeeded, owc.count);
        return owc, false;
    }

    // Second pass: do the actual conversion into the buffer.
    sizeDecoded := MultiByteToWideChar(cp, 0, mb.data, xx mb.count, owc.data, xx owc.count);
    if sizeDecoded != sizeNeeded {
        // Report through the normal failure path instead of asserting, so one bad
        // conversion cannot abort the caller's whole run. Read the error code before
        // freeing, in case the free touches the thread's last-error value.
        log("failed to decode, error code: %", GetLastError());
        free(owc.data);
        failed: []u16;
        return failed, false;
    }

    return owc, true;
}
// Win32 bindings declared by hand for this file.

// Used by main to pick the code page stored in FixStatistics.acp.
GetACP :: () -> u32 #foreign Kernel32;
// Used by main as the input to CommandLineToArgvW.
GetCommandLineW :: () -> *u16 #foreign Kernel32;
// Used by main to obtain the argument array; the argument count is written to `pNumArgs`.
CommandLineToArgvW :: (lpCmdLine: *u16, pNumArgs: *s32) -> **u16 #foreign Shell32;
// System libraries the foreign procedures above are resolved from.
Kernel32 :: #system_library "Kernel32";
Shell32 :: #system_library "Shell32";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment