Created
November 1, 2021 04:55
-
-
Save bbbradsmith/5ba281debaf9001071b3cc0b636025a8 to your computer and use it in GitHub Desktop.
Iga Ninden Gaiou text extraction (unfinished)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Files:
# Iga Ninden Gaiou (Japan).chd CRC32 1243A2DE
#
# Use chdman to extractcd:
# ing.bin CRC32 07E9BA8D

# input disc image (extracted from the CHD) and the patched output image
romin = "ing.bin"
romout = "inge.bin"

# read the whole image into memory; the context manager closes the handle
# as soon as the read finishes instead of leaving it to the garbage collector
with open(romin, "rb") as rom_file:
    rom = rom_file.read()
# check to try to sort out real text from garbage
def text_check(s):
    """Return True if s contains at least one of the marker characters.

    The markers are the most common 4 hiragana plus punctuation; a decoded
    block without any of them is treated as garbage by the callers.
    """
    for marker in ("う", "ん", "い", "し", "。", "?"):
        if marker in s:
            return True
    return False
# standard PCE text encoding seems to be a variant of shift_jisx0213
# probably need to add a bunch of special cases as well
def decode_range(rom, i, span=64):
    """Decode span bytes of rom starting at offset i as shift_jisx0213.

    Bytes that cannot be decoded are kept in the result as backslash
    escapes rather than raising an error.
    """
    return rom[i:i + span].decode("shift_jisx0213", "backslashreplace")
# decodes a block of the file and prints it
def view_range(rom, i, span=64, check=False):
    """Print the decoded text of the span-byte block at offset i.

    When check is true the block is printed only if text_check accepts the
    decoded text; otherwise it is always printed.
    """
    text = decode_range(rom, i, span)
    if check and not text_check(text):
        return  # filtered out as probable garbage
    print("%08X: %s" % (i, text))
# views a range of blocks from the file, every block at 2 offsets
# (characters are 2-byte but can start/stop on 1-byte offsets)
def check_range(rom, start, end, span=64, check=True):
    """View every span-sized block in [start, end) at both byte alignments.

    Each block is passed to view_range twice, once at its own offset and
    once shifted by one byte, with check forwarded unchanged.
    """
    print("check_range(%08X,%08X)" % (start, end))
    for block_start in range(start, end, span):
        for shift in (0, 1):
            view_range(rom, block_start + shift, span, check)
    print("done.")
# views a group of strings, terminated/separated by a 0 byte
def view_group(rom, i, end=-1):
    """Print consecutive 0-terminated strings starting at offset i.

    rom -- the ROM data (bytes or bytearray)
    i   -- offset of the first string in the group
    end -- offset at which to stop starting new strings; a negative value
           (the default) means no limit

    The listing stops at the first string shorter than 2 bytes, when i
    reaches end, or when no 0 terminator remains in rom. end is only tested
    before each string, so a string that begins before end is printed in
    full even if it extends past it. Finally prints the offset reached.
    """
    print("view_group(%08X)" % i)
    while (end < 0) or (i < end):
        try:
            e = rom.index(0, i)  # search for next 0 to mark end of string
        except ValueError:
            # no terminator left in the data: end the group cleanly instead
            # of crashing before the closing "end" line is printed
            break
        if (e - i) < 2:  # all characters are 2-byte?
            break
        d = decode_range(rom, i, e - i)
        print("%08X: [%s]" % (i, d))
        i = e + 1  # continue just past the terminator
    print("end %08X" % i)
# replace bytes in ROM with an encoded string
# (probably need to deal with groups of 0 terminated strings?)
def inject_string(rom, i, s):
    """Overwrite rom at offset i with s encoded as shift_jisx0213 plus a 0 byte.

    rom -- mutable ROM data (bytearray), modified in place
    i   -- non-negative offset at which the encoded string starts
    s   -- text to write; each "\\n" is written as the bytes 02 06 90

    Raises IndexError, without modifying rom, if i is negative or the
    encoded string and its terminator do not fit inside rom.
    Raises UnicodeEncodeError if a character cannot be encoded.
    """
    encoded = bytearray()
    # encode one character at a time, exactly as the bytes were built before
    for c in s:
        if c == "\n":  # is this how the game does newlines?
            encoded.extend((0x02, 0x06, 0x90))
        else:
            encoded.extend(c.encode(encoding="shift_jisx0213"))
    encoded.append(0)  # string terminator

    # validate before writing so a string that does not fit cannot leave the
    # ROM half-overwritten, and a negative offset cannot wrap to the end
    stop = i + len(encoded)
    if i < 0 or stop > len(rom):
        raise IndexError(
            "string of %d bytes at %08X does not fit in ROM of %d bytes"
            % (len(encoded), i, len(rom)))
    rom[i:stop] = encoded  # same-length slice assignment: never resizes rom
# examples to explore ROM
##check_range(rom,0,len(rom)) # explore entire ROM, only recognized text
##check_range(rom,0x0080DA56,0x00810AB5) # explore range, only recognized text
##check_range(rom,0x0080DA56,0x00810AB5,64,False) # explore range, all as text

# suspicious areas that seem to contain text
# (use view_group to find the exact location and break these down)
##check_range(rom,0x0080D700,0x00820000)
##check_range(rom,0x008266C0,0x00826800)
##check_range(rom,0x0086EA40,0x00890400)
##check_range(rom,0x00FA0780,0x00FA5000)
##check_range(rom,0x240C0200,0x240EE400)
##check_range(rom,0x2487AD00,0x2487AE00)

# bits of text groups found via check_range
# (each tuple holds the view_group arguments that follow rom: start[, end])
for group_args in (
    (0x0080D730,),
    (0x00810AB5,),
    # TODO group 0x008266C0,0x00826800
    (0x0086EA47, 0x0086EBA0),
    (0x0086EBA3,),
    (0x0086F181,),
    (0x008743CB, 0x874424),
    (0x00876001,),
    (0x00876464,),
    (0x008764C2,),
    (0x00876614,),
    (0x00876664,),
    (0x0087669F,),
    (0x00876711,),
    (0x00876886,),
    (0x00876909,),
    (0x00876A45,),
    # there's still a lot more in 0x0086EA40,0x00890400
):
    view_group(rom, *group_args)

# there are duplicates of some things, e.g.:
for duplicate_start in (0x0086EBA3, 0x00FA3B13, 0x240C2C5D):
    view_group(rom, duplicate_start)
# now let's try to modify it
rom = bytearray(rom)  # modifiable copy of the image
#inject_string(rom,0x0086EBA3+0x1C,"Is this?")
inject_string(rom, 0x00FA3B13, "How are you\ngentlemen?")
#inject_string(rom,0x240C2C5D+0x1C,"What the?")
view_group(rom, 0x00FA3B13)  # show the group again to confirm the change

# write the patched image; the context manager guarantees the file is
# flushed and closed before the success message is printed
with open(romout, "wb") as out_file:
    out_file.write(rom)
print("%s written (%d bytes)" % (romout, len(rom)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment