Last active
July 23, 2024 05:14
-
-
Save bbbradsmith/2fa6fcbdb91bfe85bc85f7f20779431a to your computer and use it in GitHub Desktop.
SimCity SNES decompressor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a decompressor for data in the Sim City (Japan) ROM for Super Famicom | |
# | |
# This extracts the complete character set from the game, used for the popup dialogs, | |
# and also all of the text used in these dialogs. | |
# The five SNES versions in various languages can also be dumped by this script, see below. | |
# | |
# The decompressor is for a Nintendo compression format that was apparently used in several games, | |
# and has been known by some as LC_LZ5. It might be useful for other games besides SimCity. | |
import PIL.Image | |
# Default configuration: the Japanese ROM. The ROMVER chain further down
# overrides these values for the other regional releases.
ROM = "SimCity (Japan).sfc" # ROM image file that is opened and read
TILEDATA = 0x06E004 # ROM offset of the compressed dialog tileset
TEXTDATA = [0x6F4E8,0x7126B] # JP text comes in two separate compressed packets
TILESCEN = 0x047C9E # ROM offset of the compressed scenario tileset
TEXTSCEN = [(0x05BB27,5),(0x05F620,12)] # scenario text comes in contiguous groups (5 from bank text, 12 from rio 2047)
PREFIX = "JP_" # prepended to every output filename
JP = True # selects the Japanese code paths (compressed text packets, character pair dump)
COLS = 24 # column count used when rendering dialog text
SCENCOLS = 32 # column count used when rendering scenario text
PAIRCOLS = 32 # characters per row in the character sheet images
ROMVER = 0
# 0 - Japanese
# 1 - USA
# 2 - Europe
# 3 - France
# 4 - Germany
# Flat list of RGB triples handed to PIL's putpalette; the colour index is the position of the triple.
PALETTE = [
0xEE,0xE2,0xDE, # 0 background
0x00,0x00,0x00, # 1 text
0xFF,0x00,0x00, # 2 error
0xDE,0xC6,0xBD, # 3 grid
0xDE,0xFF,0xBD, # 4 reused tile background
0x00,0xA0,0x00, # 5 reused tile text
0xEE,0xE2,0xDE, # 6 2-bit background
0xFF,0x00,0x00, # 7 2-bit text 0
0x00,0x00,0xFF, # 8 2-bit text 1
0x00,0x00,0x00, # 9 2-bit text 2
]
DEBUG = False # prints decompression debug info
# other ROM versions, change ROMVER above
# Each branch replaces the Japanese defaults with the file name, ROM offsets and
# layout of one regional release. ASCII_OFFSET only exists for the non-JP releases:
# it is subtracted from each text byte to get the tile index.
if ROMVER == 1:
    ROM = "SimCity (USA).sfc"
    TILEDATA = 0x04C0FB
    TEXTDATA = [0x7A868,0x7DA83] # non-JP text comes in a single uncompressed ASCII range
    TILESCEN = 0x04875C
    TEXTSCEN = [(0x05BCAD,5),(0x05EE30,12)]
    PREFIX = "US_"
    JP = False
    ASCII_OFFSET = 0
    COLS = 24
    PAIRCOLS = 16
elif ROMVER == 2:
    ROM = "SimCity (Europe).sfc"
    TILEDATA = 0x04C10F
    TEXTDATA = [0x7A868,0x7DA83]
    TILESCEN = 0x04875D
    TEXTSCEN = [(0x05BD54,5),(0x05EED7,12)]
    PREFIX = "EU_"
    JP = False
    ASCII_OFFSET = 0
    COLS = 24
    PAIRCOLS = 16
elif ROMVER == 3:
    ROM = "SimCity (France).sfc"
    TILEDATA = 0x04C601
    TEXTDATA = [0x7B068,0x7E40E]
    TILESCEN = 0x048AF2
    TEXTSCEN = [(0x05C9FE,5),(0x05FC05,12)]
    PREFIX = "FR_"
    JP = False
    ASCII_OFFSET = 0x20 # character table shifted to add more characters
    COLS = 25
    PAIRCOLS = 16
elif ROMVER == 4:
    ROM = "SimCity (Germany).sfc"
    TILEDATA = 0x04C223
    TEXTDATA = [0x7B068,0x7E399]
    TILESCEN = 0x048737
    TEXTSCEN = [(0x05CBE6,5),(0x05FDCE,12)]
    PREFIX = "DE_"
    JP = False
    ASCII_OFFSET = 0x20
    COLS = 25
    PAIRCOLS = 16
elif ROMVER != 0:
    # an unknown version used to fall through silently to the Japanese settings
    raise ValueError("Unknown ROMVER %r (expected 0-4)" % (ROMVER,))
# Nintendo's compression format. | |
# This seems to have been used in several SNES games published by Nintendo. | |
# Some call this format: LC_LZ5 | |
# | |
# It is a series of data packets, stored serially. Each packet starts with a control byte. | |
# If the control byte is $FF, the data is complete. | |
# Otherwise, the top 3 bits are the mode for the packet, and the bottom 5 bits are its length (-1).
# Packet types: | |
# $0x/000 = copy - copy the next (length) bytes to the output | |
# $2x/001 = byte repeat - repeat the next byte (length) times | |
# $4x/010 = word repeat - repeat the next 2 bytes for (length) bytes, length may be uneven | |
# $6x/011 = byte incrementing - take the next byte as a starting value, emit (length) bytes incrementing by 1 each time | |
# $8x/100 = abs reference - the next 2 bytes are relative to the start of the destination data, copy previously decoded data | |
# $Ax/101 = abs reference + invert - same as above but EOR $FF on all copied data | |
# $Cx/110 = relative reference - the next byte is how many bytes to look back from the end of the destination data, and copy | |
# $Ex/111 = extended length prefix (see below) - use a 10-bit length instead of 5-bit for this packet | |
# 111-111 = relative reference + invert - can only be used with an extended length prefix (see below) | |
# Extended length prefix: | |
# $Ex/111 = extended length prefix, which allows up to 10 bits of length, instead of only 5 | |
# The middle 3 bits choose the packet mode as above, and the low 2 bits are now the top 2 bits of length. | |
# A second byte follows giving the bottom 8 bits of the length. Like with the 5-bit lengths, this value is 1 less than the actual length. | |
# 7..bit..0 7..bit..0 | |
# --------- --------- | |
# 111m mmLL LLLL LLLL | |
# If the mode bits given by an extended length prefix are 111, it will be used as a relative reference + invert operation. | |
# However, for lengths that don't need the extension, $Ax/101 is equivalently efficient. | |
# I'm not sure if 111111xx exists in any real-world data. Note that its length can't have both high bits set either, | |
# because $FF will terminate the data. | |
# | |
# This format was derived from the decompression routine residing at $0090A6. It takes inputs: | |
# $0009 (word) - input address relative to $bb0000 | |
# $000B (byte) - data bank (bb) | |
# $000C (word) - (temporaries) | |
# $000E (word) - output address relative to $7E8000 | |
# | |
# I later found this reference that seems to confirm my assessment: | |
# https://github.com/bonimy/MushROMs/blob/master/doc/LC_LZ5%20Compression%20Format.md | |
def nintendo_decompress(rom,offset,print_error=True,print_debug=DEBUG):
    """Decode one compressed stream (the format some call LC_LZ5) from rom.

    Args:
        rom: indexable sequence of byte values holding the compressed data.
        offset: index in rom of the first control byte.
        print_error: if true, print the error message when decoding fails.
        print_debug: if true, print a trace line for every packet decoded.

    Returns:
        A tuple (data, end, valid):
        data  - bytearray of everything decoded (partial output if decoding failed),
        end   - offset just past the last byte read from rom,
        valid - True if the $FF terminator was reached, False if decoding stopped
                on running out of data or on an out of range back reference.
    """
    MODENAME = [
        "0/000 copy",
        "2/001 byte repeat",
        "4/010 word repeat",
        "6/011 incrementing",
        "8/100 abs reference",
        "A/101 abs reference invert",
        "C/110 relative reference",
        "E/111 relative reference invert" ]

    class DecompException(Exception):
        def __init__(self,message):
            super().__init__(message)
            self.message = message

    def next_byte():
        # fetch one byte from the compressed stream and advance the read position
        nonlocal offset
        if offset >= len(rom):
            raise DecompException("Out of data at [%06X]" % (offset))
        r = rom[offset]
        offset += 1
        return r

    d = bytearray()
    result = True
    try:
        while True:
            control = next_byte()
            if control == 0xFF: # finish
                break
            mode = control & 0xE0
            length = control & 0x1F
            if mode == 0xE0: # E/111 extend length
                mode = (control << 3) & 0xE0 # mode is replaced by next three bits
                l = next_byte()
                if print_debug:
                    print("ext: [%06X] %02X %02X" % (offset,control,l))
                length = l | ((control & 0x03) << 8)
            length += 1 # stored length is one less than the actual length
            if print_debug:
                print("mode: %02X [%06X] (%4X) %4d %s" % (mode,offset,len(d),length,MODENAME[mode>>5]))
            if mode == 0x00: # 0/000 copy
                for i in range(length):
                    d.append(next_byte())
            elif mode == 0x20: # 2/001 byte repeat
                r = next_byte()
                for i in range(length):
                    d.append(r)
            elif mode == 0x40: # 4/010 word repeat
                r0 = next_byte()
                r1 = next_byte()
                for i in range(length):
                    d.append(r1 if (i & 1) else r0)
            elif mode == 0x60: # 6/011 incrementing
                r = next_byte()
                for i in range(length):
                    d.append(r)
                    r = (r+1) & 0xFF
            else:
                # 8/100 absolute back reference
                # A/101 absolute back reference + invert
                # C/110 relative back reference
                # E/111 relative back reference + invert
                ref = 0
                if mode & 0x40: # relative
                    ref = len(d) - next_byte()
                    if ref < 0:
                        raise DecompException("Out of range relative reference at [%06X] to [%04X] (%04X)" % (offset,ref,len(d)))
                else: # absolute, low byte first
                    ref = next_byte()
                    ref |= next_byte() << 8
                if print_debug:
                    print("ref: (%02X) at [%06X] to [%04X] (%04X)" % (mode,offset,ref,len(d)))
                # copied one byte at a time because the source range may run into
                # bytes that this same packet has just appended
                for i in range(length):
                    if (ref >= len(d)):
                        raise DecompException("Out of range reference at [%06X] to [%04X] (%04X)" % (offset,ref,len(d)))
                    r = d[ref]
                    ref += 1
                    if mode & 0x20: # invert
                        r ^= 0xFF
                    d.append(r)
    except DecompException as e:
        if print_error: print(e)
        result = False
    return (d,offset,result)
# Renders a text dialog from given text data | |
def simcity_text_render(tiledata,textdata,offset,columns=24,rows=14,grid=False,reuse=False):
    """Draw a block of dialog text into a paletted image.

    tiledata = 1-bpp 8x8 tileset (8 bytes per tile)
    textdata = little-endian 16-bit words indexing tiledata tiles;
               each row of text comes as a pair of tile rows, first the top half then the bottom
    offset   = byte offset into textdata where the text starts
    columns  = characters per row
    rows     = number of text rows (each is two tiles tall)
    grid     = add a 1 pixel grid separating the characters
    reuse    = draw tiles that were already drawn earlier in this image in the "reused" colours

    return (rendered image [PIL.Image], (top,bottom) tile pairs seen [set], invalid tiles present [bool])
    """
    width = 8 * columns
    height = 16 * rows
    background = 2 # anything never drawn over shows in the error colour
    if grid:
        width += 1 + columns
        height += 1 + rows
        background = 3 # ...or in the grid colour when a grid is requested
    width = max(width, 1)
    height = max(height, 1)
    img = PIL.Image.new("P", (width, height), background)
    img.putpalette(PALETTE)

    text_len = len(textdata)
    tile_len = len(tiledata)

    def word_at(pos):
        # little-endian 16-bit tile index stored at textdata[pos]
        return textdata[pos+0] | (textdata[pos+1] << 8)

    pairs = set()
    already_drawn = set()
    invalid = False
    for tile_row in range(rows * 2):
        top_half = (tile_row % 2) == 0
        for col in range(columns):
            pos = offset + ((tile_row * columns) + col) * 2
            if (pos + 2) > text_len:
                continue # ran past the end of the text
            tile = word_at(pos)
            if ((tile * 8) + 8) > tile_len:
                invalid = True # tile index outside the tileset, leave the cell blank
                continue
            if top_half: # record which bottom tile this top tile is paired with
                below = pos + (columns * 2)
                if (below + 2) <= text_len:
                    pairs.add((tile, word_at(below)))
            left = col * 8
            top = tile_row * 8
            if grid:
                left += 1 + col
                top += 1 + (tile_row // 2)
            tint = 4 if tile in already_drawn else 0 # recolor reused tiles
            for y in range(8):
                bits = tiledata[(tile * 8) + y]
                for x in range(8):
                    img.putpixel((left + x, top + y), ((bits >> (7 - x)) & 1) + tint)
            if reuse:
                already_drawn.add(tile)
    return (img, pairs, invalid)
# Renders an entire compressed data packet as text | |
def simcity_text_packet_render(tiledata,textdata,columns=24,grid=False,reuse=False):
    """Render all of textdata from offset 0, with as many text rows as it takes to hold it.

    Returns whatever simcity_text_render returns for that row count.
    """
    bytes_per_text_row = columns * 2 * 2 # two tile rows of 16-bit words
    row_count = (len(textdata) + bytes_per_text_row - 1) // bytes_per_text_row # round up
    return simcity_text_render(tiledata, textdata, 0, columns, row_count, grid, reuse)
# Renders a 1bpp 8x8 tileset | |
def tileset_render(tiledata,columns=16):
    """Draw a 1bpp 8x8 tileset as a sheet of tiles separated by a 1 pixel grid.

    tiledata = 8 bytes per tile, one byte per pixel row, most significant bit leftmost
    columns  = tiles per sheet row

    return rendered image [PIL.Image]
    """
    tile_count = max((len(tiledata) + 7) // 8, 1)
    sheet_rows = (tile_count + columns - 1) // columns
    img = PIL.Image.new("P", (1 + 9 * columns, 1 + 9 * sheet_rows), 3)
    img.putpalette(PALETTE)
    for sheet_row in range(sheet_rows):
        top = 1 + 9 * sheet_row
        for col in range(columns):
            left = 1 + 9 * col
            first = (col + (sheet_row * columns)) * 8
            # the slice simply comes up short (or empty) for tiles past the end of the data
            for y, bits in enumerate(tiledata[first:first + 8]):
                for x in range(8):
                    img.putpixel((left + x, top + y), (bits >> (7 - x)) & 1)
    return img
# Reduces 2bpp scenario tileset to 1bpp | |
def tilescen_reduce(tilescen):
    """Keep the first byte of every complete 2-byte pair, dropping the second.

    A trailing unpaired byte is ignored. Returns a new bytearray.
    """
    return bytearray(tilescen[:-1:2])
# Reduces non-JP 2bpp tilesets to 1bpp | |
def tileset_reduce(tiledata):
    """Merge every complete 2-byte pair into one byte: OR the two, then invert.

    A trailing unpaired byte is ignored. Returns a new bytearray.
    """
    firsts = tiledata[0::2]
    seconds = tiledata[1::2]
    # zip stops at the shorter sequence, which drops an unpaired final byte
    return bytearray((a | b) ^ 0xFF for (a, b) in zip(firsts, seconds))
# Converts scenario text to the dialog format (just needs to remove high bits) | |
def textscen_convert(textscen):
    """Reduce each little-endian 16-bit word to its low 10 bits.

    A trailing unpaired byte is ignored. Returns a new bytearray of
    little-endian 16-bit words.
    """
    out = bytearray()
    for (lo, hi) in zip(textscen[0::2], textscen[1::2]):
        tile = (lo | (hi << 8)) & 0x3FF # same as % 1024
        out.append(tile & 0xFF)
        out.append(tile >> 8)
    return out
# Expands non-JP ASCII text to 16-bit | |
def textdata_expand(textdata,columns=24,ascii_offset=None):
    """Expand 8-bit text into the little-endian 16-bit tile index format.

    Args:
        textdata: iterable of byte values; 0xFF marks the end of a text.
        columns: row width in characters, used to pad each text to whole rows.
        ascii_offset: value subtracted from each byte to get its tile index.
            When None, the module-level ASCII_OFFSET is used (this is the
            behaviour of existing callers).

    Returns:
        bytearray of 16-bit words. A byte that maps below tile 0 becomes 0xFFFF.
        At each 0xFF the current row is padded out with tile 0x20, followed by
        one full row of 0xFFFF to mark the division between texts.
    """
    if ascii_offset is None:
        ascii_offset = ASCII_OFFSET
    d = bytearray()
    c = 0 # current column within the row
    for b in textdata:
        if b != 0xFF:
            tile = b - ascii_offset
            if tile >= 0:
                d.append(tile & 0xFF)
                d.append(tile >> 8)
            else: # error
                d.append(0xFF)
                d.append(0xFF)
            c = (c + 1) % columns
        else: # FF is end of text
            # NOTE(review): the padding tile is ord(' ') with no offset applied,
            # unlike the text bytes above - confirm this is intended when the offset is nonzero
            while c != 0:
                d.append(ord(' '))
                d.append(0)
                c = (c + 1) % columns
            for i in range(columns): # blank line to mark division
                d.append(0xFF)
                d.append(0xFF)
    return d
# | |
# | |
# Main program | |
# | |
# | |
# read the whole ROM image into memory
with open(ROM,"rb") as f:
    rom = f.read()
print("%s read..." % (ROM))
# fetch the tile data from the ROM
# before displaying text, 8x8 1-bpp TILEDATA is decompressed to 7E8800,
# then these are transformed into 2-bpp SNES tiles as needed by the text,
# using palettes to combine 2 1-bpp layers into a single tile for compact use of VRAM
(tiledata,tiledata_end,valid) = nintendo_decompress(rom,TILEDATA)
# the .bin keeps the data exactly as decompressed, before any reduction
with open(PREFIX+"tiledata.bin","wb") as f:
    f.write(tiledata)
tiledata_size = len(tiledata)
if not JP: # collapse 2bpp to 1bpp
    tiledata = tileset_reduce(tiledata)
tileset_render(tiledata).save(PREFIX+"tiledata.png")
print("Tile data decompressed: %d bytes, compressed to %d bytes (%stiledata.bin/png)" % (tiledata_size,tiledata_end-TILEDATA,PREFIX))
# dump text data
# pairs collects every (top tile, bottom tile) combination seen in the Japanese text
pairs = set()
if JP:
    # each entry of TEXTDATA is the ROM offset of one compressed packet
    for textdata in TEXTDATA:
        fn = "%stext_%06X" % (PREFIX,textdata)
        (d,textdata_end,valid) = nintendo_decompress(rom,textdata)
        with open(fn+".bin","wb") as f:
            f.write(d)
        (img,textpairs,invalid) = simcity_text_packet_render(tiledata,d,COLS)
        img.save(fn+".png")
        print("Dumped text [%06X]: %d bytes, compressed to %d bytes (%s.bin/png)" % (textdata,len(d),textdata_end-textdata,fn))
        pairs.update(textpairs)
else: # non-Japanese versions just had a single uncompressed ASCII block
    # TEXTDATA is the [start,end) range of that block
    d = rom[TEXTDATA[0]:TEXTDATA[1]]
    de = textdata_expand(d,COLS)
    fn = "%stext_%06X" % (PREFIX,TEXTDATA[0])
    with open(fn+".bin","wb") as f:
        f.write(d)
    (img,textpairs,invalid) = simcity_text_packet_render(tiledata,de,COLS)
    img.save(fn+".png")
    print("Dumped text [%06X]: %d bytes (%s.bin/png)" % (TEXTDATA[0],len(d),fn))
# dump character pairs
if JP:
    def pairkey(p): # maximum of pairs indicates the order of the first appearance of the pairing
        return (max(p),p[0],p[1])
    pairsort = sorted(pairs,key=pairkey)
    # lay the pairs out as text: PAIRCOLS characters per row, each row being a
    # line of top tiles followed by a line of bottom tiles, unused cells left as 0xFFFF
    pairrows = (len(pairsort)+PAIRCOLS-1)//PAIRCOLS
    pairdata = bytearray(b"\xFF" * (pairrows*PAIRCOLS*2*2))
    for (i,(t0,t1)) in enumerate(pairsort):
        c = i % PAIRCOLS
        r = i // PAIRCOLS
        o = ((r * PAIRCOLS * 2) + c) * 2
        pairdata[o+0] = t0 & 0xFF
        pairdata[o+1] = t0 >> 8
        pairdata[o+(PAIRCOLS*2)+0] = t1 & 0xFF
        pairdata[o+(PAIRCOLS*2)+1] = t1 >> 8
    (img,textpairs,invalid) = simcity_text_packet_render(tiledata,pairdata,PAIRCOLS,True)
    img.save(PREFIX+"characters.png")
    print("Dumped characters: %d total (%scharacters.png)" % (len(pairsort),PREFIX))
    # check for unused tiles, make sure they're all accounted for
    usedtiles = set()
    for (t0,t1) in pairs:
        usedtiles.add(t0)
        usedtiles.add(t1)
    unused = 0
    usedmax = max(usedtiles,default=0)
    for i in range(usedmax+1):
        if i not in usedtiles:
            print("Unused tile: %03X" % (i))
            unused += 1
    if not unused: print("No unused tiles found")
    print("Used tile range: 000-%03X" % (usedmax))
    # same sheet again, with tiles that already appeared in an earlier character recoloured
    (img,textpairs,invalid) = simcity_text_packet_render(tiledata,pairdata,PAIRCOLS,True,True)
    img.save(PREFIX+"characters_reuse.png")
    # the message now names the file that was actually saved above
    print("Characters used: %d (%scharacters_reuse.png)" % (len(usedtiles),PREFIX))
# separate tileset for scenarios
(tilescen,tilescen_end,valid) = nintendo_decompress(rom,TILESCEN)
# write the scenario tileset itself (this used to write the dialog tileset by mistake)
with open(PREFIX+"tilescen.bin","wb") as f:
    f.write(tilescen)
tilescen_size = len(tilescen)
tilescen = tilescen_reduce(tilescen)
if JP:
    # render as characters: each one pairs tile ti (top) with tile ti+16 (bottom)
    tilescen_count = len(tilescen) // 16
    scenpair = bytearray(b"\xFF" * (((tilescen_count+PAIRCOLS-1)//PAIRCOLS)*PAIRCOLS*2*2))
    for i in range(tilescen_count):
        c = i % PAIRCOLS
        r = i // PAIRCOLS
        o = ((r * PAIRCOLS * 2) + c) * 2
        ti = (i % 16) + 16 * (2 * (i // 16))
        scenpair[o+0] = ti & 0xFF
        scenpair[o+1] = ti >> 8
        scenpair[o+(PAIRCOLS*2)+0] = (ti + 16) & 0xFF
        scenpair[o+(PAIRCOLS*2)+1] = (ti + 16) >> 8
    (img,scenpair,invalid) = simcity_text_packet_render(tilescen,scenpair,PAIRCOLS,True)
else:
    img = tileset_render(tilescen)
img.save(PREFIX+"tilescen.png")
print("Tile scenario decompressed: %d bytes, compressed to %d bytes (%stilescen.bin/png)" % (tilescen_size,tilescen_end-TILESCEN,PREFIX))
# scenarios
# each TEXTSCEN entry is (offset of the first packet, number of packets in the group)
for (textscen_off,count) in TEXTSCEN:
    for i in range(count):
        fn = "%sscen_%06X" % (PREFIX,textscen_off)
        (d,textscen_end,result) = nintendo_decompress(rom,textscen_off)
        with open(fn+".bin","wb") as f:
            f.write(d)
        (img,scenpairs,invalid) = simcity_text_packet_render(tilescen,textscen_convert(d),SCENCOLS)
        img.save(fn+".png")
        print("Dumped scenario [%06X]: %d bytes, compressed to %d bytes (%s.bin/png)" % (textscen_off,len(d),textscen_end-textscen_off,fn))
        # packets in a group are contiguous: the next one starts where this one ended
        textscen_off = textscen_end
# search for all potential valid compressed text packets in ROM
# (this is how the TEXTDATA and TILEDATA packets were found)
# disabled by default: change the condition to True to run the search,
# and enable whichever of the inner dump options are wanted
if False:
    o_progress = ~0
    o = 0
    SEARCH_MIN = COLS*2*2 # minimum packet size of one row
    #SEARCH_MIN = 2048
    while o < len(rom):
        # report progress each time the search enters a new 4K page
        if (o & ~0x00000FFF) != (o_progress & ~0x00000FFF):
            print("Searching [%06X]..." % (o & ~0x00000FFF))
            o_progress = o
        (sd,so,sresult) = nintendo_decompress(rom,o,False)
        if (not sresult) or (len(sd) < SEARCH_MIN):
            # no packet found, advance to next byte
            o += 1
        else:
            print("Found: [%06X]-[%06X] %05X (%d) bytes" % (o,so,len(sd),len(sd)))
            filename = "%06X" % (o)
            if False: # looking for text
                (img,pairs,invalid) = simcity_text_packet_render(tiledata,sd if JP else textdata_expand(sd,COLS),COLS)
                img.save(PREFIX+"dumptext_"+filename+".png")
            if False: # looking for scenario text
                (img,pairs,invalid) = simcity_text_packet_render(tilescen,textscen_convert(sd),SCENCOLS)
                img.save(PREFIX+"dumpscen_"+filename+".png")
            if False: # looking for tileset
                img = tileset_render(sd if JP else tileset_reduce(sd))
                img.save(PREFIX+"dumptile_"+filename+".png")
            if False: # dump data
                with open(PREFIX+filename+".bin","wb") as f:
                    f.write(sd)
            # advance to end of packet
            o = so
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment