Skip to content

Instantly share code, notes, and snippets.

@bbbradsmith
Last active July 23, 2024 05:14
Show Gist options
  • Save bbbradsmith/2fa6fcbdb91bfe85bc85f7f20779431a to your computer and use it in GitHub Desktop.
Save bbbradsmith/2fa6fcbdb91bfe85bc85f7f20779431a to your computer and use it in GitHub Desktop.
SimCity SNES decompressor
# This is a decompressor for data in the Sim City (Japan) ROM for Super Famicom
#
# This extracts the complete character set from the game, used for the popup dialogs,
# and also all of the text used in these dialogs.
# The five SNES versions in various languages can also be dumped by this script, see below.
#
# The decompressor is for a Nintendo compression format that was apparently used in several games,
# and has been known by some as LC_LZ5. It might be useful for other games besides SimCity.
import PIL.Image
# Default configuration: the Japanese ROM (ROMVER 0).
# The ROMVER chain further down overrides these values for the other releases.
ROM = "SimCity (Japan).sfc"
TILEDATA = 0x06E004
TEXTDATA = [0x6F4E8,0x7126B] # JP text comes in two separate compressed packets
TILESCEN = 0x047C9E
TEXTSCEN = [(0x05BB27,5),(0x05F620,12)] # scenario text comes in contiguous groups (5 from bank text, 12 from rio 2047)
PREFIX = "JP_"
JP = True
ASCII_OFFSET = 0 # only meaningful for non-JP text; defined here so textdata_expand never hits an undefined name
COLS = 24
SCENCOLS = 32
PAIRCOLS = 32
ROMVER = 0
# 0 - Japanese
# 1 - USA
# 2 - Europe
# 3 - France
# 4 - Germany
PALETTE = [
    0xEE,0xE2,0xDE, # 0 background
    0x00,0x00,0x00, # 1 text
    0xFF,0x00,0x00, # 2 error
    0xDE,0xC6,0xBD, # 3 grid
    0xDE,0xFF,0xBD, # 4 reused tile background
    0x00,0xA0,0x00, # 5 reused tile text
    0xEE,0xE2,0xDE, # 6 2-bit background
    0xFF,0x00,0x00, # 7 2-bit text 0
    0x00,0x00,0xFF, # 8 2-bit text 1
    0x00,0x00,0x00, # 9 2-bit text 2
]
DEBUG = False # prints decompression debug info
# other ROM versions, change ROMVER above
if ROMVER == 1:
    ROM = "SimCity (USA).sfc"
    TILEDATA = 0x04C0FB
    TEXTDATA = [0x7A868,0x7DA83] # non-JP text comes in a single uncompressed ASCII range
    TILESCEN = 0x04875C
    TEXTSCEN = [(0x05BCAD,5),(0x05EE30,12)]
    PREFIX = "US_"
    JP = False
    ASCII_OFFSET = 0
    COLS = 24
    PAIRCOLS = 16
elif ROMVER == 2:
    ROM = "SimCity (Europe).sfc"
    TILEDATA = 0x04C10F
    TEXTDATA = [0x7A868,0x7DA83]
    TEXTSCEN = [(0x05BD54,5),(0x05EED7,12)]
    TILESCEN = 0x04875D
    PREFIX = "EU_"
    JP = False
    ASCII_OFFSET = 0
    COLS = 24
    PAIRCOLS = 16
elif ROMVER == 3:
    ROM = "SimCity (France).sfc"
    TILEDATA = 0x04C601
    TEXTDATA = [0x7B068,0x7E40E]
    TILESCEN = 0x048AF2
    TEXTSCEN = [(0x05C9FE,5),(0x05FC05,12)]
    PREFIX = "FR_"
    JP = False
    ASCII_OFFSET = 0x20 # character table shifted to add more characters
    COLS = 25
    PAIRCOLS = 16
elif ROMVER == 4:
    ROM = "SimCity (Germany).sfc"
    TILEDATA = 0x04C223
    TEXTDATA = [0x7B068,0x7E399]
    TILESCEN = 0x048737
    TEXTSCEN = [(0x05CBE6,5),(0x05FDCE,12)]
    PREFIX = "DE_"
    JP = False
    ASCII_OFFSET = 0x20
    COLS = 25
    PAIRCOLS = 16
# Nintendo's compression format.
# This seems to have been used in several SNES games published by Nintendo.
# Some call this format: LC_LZ5
#
# It is a series of data packets, stored serially. Each packet starts with a control byte.
# If the control byte is $FF, the data is complete.
# Otherwise, the top 3 bits are the mode for the packet, and the bottom 5 bits are its length (-1).
# Packet types:
# $0x/000 = copy - copy the next (length) bytes to the output
# $2x/001 = byte repeat - repeat the next byte (length) times
# $4x/010 = word repeat - repeat the next 2 bytes for (length) bytes, length may be uneven
# $6x/011 = byte incrementing - take the next byte as a starting value, emit (length) bytes incrementing by 1 each time
# $8x/100 = abs reference - the next 2 bytes are relative to the start of the destination data, copy previously decoded data
# $Ax/101 = abs reference + invert - same as above but EOR $FF on all copied data
# $Cx/110 = relative reference - the next byte is how many bytes to look back from the end of the destination data, and copy
# $Ex/111 = extended length prefix (see below) - use a 10-bit length instead of 5-bit for this packet
# 111-111 = relative reference + invert - can only be used with an extended length prefix (see below)
# Extended length prefix:
# $Ex/111 = extended length prefix, which allows up to 10 bits of length, instead of only 5
# The middle 3 bits choose the packet mode as above, and the low 2 bits are now the top 2 bits of length.
# A second byte follows giving the bottom 8 bits of the length. Like with the 5-bit lengths, this value is 1 less than the actual length.
# 7..bit..0 7..bit..0
# --------- ---------
# 111m mmLL LLLL LLLL
# If the mode bits given by an extended length prefix are 111, it will be used as a relative reference + invert operation.
# However, for lengths that don't need the extension, $Ax/101 is equivalently efficient.
# I'm not sure if 111111xx exists in any real-world data. Note that its length can't have both high bits set either,
# because $FF will terminate the data.
#
# This format was derived from the decompression routine residing at $0090A6. It takes inputs:
# $0009 (word) - input address relative to $bb0000
# $000B (byte) - data bank (bb)
# $000C (word) - (temporaries)
# $000E (word) - output address relative to $7E8000
#
# I later found this reference that seems to confirm my assessment:
# https://github.com/bonimy/MushROMs/blob/master/doc/LC_LZ5%20Compression%20Format.md
def nintendo_decompress(rom,offset,print_error=True,print_debug=DEBUG):
    """Decode one compressed stream (the format called LC_LZ5 above) from rom.

    Args:
        rom: indexable byte sequence containing the compressed data.
        offset: index in rom of the first control byte.
        print_error: print the reason when decoding fails.
        print_debug: print a trace of every packet as it is decoded.

    Returns:
        (data, end, valid):
        data  - decompressed bytes [bytearray]; partial output if decoding failed
        end   - offset just past the last compressed byte consumed [int]
        valid - True if the $FF terminator was reached without error [bool]
    """
    class DecompException(Exception):
        # Internal only: raised and caught inside this function.
        def __init__(self,message):
            super().__init__(message)
            self.message = message
    # indexed by (mode >> 5), only used for the debug trace
    mode_names = (
        "0/000 copy",
        "2/001 byte repeat",
        "4/010 word repeat",
        "6/011 incrementing",
        "8/100 abs reference",
        "A/101 abs reference invert",
        "C/110 relative reference",
        "E/111 relative reference invert")
    def next_byte():
        # fetch one byte of compressed input and advance the read offset
        nonlocal offset
        if offset >= len(rom):
            raise DecompException("Out of data at [%06X]" % (offset))
        value = rom[offset]
        offset += 1
        return value
    d = bytearray()
    result = True
    try:
        while True:
            control = next_byte()
            if control == 0xFF: # finish
                break
            mode = control & 0xE0
            length = control & 0x1F
            if mode == 0xE0: # E/111 extend length
                mode = (control << 3) & 0xE0 # mode is replaced by the next three bits
                low = next_byte()
                if print_debug:
                    print("ext: [%06X] %02X %02X" % (offset,control,low))
                length = low | ((control & 0x03) << 8)
            length += 1 # stored length is one less than the real length
            if print_debug:
                print("mode: %02X [%06X] (%4X) %4d %s" % (mode,offset,len(d),length,mode_names[mode>>5]))
            if mode == 0x00: # 0/000 copy
                # byte by byte so that output stays partial if the input runs out
                for _ in range(length):
                    d.append(next_byte())
            elif mode == 0x20: # 2/001 byte repeat
                d.extend(bytes([next_byte()]) * length)
            elif mode == 0x40: # 4/010 word repeat
                r0 = next_byte()
                r1 = next_byte()
                # length counts bytes, so an odd length ends on r0
                d.extend((bytes([r0,r1]) * ((length + 1) // 2))[:length])
            elif mode == 0x60: # 6/011 incrementing
                r = next_byte()
                d.extend((r + i) & 0xFF for i in range(length))
            else:
                # 8/100 absolute back reference
                # A/101 absolute back reference + invert
                # C/110 relative back reference
                # E/111 relative back reference + invert
                invert = 0xFF if (mode & 0x20) else 0x00
                if mode & 0x40: # relative: distance back from the end of the output
                    ref = len(d) - next_byte()
                    if ref < 0:
                        raise DecompException("Out of range relative reference at [%06X] to [%04X] (%04X)" % (offset,ref,len(d)))
                else: # absolute: 16-bit position in the output, low byte first
                    ref = next_byte()
                    ref |= next_byte() << 8
                if print_debug:
                    print("ref: (%02X) at [%06X] to [%04X] (%04X)" % (mode,offset,ref,len(d)))
                # must go byte by byte: the source may overlap bytes written by this packet
                for _ in range(length):
                    if ref >= len(d):
                        raise DecompException("Out of range reference at [%06X] to [%04X] (%04X)" % (offset,ref,len(d)))
                    d.append(d[ref] ^ invert)
                    ref += 1
    except DecompException as e:
        if print_error: print(e)
        result = False
    return (d,offset,result)
# Renders a text dialog from given text data
def simcity_text_render(tiledata,textdata,offset,columns=24,rows=14,grid=False,reuse=False):
    """Render a text dialog from tile index data.

    tiledata = 1-bpp 8x8 tileset (8 bytes per tile)
    textdata = 16-bit little-endian words indexing tiledata tiles;
               each row of text comes as a pair, first the top half then the bottom
    offset   = byte offset into textdata of the first word
    columns  = characters per row
    rows     = text rows (each is two tiles tall)
    grid     = add a 1 pixel grid separating the characters
    reuse    = draw tiles that were already drawn earlier in the alternate colours

    return (rendered image [PIL.Image], tile pairs [set of (top,bottom)], invalid tiles present [bool])
    """
    def word_at(pos):
        return textdata[pos] | (textdata[pos+1] << 8)
    width = 8*columns
    height = 16*rows
    background = 2 # error colour shows through wherever nothing is drawn
    if grid:
        width += columns + 1
        height += rows + 1
        background = 3
    width = max(width,1)
    height = max(height,1)
    img = PIL.Image.new("P",(width,height),background)
    img.putpalette(PALETTE)
    pairs = set()
    seen = set()
    invalid = False
    row_bytes = columns*2
    for half_row in range(rows*2):
        text_row = half_row // 2
        is_top = (half_row % 2) == 0
        for col in range(columns):
            pos = offset + ((half_row*columns)+col)*2
            if (pos+2) > len(textdata): # ran past the end of the text
                continue
            tile = word_at(pos)
            first = tile*8
            if (first+8) > len(tiledata): # tile does not exist in the tileset
                invalid = True
                continue
            if is_top: # record the (top,bottom) pairing of this character
                below = pos + row_bytes
                if (below+2) <= len(textdata):
                    pairs.add((tile,word_at(below)))
            left = col*8
            top = half_row*8
            if grid:
                left += col + 1
                top += text_row + 1
            tint = 4 if tile in seen else 0 # recolor reused tiles
            for y,bits in enumerate(tiledata[first:first+8]):
                for x in range(8):
                    img.putpixel((left+x,top+y),((bits >> (7-x)) & 1) + tint)
            if reuse:
                seen.add(tile)
    return (img,pairs,invalid)
# Renders an entire compressed data packet as text
def simcity_text_packet_render(tiledata,textdata,columns=24,grid=False,reuse=False):
    """Render all of textdata as text, with as many rows as are needed to hold it.

    Returns the same (image, pairs, invalid) tuple as simcity_text_render.
    """
    bytes_per_row = columns * 2 * 2 # two tile rows of 16-bit words per text row
    row_count = (len(textdata) + bytes_per_row - 1) // bytes_per_row # round up
    return simcity_text_render(
        tiledata,
        textdata,
        0,
        columns,
        row_count,
        grid,
        reuse)
# Renders a 1bpp 8x8 tileset
def tileset_render(tiledata,columns=16):
    """Render a 1-bpp 8x8 tileset as a grid image, one grid line between tiles.

    tiledata = 8 bytes per tile, one byte per pixel row, most significant bit leftmost
    columns  = tiles per image row
    return rendered image [PIL.Image]
    """
    tile_count = max((len(tiledata)+7) // 8, 1)
    row_count = (tile_count + columns - 1) // columns
    img = PIL.Image.new("P",(1 + 9 * columns, 1 + 9 * row_count),3)
    img.putpalette(PALETTE)
    # walk the data byte by byte; each byte is one pixel row of one tile
    for pos,bits in enumerate(tiledata):
        (tile,y) = divmod(pos,8)
        (row,col) = divmod(tile,columns)
        left = 1 + 9 * col
        top = 1 + 9 * row + y
        for x in range(8):
            img.putpixel((left+x,top),(bits >> (7-x)) & 1)
    return img
# Reduces 2bpp scenario tileset to 1bpp
def tilescen_reduce(tilescen):
    """Reduce the 2bpp scenario tileset to 1bpp by keeping the first byte of every byte pair.

    A trailing unpaired byte is dropped. Returns a new bytearray.
    """
    paired_length = len(tilescen) - (len(tilescen) % 2)
    return bytearray(tilescen[0:paired_length:2])
# Reduces non-JP 2bpp tilesets to 1bpp
def tileset_reduce(tiledata):
    """Reduce a non-JP 2bpp tileset to 1bpp.

    Each pair of bytes is OR-ed together and inverted to give one output byte.
    A trailing unpaired byte is dropped. Returns a new bytearray.
    """
    first_bytes = tiledata[0::2]
    second_bytes = tiledata[1::2]
    # zip stops at the shorter sequence, which discards an unpaired last byte
    return bytearray((a | b) ^ 0xFF for (a,b) in zip(first_bytes,second_bytes))
# Converts scenario text to the dialog format (just needs to remove high bits)
def textscen_convert(textscen):
    """Convert scenario text to the dialog format.

    Each 16-bit little-endian word keeps only its low 10 bits.
    A trailing unpaired byte is dropped. Returns a new bytearray.
    """
    out = bytearray()
    for (low,high) in zip(textscen[0::2],textscen[1::2]):
        tile = (low | (high << 8)) & 0x3FF # strip the high bits
        out += bytes((tile & 0xFF, tile >> 8))
    return out
# Expands non-JP ASCII text to 16-bit
def textdata_expand(textdata,columns=24):
    """Expand non-JP 8-bit text to the 16-bit tile index format.

    Every byte other than $FF has ASCII_OFFSET subtracted and becomes one 16-bit word,
    or $FFFF if the result is negative. A $FF byte ends a text: the current row is
    padded with spaces, and a full row of $FFFF follows to mark the division.
    Returns a new bytearray.
    """
    out = bytearray()
    col = 0
    for raw in textdata:
        if raw == 0xFF: # FF is end of text
            while col != 0: # pad out the unfinished row
                out += b" \x00"
                col = (col + 1) % columns
            out += b"\xFF\xFF" * columns # blank line to mark division
            continue
        tile = raw - ASCII_OFFSET
        if tile >= 0:
            out += bytes((tile,0))
        else: # error
            out += b"\xFF\xFF"
        col = (col + 1) % columns
    return out
#
#
# Main program
#
#
# read the whole ROM image into memory (the file is closed as soon as it is read)
with open(ROM,"rb") as rom_file:
    rom = rom_file.read()
print("%s read..." % (ROM))
# fetch the tile data from the ROM
# before displaying text, 8x8 1-bpp TILEDATA is decompressed to 7E8800,
# then these are transformed into 2-bpp SNES tiles as needed by the text,
# using palettes to combine 2 1-bpp layers into a single tile for compact use of VRAM
(tiledata,tiledata_end,valid) = nintendo_decompress(rom,TILEDATA)
# the .bin dump holds the data exactly as decompressed, before any reduction
with open(PREFIX+"tiledata.bin","wb") as out_file:
    out_file.write(tiledata)
tiledata_size = len(tiledata)
if not JP: # collapse 2bpp to 1bpp
    tiledata = tileset_reduce(tiledata)
tileset_render(tiledata).save(PREFIX+"tiledata.png")
print("Tile data decompressed: %d bytes, compressed to %d bytes (%stiledata.bin/png)" % (tiledata_size,tiledata_end-TILEDATA,PREFIX))
# dump text data, this seemed to be contained in exactly 2 compressed packets
# pairs collects every (top tile, bottom tile) combination seen in the JP text
pairs = set()
if JP:
    for textdata in TEXTDATA:
        fn = "%stext_%06X" % (PREFIX,textdata)
        (d,textdata_end,valid) = nintendo_decompress(rom,textdata)
        with open(fn+".bin","wb") as out_file:
            out_file.write(d)
        (img,textpairs,invalid) = simcity_text_packet_render(tiledata,d,COLS)
        img.save(fn+".png")
        print("Dumped text [%06X]: %d bytes, compressed to %d bytes (%s.bin/png)" % (textdata,len(d),textdata_end-textdata,fn))
        pairs.update(textpairs)
else: # non-Japanese versions just had a single uncompressed ASCII block
    d = rom[TEXTDATA[0]:TEXTDATA[1]]
    de = textdata_expand(d,COLS) # 16-bit form, used only for rendering
    fn = "%stext_%06X" % (PREFIX,TEXTDATA[0])
    with open(fn+".bin","wb") as out_file:
        out_file.write(d)
    (img,textpairs,invalid) = simcity_text_packet_render(tiledata,de,COLS)
    img.save(fn+".png")
    print("Dumped text [%06X]: %d bytes (%s.bin/png)" % (TEXTDATA[0],len(d),fn))
# dump character pairs
if JP:
    def pairkey(p): # maximum of pairs indicates the order of the first appearance of the pairing
        return (max(p),p[0],p[1])
    pairsort = sorted(pairs,key=pairkey)
    # one text row (top and bottom halves) per PAIRCOLS characters, unused cells left as $FFFF
    pairdata = bytearray([0xFF]*((len(pairsort)+PAIRCOLS-1)//PAIRCOLS)*PAIRCOLS*2*2)
    for (i,(t0,t1)) in enumerate(pairsort):
        c = i % PAIRCOLS
        r = i // PAIRCOLS
        o = ((r * PAIRCOLS * 2) + c) * 2
        pairdata[o+0] = t0 & 0xFF
        pairdata[o+1] = t0 >> 8
        pairdata[o+(PAIRCOLS*2)+0] = t1 & 0xFF
        pairdata[o+(PAIRCOLS*2)+1] = t1 >> 8
    (img,textpairs,invalid) = simcity_text_packet_render(tiledata,pairdata,PAIRCOLS,True)
    img.save(PREFIX+"characters.png")
    print("Dumped characters: %d total (%scharacters.png)" % (len(pairsort),PREFIX))
    # check for unused tiles, make sure they're all accounted for
    usedtiles = set()
    for (t0,t1) in pairs:
        usedtiles.add(t0)
        usedtiles.add(t1)
    unused = 0
    usedmax = max(usedtiles,default=0)
    for i in range(usedmax+1):
        if i not in usedtiles:
            print("Unused tile: %03X" % (i))
            unused += 1
    if not unused: print("No unused tiles found")
    print("Used tile range: 000-%03X" % (usedmax))
    # same sheet again, with tiles that already appeared earlier drawn in the reuse colours
    (img,textpairs,invalid) = simcity_text_packet_render(tiledata,pairdata,PAIRCOLS,True,True)
    img.save(PREFIX+"characters_reuse.png")
    # the message now names the file that is actually written above
    print("Characters used: %d (%scharacters_reuse.png)" % (len(usedtiles),PREFIX))
# separate tileset for scenarios
(tilescen,tilescen_end,valid) = nintendo_decompress(rom,TILESCEN)
# dump the scenario tileset as decompressed (this must be tilescen, not the dialog tiledata)
with open(PREFIX+"tilescen.bin","wb") as out_file:
    out_file.write(tilescen)
tilescen_size = len(tilescen)
tilescen = tilescen_reduce(tilescen)
if JP:
    # lay the tiles out as characters: each top tile is paired with the tile 16 places after it
    tilescen_count = len(tilescen) // 16
    scenpair = bytearray([0xFF]*((tilescen_count+PAIRCOLS-1)//PAIRCOLS)*PAIRCOLS*2*2)
    for i in range(tilescen_count):
        c = i % PAIRCOLS
        r = i // PAIRCOLS
        o = ((r * PAIRCOLS * 2) + c) * 2
        ti = (i % 16) + 16 * (2 * (i // 16))
        scenpair[o+0] = ti & 0xFF
        scenpair[o+1] = ti >> 8
        scenpair[o+(PAIRCOLS*2)+0] = (ti + 16) & 0xFF
        scenpair[o+(PAIRCOLS*2)+1] = (ti + 16) >> 8
    (img,scenpair,invalid) = simcity_text_packet_render(tilescen,scenpair,PAIRCOLS,True)
else:
    img = tileset_render(tilescen)
img.save(PREFIX+"tilescen.png")
print("Tile scenario decompressed: %d bytes, compressed to %d bytes (%stilescen.bin/png)" % (tilescen_size,tilescen_end-TILESCEN,PREFIX))
# scenarios
# each TEXTSCEN entry is (offset of first packet, number of packets stored back to back)
for (textscen_off,count) in TEXTSCEN:
    for _ in range(count):
        fn = "%sscen_%06X" % (PREFIX,textscen_off)
        (d,textscen_end,result) = nintendo_decompress(rom,textscen_off)
        with open(fn+".bin","wb") as out_file:
            out_file.write(d)
        (img,scenpairs,invalid) = simcity_text_packet_render(tilescen,textscen_convert(d),SCENCOLS)
        img.save(fn+".png")
        print("Dumped scenario [%06X]: %d bytes, compressed to %d bytes (%s.bin/png)" % (textscen_off,len(d),textscen_end-textscen_off,fn))
        # the next packet of the group starts where this one ended
        textscen_off = textscen_end
# search for all potential valid compressed text packets in ROM
# (this is how the TEXTDATA and TILEDATA packets were found)
# disabled by default; change the condition below to run the search
if False:
    PAGE_MASK = ~0x00000FFF
    o_progress = ~0
    o = 0
    SEARCH_MIN = COLS*2*2 # minimum packet size of one row
    #SEARCH_MIN = 2048
    while o < len(rom):
        # report progress whenever the search enters a new $1000 byte page
        if (o & PAGE_MASK) != (o_progress & PAGE_MASK):
            print("Searching [%06X]..." % (o & PAGE_MASK))
        o_progress = o
        (sd,so,sresult) = nintendo_decompress(rom,o,False)
        found = sresult and (len(sd) >= SEARCH_MIN)
        if not found:
            # no packet found, advance to next byte
            o += 1
            continue
        print("Found: [%06X]-[%06X] %05X (%d) bytes" % (o,so,len(sd),len(sd)))
        filename = "%06X" % (o)
        # enable whichever of the outputs below fits what is being looked for
        if False: # looking for text
            (img,pairs,invalid) = simcity_text_packet_render(tiledata,sd if JP else textdata_expand(sd,COLS),COLS)
            img.save(PREFIX+"dumptext_"+filename+".png")
        if False: # looking for scenario text
            (img,pairs,invalid) = simcity_text_packet_render(tilescen,textscen_convert(sd),SCENCOLS)
            img.save(PREFIX+"dumpscen_"+filename+".png")
        if False: # looking for tileset
            img = tileset_render(sd if JP else tileset_reduce(sd))
            img.save(PREFIX+"dumptile_"+filename+".png")
        if False: # dump data
            open(PREFIX+filename+".bin","wb").write(sd)
        # advance to end of packet
        o = so
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment