bbbradsmith · July 23, 2024 05:14
diff --git a/simcity_decompress.py b/simcity_decompress.py
 # This is a decompressor for data in the Sim City (Japan) ROM for Super Famicom
 #
 # This extracts the complete character set from the game, used for the popup dialogs,
 # and also all of the text used in these dialogs.
 # The five SNES versions in various languages can also be dumped by this script, see below.
 #
 # The decompressor is for a Nintendo compression format that was apparently used in several games,
 # and has been known by some as LC_LZ5. It might be useful for other games besides SimCity.

 import PIL.Image

 # Default configuration: the Japanese ROM (ROMVER 0).
 # The ROMVER chain further down replaces these values for the other regional versions.
 ROM = "SimCity (Japan).sfc" # ROM image file read by the main program
 TILEDATA = 0x06E004 # ROM file offset of the compressed dialog tileset
 TEXTDATA = [0x6F4E8,0x7126B] # JP text comes in two separate compressed packets
 TILESCEN = 0x047C9E # ROM file offset of the compressed scenario tileset
 TEXTSCEN = [(0x05BB27,5),(0x05F620,12)] # scenario text comes in contiguous groups (5 from bank text, 12 from rio 2047)
 PREFIX = "JP_" # prepended to every output filename
 JP = True # selects the Japanese code paths in the main program
 COLS = 24 # tiles per row when rendering dialog text
 SCENCOLS = 32 # tiles per row when rendering scenario text
 PAIRCOLS = 32 # tiles per row when rendering the character pair sheets

 ROMVER = 0
 # 0 - Japanese
 # 1 - USA
 # 2 - Europe
 # 3 - France
 # 4 - Germany

 # Flat list of R,G,B values handed to PIL's putpalette; the trailing number is the palette index.
 PALETTE = [
    0xEE,0xE2,0xDE, # 0 background
    0x00,0x00,0x00, # 1 text
    0xFF,0x00,0x00, # 2 error
    0xDE,0xC6,0xBD, # 3 grid
    0xDE,0xFF,0xBD, # 4 reused tile background
    0x00,0xA0,0x00, # 5 reused tile text
    0xEE,0xE2,0xDE, # 6 2-bit background
    0xFF,0x00,0x00, # 7 2-bit text 0
    0x00,0x00,0xFF, # 8 2-bit text 1
    0x00,0x00,0x00, # 9 2-bit text 2
    ]

 DEBUG = False # prints decompression debug info

 # other ROM versions, change ROMVER above
 # ASCII_OFFSET (non-JP only) is subtracted from each text byte to get its tile index
 if ROMVER == 1:
    ROM = "SimCity (USA).sfc"
    TILEDATA = 0x04C0FB
    TEXTDATA = [0x7A868,0x7DA83] # non-JP text comes in a single uncompressed ASCII range
    TILESCEN = 0x04875C
    TEXTSCEN = [(0x05BCAD,5),(0x05EE30,12)]
    PREFIX = "US_"
    JP = False
    ASCII_OFFSET = 0
    COLS = 24
    PAIRCOLS = 16
 elif ROMVER == 2:
    ROM = "SimCity (Europe).sfc"
    TILEDATA = 0x04C10F
    TEXTDATA = [0x7A868,0x7DA83]
    TEXTSCEN = [(0x05BD54,5),(0x05EED7,12)]
    TILESCEN = 0x04875D
    PREFIX = "EU_"
    JP = False
    ASCII_OFFSET = 0
    COLS = 24
    PAIRCOLS = 16
 elif ROMVER == 3:
    ROM = "SimCity (France).sfc"
    TILEDATA= 0x04C601
    TEXTDATA = [0x7B068,0x7E40E]
    TILESCEN = 0x048AF2
    TEXTSCEN = [(0x05C9FE,5),(0x05FC05,12)]
    PREFIX = "FR_"
    JP = False
    ASCII_OFFSET = 0x20 # character table shifted to add more characters
    COLS = 25
    PAIRCOLS = 16
 elif ROMVER == 4:
    ROM = "SimCity (Germany).sfc"
    TILEDATA= 0x04C223
    TEXTDATA = [0x7B068,0x7E399]
    TILESCEN = 0x048737
    TEXTSCEN = [(0x05CBE6,5),(0x05FDCE,12)]
    PREFIX = "DE_"
    JP = False
    ASCII_OFFSET = 0x20
    COLS = 25
    PAIRCOLS = 16



 # Nintendo's compression format.
 # This seems to have been used in several SNES games published by Nintendo.
 # Some call this format: LC_LZ5
 #
 # It is a series of data packets, stored serially. Each packet starts with a control byte.
 # If the control byte is $FF, the data is complete.
 # Otherwise, the top 3 bits are the mode for the packet, and the bottom 5 bits are its length (-1).
 # Packet types:
 #   $0x/000 = copy - copy the next (length) bytes to the output
 #   $2x/001 = byte repeat - repeat the next byte (length) times
 #   $4x/010 = word repeat - repeat the next 2 bytes for (length) bytes, length may be uneven
 #   $6x/011 = byte incrementing - take the next byte as a starting value, emit (length) bytes incrementing by 1 each time
 #   $8x/100 = abs reference - the next 2 bytes are relative to the start of the destination data, copy previously decoded data
 #   $Ax/101 = abs reference + invert - same as above but EOR $FF on all copied data
 #   $Cx/110 = relative reference - the next byte is how many bytes to look back from the end of the destination data, and copy
 #   $Ex/111 = extended length prefix (see below) - use a 10-bit length instead of 5-bit for this packet
 #   111-111 = relative reference + invert - can only be used with an extended length prefix (see below)
 # Extended length prefix:
 #   $Ex/111 = extended length prefix, which allows up to 10 bits of length, instead of only 5
 #       The middle 3 bits choose the packet mode as above, and the low 2 bits are now the top 2 bits of length.
 #       A second byte follows giving the bottom 8 bits of the length. Like with the 5-bit lengths, this value is 1 less than the actual length.
 #       7..bit..0 7..bit..0
 #       --------- ---------
 #       111m mmLL LLLL LLLL
 #   If the mode bits given by an extended length prefix are 111, it will be used as a relative reference + invert operation.
 #   However, for lengths that don't need the extension, $Ax/101 is equivalently efficient.
 #   I'm not sure if 111111xx exists in any real-world data. Note that its length can't have both high bits set either,
 #   because $FF will terminate the data.
 #
 # This format was derived from the decompression routine residing at $0090A6. It takes inputs:
 #   $0009 (word) - input address relative to $bb0000
 #   $000B (byte) - data bank (bb)
 #   $000C (word) - (temporaries)
 #   $000E (word) - output address relative to $7E8000
 #
 # I later found this reference that seems to confirm my assessment:
 #   https://github.com/bonimy/MushROMs/blob/master/doc/LC_LZ5%20Compression%20Format.md

 def nintendo_decompress(rom,offset,print_error=True,print_debug=DEBUG):
     """Decode one LC_LZ5 compressed stream found at ``offset`` in ``rom``.

     Packets are decoded until a $FF control byte is read. Decoding stops early
     if the input runs out or a back reference points outside the data decoded
     so far; whatever was decoded up to that point is still returned.

     rom         -- indexable sequence of byte values (e.g. bytes) holding the stream
     offset      -- index in rom of the first control byte
     print_error -- print the failure reason when decoding stops early
     print_debug -- print a trace line for every packet (defaults to the module DEBUG flag)

     return: (decompressed data [bytearray], offset just past the last byte read [int], valid [bool])
     """
     class DecompException(Exception):
         """Raised internally for truncated input or an out-of-range reference."""

     MODENAME = [
         "0/000 copy",
         "2/001 byte repeat",
         "4/010 word repeat",
         "6/011 incrementing",
         "8/100 abs reference",
         "A/101 abs reference invert",
         "C/110 relative reference",
         "E/111 relative reference invert" ]

     def next_byte():
         # fetch one input byte and advance the read position
         nonlocal offset
         if offset >= len(rom):
             raise DecompException("Out of data at [%06X]" % (offset))
         value = rom[offset]
         offset += 1
         return value

     d = bytearray()
     result = True
     try:
         while True:
             control = next_byte()
             if control == 0xFF: # finish
                 break
             mode   = control & 0xE0
             length = control & 0x1F
             if mode == 0xE0: # E/111 extended length prefix
                 mode = (control << 3) & 0xE0 # the real mode is the next three bits
                 low = next_byte()
                 if print_debug:
                     print("ext: [%06X] %02X %02X" % (offset,control,low))
                 length = low | ((control & 0x03) << 8)
             length += 1 # stored length is one less than the real length
             if print_debug:
                 print("mode: %02X [%06X] (%4X) %4d %s" % (mode,offset,len(d),length,MODENAME[mode>>5]))
             if mode == 0x00: # 0/000 copy
                 # byte by byte, so a truncated stream still keeps what was available
                 for _ in range(length):
                     d.append(next_byte())
             elif mode == 0x20: # 2/001 byte repeat
                 r = next_byte()
                 d.extend(bytes([r]) * length)
             elif mode == 0x40: # 4/010 word repeat
                 r0 = next_byte()
                 r1 = next_byte()
                 # alternate r0,r1,r0,... ; an odd length ends on r0
                 d.extend((r1 if (i & 1) else r0) for i in range(length))
             elif mode == 0x60: # 6/011 incrementing
                 r = next_byte()
                 d.extend((r + i) & 0xFF for i in range(length))
             else:
                 # 8/100 absolute back reference
                 # A/101 absolute back reference + invert
                 # C/110 relative back reference
                 # E/111 relative back reference + invert
                 if mode & 0x40: # relative: count back from the end of the output
                     ref = len(d) - next_byte()
                     if ref < 0:
                         raise DecompException("Out of range relative reference at [%06X] to [%04X] (%04X)" % (offset,ref,len(d)))
                 else: # absolute: 16-bit little-endian index from the start of the output
                     ref = next_byte()
                     ref |= next_byte() << 8
                 if print_debug:
                     print("ref: (%02X) at [%06X] to [%04X] (%04X)" % (mode,offset,ref,len(d)))
                 invert = 0xFF if (mode & 0x20) else 0x00
                 # copied one byte at a time because the source may overlap bytes written by this packet
                 for _ in range(length):
                     if ref >= len(d):
                         raise DecompException("Out of range reference at [%06X] to [%04X] (%04X)" % (offset,ref,len(d)))
                     d.append(d[ref] ^ invert)
                     ref += 1
     except DecompException as e:
         if print_error: print(e)
         result = False
     return (d,offset,result)



 # Renders a text dialog from given text data
 def simcity_text_render(tiledata,textdata,offset,columns=24,rows=14,grid=False,reuse=False):
     # Draws `rows` lines of text, each line made of two stacked rows of 8x8 tiles.
     # tiledata = 1-bpp 8x8 tileset (8 bytes per tile, most significant bit is the leftmost pixel)
     # textdata = 16-bit little-endian words indexing tiledata tiles
     #            each row of text comes as a pair, first the top half then the bottom
     # offset = byte offset into textdata of the first word
     # grid = add a 1 pixel grid separating the characters, otherwise they are packed with no gap
     # reuse = recolour tiles that were already drawn earlier in this render
     # return (rendered image [PIL.image], tile pair [set], invalid tiles present [bool])
     width = 8*columns
     height = 16*rows
     background = 2 # error colour: cells with no text data stay this colour
     if grid:
         width += columns + 1
         height += rows + 1
         background = 3
     img = PIL.Image.new("P",(max(width,1),max(height,1)),background)
     img.putpalette(PALETTE)
     pairs = set()
     seen = set()
     invalid = False
     row_bytes = columns*2
     for tile_row in range(rows*2):
         is_top_half = (tile_row % 2) == 0
         for col in range(columns):
             pos = offset + ((tile_row*columns)+col)*2
             if pos+2 > len(textdata):
                 continue # ran off the end of the text
             tile = textdata[pos] | (textdata[pos+1] << 8)
             if (tile*8)+8 > len(tiledata):
                 invalid = True # tile index beyond the tileset
                 continue
             if is_top_half: # record which bottom half goes with this top half
                 below = pos + row_bytes
                 if below+2 <= len(textdata):
                     pairs.add((tile, textdata[below] | (textdata[below+1] << 8)))
             left = col*8
             top = tile_row*8
             if grid:
                 left += col + 1
                 top += (tile_row//2) + 1
             tint = 4 if tile in seen else 0 # recolor reused tiles
             for y in range(8):
                 bits = tiledata[(tile*8)+y]
                 for x in range(8):
                     img.putpixel((left+x,top+y),((bits >> (7-x)) & 1) + tint)
             if reuse:
                 seen.add(tile)
     return (img,pairs,invalid)


 # Renders an entire compressed data packet as text
 def simcity_text_packet_render(tiledata,textdata,columns=24,grid=False,reuse=False):
     # one line of text = 2 tile rows of `columns` 16-bit words
     line_bytes = columns*2*2
     # round up so a trailing partial line is still rendered
     line_count = (len(textdata) + line_bytes - 1) // line_bytes
     return simcity_text_render(tiledata,textdata,0,columns,line_count,grid,reuse)


 # Renders a 1bpp 8x8 tileset
 def tileset_render(tiledata,columns=16):
     # Draws every 8-byte tile of tiledata on a sheet `columns` tiles wide,
     # with a 1 pixel grid line around each tile. Returns the PIL image.
     tile_count = max((len(tiledata)+7) // 8, 1)
     rows = (tile_count + columns - 1) // columns
     img = PIL.Image.new("P",(1 + 9*columns, 1 + 9*rows),3)
     img.putpalette(PALETTE)
     for (index,bits) in enumerate(tiledata):
         (tile,y) = divmod(index,8)
         (r,c) = divmod(tile,columns)
         left = 1 + 9*c
         top = 1 + 9*r + y
         for x in range(8):
             img.putpixel((left+x,top),(bits >> (7-x)) & 1)
     return img


 # Reduces 2bpp scenario tileset to 1bpp
 def tilescen_reduce(tilescen):
     # keeps the first byte of every complete byte pair; a trailing odd byte is dropped
     pair_count = len(tilescen) // 2
     return bytearray(tilescen[2*i] for i in range(pair_count))

 # Reduces non-JP 2bpp tilesets to 1bpp
 def tileset_reduce(tiledata):
     # ORs the two bytes of every complete pair together and inverts the result;
     # a trailing odd byte is dropped
     evens = tiledata[0::2]
     odds = tiledata[1::2]
     return bytearray((a | b) ^ 0xFF for (a,b) in zip(evens,odds))

 # Converts scenario text to the dialog format (just needs to remove high bits)
 def textscen_convert(textscen):
     # each 16-bit little-endian word keeps only its low 10 bits; a trailing odd byte is dropped
     out = bytearray()
     for (lo,hi) in zip(textscen[0::2],textscen[1::2]):
         word = (lo | (hi << 8)) & 0x3FF
         out += bytes((word & 0xFF, word >> 8))
     return out

 # Expands non-JP ASCII text to 16-bit
 def textdata_expand(textdata,columns=24):
     """Expand non-JP 8-bit text into the 16-bit tile words used by the text renderer.

     Every byte other than 0xFF becomes the word (byte - ASCII_OFFSET); bytes
     below ASCII_OFFSET become the error word 0xFFFF. A 0xFF byte ends a text:
     the current row is padded with spaces to a multiple of `columns`, then one
     full row of 0xFFFF words is emitted to mark the division.

     textdata -- iterable of byte values
     columns  -- words per row, used for the padding and the divider row

     Returns the expanded data as a bytearray of little-endian words.
     """
     # The padding space goes through the same offset as the text itself, so that
     # it selects the same tile a real space would (they differ when ASCII_OFFSET != 0).
     space = ord(' ') - ASCII_OFFSET
     space_word = (space,0) if 0 <= space <= 0xFF else (0xFF,0xFF)
     d = bytearray()
     c = 0 # column position within the current row
     for b in textdata:
         if b != 0xFF:
             b -= ASCII_OFFSET
             if b >= 0:
                 d.append(b)
                 d.append(0)
             else: # error
                 d.append(0xFF)
                 d.append(0xFF)
             c = (c + 1) % columns
         else: # FF is end of text
             while c != 0:
                 d.extend(space_word)
                 c = (c + 1) % columns
             for _ in range(columns): # blank line to mark division
                 d.append(0xFF)
                 d.append(0xFF)
     return d



 #
 #
 # Main program
 #
 #

 # read the whole ROM image; the file is closed as soon as it has been read
 with open(ROM,"rb") as f:
     rom = f.read()
 print("%s read..." % (ROM))

 # fetch the tile data from the ROM
 #   before displaying text, 8x8 1-bpp TILEDATA is decompressed to 7E8800,
 #   then these are transformed into 2-bpp SNES tiles as needed by the text,
 #   using palettes to combine 2 1-bpp layers into a single tile for compact use of VRAM
 (tiledata,tiledata_end,valid) = nintendo_decompress(rom,TILEDATA)
 with open(PREFIX+"tiledata.bin","wb") as f:
     f.write(tiledata) # raw decompressed data, before any 2bpp reduction
 tiledata_size = len(tiledata)
 if not JP: # collapse 2bpp to 1bpp
     tiledata = tileset_reduce(tiledata)
 tileset_render(tiledata).save(PREFIX+"tiledata.png")
 print("Tile data decompressed: %d bytes, compressed to %d bytes (%stiledata.bin/png)" % (tiledata_size,tiledata_end-TILEDATA,PREFIX))


 # dump text data, in the JP version this is contained in exactly 2 compressed packets
 pairs = set() # (top tile, bottom tile) pairs seen in the JP text
 if JP:
     for textdata in TEXTDATA:
         fn = "%stext_%06X" % (PREFIX,textdata)
         (d,textdata_end,valid) = nintendo_decompress(rom,textdata)
         with open(fn+".bin","wb") as f:
             f.write(d)
         (img,textpairs,invalid) = simcity_text_packet_render(tiledata,d,COLS)
         img.save(fn+".png")
         print("Dumped text [%06X]: %d bytes, compressed to %d bytes (%s.bin/png)" % (textdata,len(d),textdata_end-textdata,fn))
         pairs.update(textpairs)
 else: # non-Japanese versions just had a single uncompressed ASCII block
     d = rom[TEXTDATA[0]:TEXTDATA[1]]
     de = textdata_expand(d,COLS) # 16-bit form needed by the renderer
     fn = "%stext_%06X" % (PREFIX,TEXTDATA[0])
     with open(fn+".bin","wb") as f:
         f.write(d) # the .bin keeps the original 8-bit text
     (img,textpairs,invalid) = simcity_text_packet_render(tiledata,de,COLS)
     img.save(fn+".png")
     print("Dumped text [%06X]: %d bytes (%s.bin/png)" % (TEXTDATA[0],len(d),fn))


 # dump character pairs
 if JP:
     def pairkey(p): # maximum of pairs indicates the order of the first appearance of the pairing
         return (max(p),p[0],p[1])
     pairsort = sorted(pairs,key=pairkey)
     # build text data holding one pair per cell: a row of top halves followed by a row of bottom halves
     pairrows = (len(pairsort)+PAIRCOLS-1)//PAIRCOLS
     pairdata = bytearray([0xFF]) * (pairrows*PAIRCOLS*2*2) # unused cells stay 0xFFFF
     for (i,(t0,t1)) in enumerate(pairsort):
         c = i % PAIRCOLS
         r = i // PAIRCOLS
         o = ((r * PAIRCOLS * 2) + c) * 2
         pairdata[o+0] = t0 & 0xFF
         pairdata[o+1] = t0 >> 8
         pairdata[o+(PAIRCOLS*2)+0] = t1 & 0xFF
         pairdata[o+(PAIRCOLS*2)+1] = t1 >> 8
     (img,textpairs,invalid) = simcity_text_packet_render(tiledata,pairdata,PAIRCOLS,True)
     img.save(PREFIX+"characters.png")
     print("Dumped characters: %d total (%scharacters.png)" % (len(pairsort),PREFIX))
     # check for unused tiles, make sure they're all accounted for
     usedtiles = set()
     for (t0,t1) in pairs:
         usedtiles.add(t0)
         usedtiles.add(t1)
     unused = 0
     usedmax = max(usedtiles) if usedtiles else 0
     for i in range(usedmax+1):
         if i not in usedtiles:
             print("Unused tile: %03X" % (i))
             unused += 1
     if not unused: print("No unused tiles found")
     print("Used tile range: 000-%03X" % (usedmax))
     # same sheet again, with tiles that already appeared earlier drawn in the reuse colours
     (img,textpairs,invalid) = simcity_text_packet_render(tiledata,pairdata,PAIRCOLS,True,True)
     img.save(PREFIX+"characters_reuse.png")
     # message now names the file that was actually saved
     print("Characters used: %d (%scharacters_reuse.png)" % (len(usedtiles),PREFIX))


 # separate tileset for scenarios
 (tilescen,tilescen_end,valid) = nintendo_decompress(rom,TILESCEN)
 with open(PREFIX+"tilescen.bin","wb") as f:
     f.write(tilescen) # the scenario tiles themselves (this used to write tiledata by mistake)
 tilescen_size = len(tilescen)
 tilescen = tilescen_reduce(tilescen)
 if JP:
     # JP: show the tiles as characters, pairing each tile with the one 16 tiles after it
     tilescen_count = len(tilescen) // 16
     scenrows = (tilescen_count+PAIRCOLS-1)//PAIRCOLS
     scenpair = bytearray([0xFF]) * (scenrows*PAIRCOLS*2*2) # unused cells stay 0xFFFF
     for i in range(tilescen_count):
         c = i % PAIRCOLS
         r = i // PAIRCOLS
         o = ((r * PAIRCOLS * 2) + c) * 2
         ti = (i % 16) + 16 * (2 * (i // 16))
         scenpair[o+0] = ti & 0xFF
         scenpair[o+1] = ti >> 8
         scenpair[o+(PAIRCOLS*2)+0] = (ti + 16) & 0xFF
         scenpair[o+(PAIRCOLS*2)+1] = (ti + 16) >> 8
     (img,scenpairs,invalid) = simcity_text_packet_render(tilescen,scenpair,PAIRCOLS,True)
 else:
     img = tileset_render(tilescen)
 img.save(PREFIX+"tilescen.png")
 print("Tile scenario decompressed: %d bytes, compressed to %d bytes (%stilescen.bin/png)" % (tilescen_size,tilescen_end-TILESCEN,PREFIX))


 # scenarios
 # each TEXTSCEN entry is (offset of first packet, number of packets stored back to back)
 for (textscen_off,count) in TEXTSCEN:
     for i in range(count):
         fn = "%sscen_%06X" % (PREFIX,textscen_off)
         (d,textscen_end,result) = nintendo_decompress(rom,textscen_off)
         with open(fn+".bin","wb") as f:
             f.write(d)
         (img,scenpairs,invalid) = simcity_text_packet_render(tilescen,textscen_convert(d),SCENCOLS)
         img.save(fn+".png")
         print("Dumped scenario [%06X]: %d bytes, compressed to %d bytes (%s.bin/png)" % (textscen_off,len(d),textscen_end-textscen_off,fn))
         textscen_off = textscen_end # the next packet starts where this one ended


 # search for all potential valid compressed text packets in ROM
 # (this is how the TEXTDATA and TILEDATA packets were found)
 # Disabled by default: change the condition to True to run the scan, and enable
 # one or more of the inner "if False" blocks to choose what gets written per hit.
 if False:
    o_progress = ~0 # offset at the last progress message (~0 forces a message on the first pass)
    o = 0 # current ROM offset being tried
    SEARCH_MIN = COLS*2*2 # minimum packet size of one row
    #SEARCH_MIN = 2048
    while o < len(rom):
        # print a progress line each time the scan enters a new 0x1000 byte page
        if (o & ~0x00000FFF) != (o_progress & ~0x00000FFF):
            print("Searching [%06X]..." % (o & ~0x00000FFF))
            o_progress = o
        # try to decode a packet here, with error printing turned off
        (sd,so,sresult) = nintendo_decompress(rom,o,False)
        if (not sresult) or (len(sd) < SEARCH_MIN):
            # no packet found, advance to next byte
            o += 1
        else:
            print("Found: [%06X]-[%06X] %05X (%d) bytes" % (o,so,len(sd),len(sd)))
            filename = "%06X" % (o)
            if False: # looking for text
                (img,pairs,invalid) = simcity_text_packet_render(tiledata,sd if JP else textdata_expand(sd,COLS),COLS)
                img.save(PREFIX+"dumptext_"+filename+".png")
            if False: # looking for scenario text
                (img,pairs,invalid) = simcity_text_packet_render(tilescen,textscen_convert(sd),SCENCOLS)
                img.save(PREFIX+"dumpscen_"+filename+".png")
            if False: # looking for tileset
                img = tileset_render(sd if JP else tileset_reduce(sd))
                img.save(PREFIX+"dumptile_"+filename+".png")
            if False: # dump data
                open(PREFIX+filename+".bin","wb").write(sd)
            # advance to end of packet
            o = so
	# This is a decompressor for data in the Sim City (Japan) ROM for Super Famicom
	#
	# This extracts the complete character set from the game, used for the popup dialogs,
	# and also all of the text used in these dialogs.
	# The five SNES versions in various languages can also be dumped by this script, see below.
	#
	# The decompressor is for a Nintendo compression format that was apparently used in several games,
	# and has been known by some as LC_LZ5. It might be useful for other games besides SimCity.

	import PIL.Image

	# Default configuration: the Japanese ROM (ROMVER 0).
	# The ROMVER chain further down replaces these values for the other regional versions.
	ROM = "SimCity (Japan).sfc"
	TILEDATA = 0x06E004
	TEXTDATA = [0x6F4E8,0x7126B] # JP text comes in two separate compressed packets
	TILESCEN = 0x047C9E
	TEXTSCEN = [(0x05BB27,5),(0x05F620,12)] # scenario text comes in contiguous groups (5 from bank text, 12 from rio 2047)
	PREFIX = "JP_"
	JP = True
	COLS = 24
	SCENCOLS = 32
	PAIRCOLS = 32

	ROMVER = 0
	# 0 - Japanese
	# 1 - USA
	# 2 - Europe
	# 3 - France
	# 4 - Germany

	# Flat list of R,G,B values handed to PIL's putpalette; the trailing number is the palette index.
	PALETTE = [
	    0xEE,0xE2,0xDE, # 0 background
	    0x00,0x00,0x00, # 1 text
	    0xFF,0x00,0x00, # 2 error
	    0xDE,0xC6,0xBD, # 3 grid
	    0xDE,0xFF,0xBD, # 4 reused tile background
	    0x00,0xA0,0x00, # 5 reused tile text
	    0xEE,0xE2,0xDE, # 6 2-bit background
	    0xFF,0x00,0x00, # 7 2-bit text 0
	    0x00,0x00,0xFF, # 8 2-bit text 1
	    0x00,0x00,0x00, # 9 2-bit text 2
	    ]

	DEBUG = False # prints decompression debug info

	# other ROM versions, change ROMVER above
	# ASCII_OFFSET (non-JP only) is subtracted from each text byte to get its tile index
	if ROMVER == 1:
	    ROM = "SimCity (USA).sfc"
	    TILEDATA = 0x04C0FB
	    TEXTDATA = [0x7A868,0x7DA83] # non-JP text comes in a single uncompressed ASCII range
	    TILESCEN = 0x04875C
	    TEXTSCEN = [(0x05BCAD,5),(0x05EE30,12)]
	    PREFIX = "US_"
	    JP = False
	    ASCII_OFFSET = 0
	    COLS = 24
	    PAIRCOLS = 16
	elif ROMVER == 2:
	    ROM = "SimCity (Europe).sfc"
	    TILEDATA = 0x04C10F
	    TEXTDATA = [0x7A868,0x7DA83]
	    TEXTSCEN = [(0x05BD54,5),(0x05EED7,12)]
	    TILESCEN = 0x04875D
	    PREFIX = "EU_"
	    JP = False
	    ASCII_OFFSET = 0
	    COLS = 24
	    PAIRCOLS = 16
	elif ROMVER == 3:
	    ROM = "SimCity (France).sfc"
	    TILEDATA= 0x04C601
	    TEXTDATA = [0x7B068,0x7E40E]
	    TILESCEN = 0x048AF2
	    TEXTSCEN = [(0x05C9FE,5),(0x05FC05,12)]
	    PREFIX = "FR_"
	    JP = False
	    ASCII_OFFSET = 0x20 # character table shifted to add more characters
	    COLS = 25
	    PAIRCOLS = 16
	elif ROMVER == 4:
	    ROM = "SimCity (Germany).sfc"
	    TILEDATA= 0x04C223
	    TEXTDATA = [0x7B068,0x7E399]
	    TILESCEN = 0x048737
	    TEXTSCEN = [(0x05CBE6,5),(0x05FDCE,12)]
	    PREFIX = "DE_"
	    JP = False
	    ASCII_OFFSET = 0x20
	    COLS = 25
	    PAIRCOLS = 16



	# Nintendo's compression format.
	# This seems to have been used in several SNES games published by Nintendo.
	# Some call this format: LC_LZ5
	#
	# It is a series of data packets, stored serially. Each packet starts with a control byte.
	# If the control byte is $FF, the data is complete.
	# Otherwise, the top 3 bits are the mode for the packet, and the bottom 5 bits are its length (-1).
	# Packet types:
	# $0x/000 = copy - copy the next (length) bytes to the output
	# $2x/001 = byte repeat - repeat the next byte (length) times
	# $4x/010 = word repeat - repeat the next 2 bytes for (length) bytes, length may be uneven
	# $6x/011 = byte incrementing - take the next byte as a starting value, emit (length) bytes incrementing by 1 each time
	# $8x/100 = abs reference - the next 2 bytes are relative to the start of the destination data, copy previously decoded data
	# $Ax/101 = abs reference + invert - same as above but EOR $FF on all copied data
	# $Cx/110 = relative reference - the next byte is how many bytes to look back from the end of the destination data, and copy
	# $Ex/111 = extended length prefix (see below) - use a 10-bit length instead of 5-bit for this packet
	# 111-111 = relative reference + invert - can only be used with an extended length prefix (see below)
	# Extended length prefix:
	# $Ex/111 = extended length prefix, which allows up to 10 bits of length, instead of only 5
	# The middle 3 bits choose the packet mode as above, and the low 2 bits are now the top 2 bits of length.
	# A second byte follows giving the bottom 8 bits of the length. Like with the 5-bit lengths, this value is 1 less than the actual length.
	# 7..bit..0 7..bit..0
	# --------- ---------
	# 111m mmLL LLLL LLLL
	# If the mode bits given by an extended length prefix are 111, it will be used as a relative reference + invert operation.
	# However, for lengths that don't need the extension, $Ax/101 is equivalently efficient.
	# I'm not sure if 111111xx exists in any real-world data. Note that its length can't have both high bits set either,
	# because $FF will terminate the data.
	#
	# This format was derived from the decompression routine residing at $0090A6. It takes inputs:
	# $0009 (word) - input address relative to $bb0000
	# $000B (byte) - data bank (bb)
	# $000C (word) - (temporaries)
	# $000E (word) - output address relative to $7E8000
	#
	# I later found this reference that seems to confirm my assessment:
	# https://github.com/bonimy/MushROMs/blob/master/doc/LC_LZ5%20Compression%20Format.md

	def nintendo_decompress(rom,offset,print_error=True,print_debug=DEBUG):
	    """Decode one LC_LZ5 compressed stream found at ``offset`` in ``rom``.

	    Packets are decoded until a $FF control byte is read. Decoding stops early
	    if the input runs out or a back reference points outside the data decoded
	    so far; whatever was decoded up to that point is still returned.

	    rom         -- indexable sequence of byte values (e.g. bytes) holding the stream
	    offset      -- index in rom of the first control byte
	    print_error -- print the failure reason when decoding stops early
	    print_debug -- print a trace line for every packet (defaults to the module DEBUG flag)

	    return: (decompressed data [bytearray], offset just past the last byte read [int], valid [bool])
	    """
	    class DecompException(Exception):
	        """Raised internally for truncated input or an out-of-range reference."""

	    MODENAME = [
	        "0/000 copy",
	        "2/001 byte repeat",
	        "4/010 word repeat",
	        "6/011 incrementing",
	        "8/100 abs reference",
	        "A/101 abs reference invert",
	        "C/110 relative reference",
	        "E/111 relative reference invert" ]

	    def next_byte():
	        # fetch one input byte and advance the read position
	        nonlocal offset
	        if offset >= len(rom):
	            raise DecompException("Out of data at [%06X]" % (offset))
	        value = rom[offset]
	        offset += 1
	        return value

	    d = bytearray()
	    result = True
	    try:
	        while True:
	            control = next_byte()
	            if control == 0xFF: # finish
	                break
	            mode   = control & 0xE0
	            length = control & 0x1F
	            if mode == 0xE0: # E/111 extended length prefix
	                mode = (control << 3) & 0xE0 # the real mode is the next three bits
	                low = next_byte()
	                if print_debug:
	                    print("ext: [%06X] %02X %02X" % (offset,control,low))
	                length = low | ((control & 0x03) << 8)
	            length += 1 # stored length is one less than the real length
	            if print_debug:
	                print("mode: %02X [%06X] (%4X) %4d %s" % (mode,offset,len(d),length,MODENAME[mode>>5]))
	            if mode == 0x00: # 0/000 copy
	                # byte by byte, so a truncated stream still keeps what was available
	                for _ in range(length):
	                    d.append(next_byte())
	            elif mode == 0x20: # 2/001 byte repeat
	                r = next_byte()
	                d.extend(bytes([r]) * length)
	            elif mode == 0x40: # 4/010 word repeat
	                r0 = next_byte()
	                r1 = next_byte()
	                # alternate r0,r1,r0,... ; an odd length ends on r0
	                d.extend((r1 if (i & 1) else r0) for i in range(length))
	            elif mode == 0x60: # 6/011 incrementing
	                r = next_byte()
	                d.extend((r + i) & 0xFF for i in range(length))
	            else:
	                # 8/100 absolute back reference
	                # A/101 absolute back reference + invert
	                # C/110 relative back reference
	                # E/111 relative back reference + invert
	                if mode & 0x40: # relative: count back from the end of the output
	                    ref = len(d) - next_byte()
	                    if ref < 0:
	                        raise DecompException("Out of range relative reference at [%06X] to [%04X] (%04X)" % (offset,ref,len(d)))
	                else: # absolute: 16-bit little-endian index from the start of the output
	                    ref = next_byte()
	                    ref |= next_byte() << 8
	                if print_debug:
	                    print("ref: (%02X) at [%06X] to [%04X] (%04X)" % (mode,offset,ref,len(d)))
	                invert = 0xFF if (mode & 0x20) else 0x00
	                # copied one byte at a time because the source may overlap bytes written by this packet
	                for _ in range(length):
	                    if ref >= len(d):
	                        raise DecompException("Out of range reference at [%06X] to [%04X] (%04X)" % (offset,ref,len(d)))
	                    d.append(d[ref] ^ invert)
	                    ref += 1
	    except DecompException as e:
	        if print_error: print(e)
	        result = False
	    return (d,offset,result)



	# Renders a text dialog from given text data
	def simcity_text_render(tiledata,textdata,offset,columns=24,rows=14,grid=False,reuse=False):
	    # Draws `rows` lines of text, each line made of two stacked rows of 8x8 tiles.
	    # tiledata = 1-bpp 8x8 tileset (8 bytes per tile, most significant bit is the leftmost pixel)
	    # textdata = 16-bit little-endian words indexing tiledata tiles
	    #            each row of text comes as a pair, first the top half then the bottom
	    # offset = byte offset into textdata of the first word
	    # grid = add a 1 pixel grid separating the characters, otherwise they are packed with no gap
	    # reuse = recolour tiles that were already drawn earlier in this render
	    # return (rendered image [PIL.image], tile pair [set], invalid tiles present [bool])
	    iw = 8*columns
	    ih = 16*rows
	    clear_colour = 2 # error colour: cells with no text data stay this colour
	    if grid:
	        iw += 1 + columns
	        ih += 1 + rows
	        clear_colour = 3
	    if iw < 1: iw = 1
	    if ih < 1: ih = 1
	    img = PIL.Image.new("P",(iw,ih),clear_colour)
	    img.putpalette(PALETTE)
	    pairs = set()
	    reused = set()
	    invalid = False
	    for rd in range(rows):
	        for rh in range(2): # 0 = top half, 1 = bottom half
	            r = (rd*2)+rh
	            for c in range(columns):
	                do = offset + ((r*(columns))+c)*2
	                if (do+2) > len(textdata): continue # ran off the end of the text
	                ti = textdata[do+0] | (textdata[do+1] << 8)
	                if ((ti*8)+8) > len(tiledata):
	                    invalid = True # tile index beyond the tileset
	                    continue
	                if rh == 0: # check pairing
	                    do2 = do + (columns*2)
	                    if (do2+2) <= len(textdata):
	                        ti2 = textdata[do2+0] | (textdata[do2+1] << 8)
	                        pairs.add((ti,ti2))
	                ox = c * 8
	                oy = r * 8
	                if grid:
	                    ox += 1 + c
	                    oy += 1 + (r//2)
	                for y in range(8):
	                    bits = tiledata[(ti*8)+y]
	                    for x in range(8):
	                        p = (bits >> (7-x)) & 1
	                        if ti in reused: p += 4 # recolor reused tiles
	                        img.putpixel((ox+x,oy+y),p)
	                if reuse: reused.add(ti)
	    return (img,pairs,invalid)


	# Renders an entire compressed data packet as text
	def simcity_text_packet_render(tiledata,textdata,columns=24,grid=False,reuse=False):
	    # one line of text = 2 tile rows of `columns` 16-bit words
	    stride = (columns*2*2)
	    # round up so a trailing partial line is still rendered
	    return simcity_text_render(tiledata,textdata,0,columns,(len(textdata)+stride-1)//stride,grid,reuse)


	# Renders a 1bpp 8x8 tileset
	def tileset_render(tiledata,columns=16):
	    # Draws every 8-byte tile of tiledata on a sheet `columns` tiles wide,
	    # with a 1 pixel grid line around each tile. Returns the PIL image.
	    tiles = (len(tiledata)+7) // 8
	    if tiles < 1: tiles = 1
	    rows = (tiles + columns - 1) // columns
	    iw = 1 + 9 * columns
	    ih = 1 + 9 * rows
	    img = PIL.Image.new("P",(iw,ih),3)
	    img.putpalette(PALETTE)
	    for r in range(rows):
	        for c in range(columns):
	            ti = c + (r * columns)
	            ox = 1 + 9 * c
	            oy = 1 + 9 * r
	            for y in range(8):
	                do = (ti*8)+y
	                if do >= len(tiledata):
	                    continue # past the end of the tile data
	                bits = tiledata[do]
	                for x in range(8):
	                    p = (bits >> (7-x)) & 1
	                    img.putpixel((ox+x,oy+y),p)
	    return img


	# Reduces 2bpp scenario tileset to 1bpp
	def tilescen_reduce(tilescen):
	    # keeps the first byte of every complete byte pair; a trailing odd byte is dropped
	    d = bytearray()
	    for i in range(0,len(tilescen)-1,2):
	        d.append(tilescen[i+0])
	    return d

	# Reduces non-JP 2bpp tilesets to 1bpp
	def tileset_reduce(tiledata):
	    # ORs the two bytes of every complete pair together and inverts the result;
	    # a trailing odd byte is dropped
	    d = bytearray()
	    for i in range(0,len(tiledata)-1,2):
	        d.append((tiledata[i+0] | tiledata[i+1]) ^ 0xFF)
	    return d

	# Converts scenario text to the dialog format (just needs to remove high bits)
	def textscen_convert(textscen):
	    # each 16-bit little-endian word keeps only its low 10 bits; a trailing odd byte is dropped
	    d = bytearray()
	    for i in range(0,len(textscen)-1,2):
	        ti = (textscen[i+0] | (textscen[i+1] << 8)) % 1024
	        d.append(ti & 0xFF)
	        d.append(ti >> 8)
	    return d

	# Expands non-JP ASCII text to 16-bit
	def textdata_expand(textdata,columns=24):
	    """Expand non-JP 8-bit text into the 16-bit tile words used by the text renderer.

	    Every byte other than 0xFF becomes the word (byte - ASCII_OFFSET); bytes
	    below ASCII_OFFSET become the error word 0xFFFF. A 0xFF byte ends a text:
	    the current row is padded with spaces to a multiple of `columns`, then one
	    full row of 0xFFFF words is emitted to mark the division.

	    textdata -- iterable of byte values
	    columns  -- words per row, used for the padding and the divider row

	    Returns the expanded data as a bytearray of little-endian words.
	    """
	    # The padding space goes through the same offset as the text itself, so that
	    # it selects the same tile a real space would (they differ when ASCII_OFFSET != 0).
	    space = ord(' ') - ASCII_OFFSET
	    space_word = (space,0) if 0 <= space <= 0xFF else (0xFF,0xFF)
	    d = bytearray()
	    c = 0 # column position within the current row
	    for b in textdata:
	        if b != 0xFF:
	            b -= ASCII_OFFSET
	            if b >= 0:
	                d.append(b)
	                d.append(0)
	            else: # error
	                d.append(0xFF)
	                d.append(0xFF)
	            c = (c + 1) % columns
	        else: # FF is end of text
	            while c != 0:
	                d.extend(space_word)
	                c = (c + 1) % columns
	            for _ in range(columns): # blank line to mark division
	                d.append(0xFF)
	                d.append(0xFF)
	    return d



	#
	#
	# Main program
	#
	#

	# read the whole ROM image; the file is closed as soon as it has been read
	with open(ROM,"rb") as f:
	    rom = f.read()
	print("%s read..." % (ROM))

	# fetch the tile data from the ROM
	# before displaying text, 8x8 1-bpp TILEDATA is decompressed to 7E8800,
	# then these are transformed into 2-bpp SNES tiles as needed by the text,
	# using palettes to combine 2 1-bpp layers into a single tile for compact use of VRAM
	(tiledata,tiledata_end,valid) = nintendo_decompress(rom,TILEDATA)
	with open(PREFIX+"tiledata.bin","wb") as f:
	    f.write(tiledata) # raw decompressed data, before any 2bpp reduction
	tiledata_size = len(tiledata)
	if not JP: # collapse 2bpp to 1bpp
	    tiledata = tileset_reduce(tiledata)
	tileset_render(tiledata).save(PREFIX+"tiledata.png")
	print("Tile data decompressed: %d bytes, compressed to %d bytes (%stiledata.bin/png)" % (tiledata_size,tiledata_end-TILEDATA,PREFIX))


	# dump text data, in the JP version this is contained in exactly 2 compressed packets
	pairs = set() # (top tile, bottom tile) pairs seen in the JP text
	if JP:
	    for textdata in TEXTDATA:
	        fn = "%stext_%06X" % (PREFIX,textdata)
	        (d,textdata_end,valid) = nintendo_decompress(rom,textdata)
	        with open(fn+".bin","wb") as f:
	            f.write(d)
	        (img,textpairs,invalid) = simcity_text_packet_render(tiledata,d,COLS)
	        img.save(fn+".png")
	        print("Dumped text [%06X]: %d bytes, compressed to %d bytes (%s.bin/png)" % (textdata,len(d),textdata_end-textdata,fn))
	        pairs.update(textpairs)
	else: # non-Japanese versions just had a single uncompressed ASCII block
	    d = rom[TEXTDATA[0]:TEXTDATA[1]]
	    de = textdata_expand(d,COLS) # 16-bit form needed by the renderer
	    fn = "%stext_%06X" % (PREFIX,TEXTDATA[0])
	    with open(fn+".bin","wb") as f:
	        f.write(d) # the .bin keeps the original 8-bit text
	    (img,textpairs,invalid) = simcity_text_packet_render(tiledata,de,COLS)
	    img.save(fn+".png")
	    print("Dumped text [%06X]: %d bytes (%s.bin/png)" % (TEXTDATA[0],len(d),fn))


	# dump character pairs
	# Every (t0,t1) tile pair collected from the text packets is laid out on a
	# grid PAIRCOLS wide and rendered as one image.
	if JP:
	    def pairkey(p): # maximum of pairs indicates the order of the first appearance of the pairing
	        return (max(p),p[0],p[1])
	    pairsort = sorted(pairs,key=pairkey)
	    # Buffer layout: each row of pairs occupies two tile rows (t0 row, then
	    # t1 row), each tile entry is 2 bytes (little-endian), so a pair row is
	    # PAIRCOLS*2*2 bytes. Rows are rounded up and pre-filled with 0xFF.
	    pairdata = bytearray([0xFF]*((len(pairsort)+PAIRCOLS-1)//PAIRCOLS)*PAIRCOLS*2*2)
	    for i,(t0,t1) in enumerate(pairsort):
	        c = i % PAIRCOLS
	        r = i // PAIRCOLS
	        o = ((r * PAIRCOLS * 2) + c) * 2 # byte offset of the t0 entry
	        pairdata[o+0] = t0 & 0xFF
	        pairdata[o+1] = t0 >> 8
	        # the t1 entry sits exactly one tile row (PAIRCOLS entries of 2 bytes) further on
	        pairdata[o+(PAIRCOLS*2)+0] = t1 & 0xFF
	        pairdata[o+(PAIRCOLS*2)+1] = t1 >> 8
	    (img,textpairs,invalid) = simcity_text_packet_render(tiledata,pairdata,PAIRCOLS,True)
	    img.save(PREFIX+"characters.png")
	    print("Dumped characters: %d total (%scharacters.png)" % (len(pairsort),PREFIX))
	    # check for unused tiles, make sure they're all accounted for
	    usedtiles = set()
	    for (t0,t1) in pairs:
	        usedtiles.add(t0)
	        usedtiles.add(t1)
	    unused = 0
	    usedmax = max(usedtiles,default=0) # 0 when no pairs were collected
	    for i in range(usedmax+1):
	        if i not in usedtiles:
	            print("Unused tile: %03X" % (i))
	            unused += 1
	    if not unused: print("No unused tiles found")
	    print("Used tile range: 000-%03X" % (usedmax))
	    # second rendering of the same grid with the renderer's extra flag enabled
	    (img,textpairs,invalid) = simcity_text_packet_render(tiledata,pairdata,PAIRCOLS,True,True)
	    img.save(PREFIX+"characters_reuse.png")
	    # the reported name now matches the file actually saved above
	    print("Characters used: %d (%scharacters_reuse.png)" % (len(usedtiles),PREFIX))


	# separate tileset for scenarios
	(tilescen,tilescen_end,valid) = nintendo_decompress(rom,TILESCEN)
	# write the scenario tileset itself (not the main text tileset) to tilescen.bin
	with open(PREFIX+"tilescen.bin","wb") as f:
	    f.write(tilescen)
	tilescen_size = len(tilescen) # size before reduction, used in the report below
	# NOTE(review): tilescen_reduce is not defined in the part of the file visible
	# here, while the main tileset is collapsed with tileset_reduce - confirm the name.
	tilescen = tilescen_reduce(tilescen)
	if JP:
	    # Lay the tiles out as top/bottom pairs on a grid PAIRCOLS wide, using the
	    # same buffer layout as the character pair dump: two tile rows per pair
	    # row, 2 bytes per tile entry, pre-filled with 0xFF.
	    tilescen_count = len(tilescen) // 16
	    scenpair = bytearray([0xFF]*((tilescen_count+PAIRCOLS-1)//PAIRCOLS)*PAIRCOLS*2*2)
	    for i in range(tilescen_count):
	        c = i % PAIRCOLS
	        r = i // PAIRCOLS
	        o = ((r * PAIRCOLS * 2) + c) * 2
	        # top tiles come from every other group of 16; the bottom tile is 16 entries later
	        ti = (i % 16) + 16 * (2 * (i // 16))
	        scenpair[o+0] = ti & 0xFF
	        scenpair[o+1] = ti >> 8
	        scenpair[o+(PAIRCOLS*2)+0] = (ti + 16) & 0xFF
	        scenpair[o+(PAIRCOLS*2)+1] = (ti + 16) >> 8
	    (img,scenpair,invalid) = simcity_text_packet_render(tilescen,scenpair,PAIRCOLS,True)
	else:
	    img = tileset_render(tilescen)
	img.save(PREFIX+"tilescen.png")
	print("Tile scenario decompressed: %d bytes, compressed to %d bytes (%stilescen.bin/png)" % (tilescen_size,tilescen_end-TILESCEN,PREFIX))


	# scenarios
	# Each TEXTSCEN entry is (offset of the first packet, number of packets);
	# the packets of a group are contiguous, so each one starts where the
	# previous one ended.
	for (textscen_off,count) in TEXTSCEN:
	    for i in range(count):
	        fn = "%sscen_%06X" % (PREFIX,textscen_off)
	        (d,textscen_end,result) = nintendo_decompress(rom,textscen_off)
	        # raw decompressed packet
	        with open(fn+".bin","wb") as f:
	            f.write(d)
	        (img,scenpairs,invalid) = simcity_text_packet_render(tilescen,textscen_convert(d),SCENCOLS)
	        img.save(fn+".png")
	        print("Dumped scenario [%06X]: %d bytes, compressed to %d bytes (%s.bin/png)" % (textscen_off,len(d),textscen_end-textscen_off,fn))
	        textscen_off = textscen_end # next packet follows immediately


	# search for all potential valid compressed text packets in ROM
	# (this is how the TEXTDATA and TILEDATA packets were found)
	# Disabled by default; change the condition to True to run the scan. The
	# inner "if False" switches select what is written for each packet found.
	if False:
	    o_progress = ~0
	    o = 0
	    SEARCH_MIN = COLS*2*2 # minimum packet size of one row
	    #SEARCH_MIN = 2048
	    while o < len(rom):
	        # report progress once per 0x1000-byte page
	        if (o & ~0x00000FFF) != (o_progress & ~0x00000FFF):
	            print("Searching [%06X]..." % (o & ~0x00000FFF))
	            o_progress = o
	        (sd,so,sresult) = nintendo_decompress(rom,o,False)
	        if (not sresult) or (len(sd) < SEARCH_MIN):
	            # no packet found, advance to next byte
	            o += 1
	        else:
	            print("Found: [%06X]-[%06X] %05X (%d) bytes" % (o,so,len(sd),len(sd)))
	            filename = "%06X" % (o)
	            if False: # looking for text
	                (img,pairs,invalid) = simcity_text_packet_render(tiledata,sd if JP else textdata_expand(sd,COLS),COLS)
	                img.save(PREFIX+"dumptext_"+filename+".png")
	            if False: # looking for scenario text
	                (img,pairs,invalid) = simcity_text_packet_render(tilescen,textscen_convert(sd),SCENCOLS)
	                img.save(PREFIX+"dumpscen_"+filename+".png")
	            if False: # looking for tileset
	                img = tileset_render(sd if JP else tileset_reduce(sd))
	                img.save(PREFIX+"dumptile_"+filename+".png")
	            if False: # dump data
	                with open(PREFIX+filename+".bin","wb") as f:
	                    f.write(sd)
	            # advance to end of packet
	            o = so