Created
February 7, 2022 11:53
-
-
Save bbbradsmith/935c03fc31d81ad29b489a943bc79a5c to your computer and use it in GitHub Desktop.
Compressor and decompressor for Terranigma (SNES)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Terranigma data compressor and decompressor
# Brad Smith, 2022-02-07
# https://rainwarrior.ca
#
# Format reference:
# https://www.terranigma.be/index.php/Compression
import sys
def usage():
    """Print the command line help, then abort.

    Raises:
        Exception: always, after the help text has been printed, so that the
            script stops when it was invoked with bad arguments.
    """
    print("Usage:")
    print(" Decompress:")
    print(" d compressed.bin uncompressed.bin")
    print(" Compress:")
    print(" c uncompressed.bin compressed.bin")
    print(" Extract from ROM and decompress:")
    print(" e terranigma.sfc 380000 title.chr")
    print(" Compress and inject into ROM:")
    print(" i terranigma.sfc 380000 title.chr")
    print(" ROM addresses are in hexadecimal.")
    print()
    raise Exception("Invalid command line arguments.")
debug = False  # True to debug the decompression


def dbg(msg):
    """Print msg, but only while the module-level debug flag is set."""
    if debug:
        print(msg)
def decompress(rom, address):
    """Decompress one Terranigma data packet and return it as a bytearray.

    Args:
        rom: bytes-like object containing the packet (a whole ROM image or a
            standalone compressed file).
        address: index into rom of the first byte of the packet header.

    Returns:
        bytearray holding the decompressed data.

    Progress and format problems are reported with print(). Reads past the
    end of rom yield $FF. Decoding stops at the end-of-packet marker, or as
    soon as the input is exhausted, whichever comes first.
    """
    def read_byte():
        # Fetch the next input byte; past the end of rom this yields $FF.
        # address always advances so that an overrun can be reported later.
        nonlocal address
        byte = rom[address] if address < len(rom) else 0xFF
        address += 1
        return byte

    print("Packet at %06X" % address)
    packet_start = address

    # header: 00, 16-bit length, 1st byte of output data
    h0 = read_byte()
    if h0 != 0:
        print("Packet header should start with 00. Found: %02X" % h0)
    # Low byte first. The length is only reported, it does not bound the
    # decoding (compress() stores the uncompressed size in this field).
    plen = read_byte() + (read_byte() << 8)
    print("Length: $%04X" % plen)
    d = bytearray()
    d.append(read_byte())
    dbg("%06X: %02X" % (address-1, d[0]))

    # data:
    #   8-bit control bitstream, 1 byte appears whenever the bitstream is empty
    #   1 = copy next byte
    #     data: byte
    #   00xx = copy xx+2 bytes from offset (8-bit) - $100
    #     data: offset byte
    #   01 = copy bytes from long offset (13-bit) - $2000
    #     data: 2 bytes = 13-bit offset, 3-bit short-length (+2)
    #     data: 3 bytes = 13-bit offset, 3-bit = 0, 8-bit long-length (+1)
    #       long-length = 0 ends data (offset also 0)
    control_bits = 0   # number of unread bits left in control_shift
    control_shift = 0  # current control byte, next bit in the high position

    def read_control():
        # Return the next control bit, fetching a new control byte when empty.
        nonlocal control_bits
        nonlocal control_shift
        if control_bits == 0:
            control_bits = 8
            control_shift = read_byte()
            dbg("%06X: control $%02X" % (address-1, control_shift))
        control_bits -= 1
        bit = (control_shift >> 7) & 1  # return high bit
        control_shift = (control_shift << 1) & 0xFF
        return bit

    def copy_from(o, clen):
        # Byte by byte on purpose: the source range may overlap the bytes
        # being appended (self-repetition).
        for i in range(clen):
            d.append(d[o+i])

    ended = False
    # Every packet type needs at least one data byte, so once the input is
    # exhausted no further valid packet can start. Stopping here keeps
    # truncated or garbage input from decoding $FF filler forever.
    while address < len(rom):
        if read_control() == 1:  # control 1 = literal data
            d.append(read_byte())
            # Report the byte that was stored; indexing rom here would raise
            # IndexError when the read ran past the end of the input.
            dbg("%06X: 1 %02X" % (address-1, d[-1]))
        elif read_control() == 0:  # control 00xx = short copy
            # 2-bit copy length in control stream
            clen = (read_control() << 1) + read_control() + 2
            o = (len(d) - 0x100) + read_byte()
            if o < 0:
                print("Error at %6X: copy offset (%X) past beginning of output data (%X)." % (address-1, len(d)-o, len(d)))
                o = 0
            dbg("%06X: 00 $%X (-$%02X)" % (address-1, clen, len(d)-o))
            copy_from(o, clen)
        else:  # control 01 = long copy
            astart = address
            p = (read_byte() << 8) + read_byte()
            o = (len(d) - 0x2000) + (p >> 3)
            clen = p & 7  # 3-bit short length
            if clen > 0:  # short length
                clen += 2
            else:  # long length if short length = 0
                clen = read_byte() + 1
                if clen == 1:
                    ended = True
                    dbg("%06X: 01 end -%04X" % (astart, len(d)-o))
                    break
            if o < 0:
                print("Error at %6X: copy offset (%X) past beginning of output data (%X)." % (address-1, len(d)-o, len(d)))
                o = 0
            dbg("%06X: 01 $%02X -$%04X" % (astart, clen, len(d)-o))
            copy_from(o, clen)

    print("End of packet: %06X" % address)
    print("Compressed size: $%04X" % (address-packet_start))
    if address > len(rom):
        print("Packet longer than ROM length? End at: %06X" % address)
    if not ended:
        print("No end-of-packet 01 $00 $00 $00 found.")
    return d
def compress(d):
    """Compress data into a Terranigma data packet.

    Args:
        d: bytes-like object holding the uncompressed data.

    Returns:
        bytearray with the complete packet: the header (00, 16-bit length low
        byte first, first data byte uncompressed), the control/data stream
        and the end-of-packet entry. Empty when d is empty.

    Raises:
        AssertionError: only while the module-level debug flag is set, when
            the packet does not decompress back to d.
    """
    print("Compressing $%04X bytes..." % len(d))
    c = bytearray()
    if len(d) < 1:
        print("Warning: data too small!")
        return c
    c.append(0)
    if len(d) >= 65536:
        print("Warning: data too large for 16-bit packet size.")
    c.append(len(d) & 0xFF)
    c.append((len(d) >> 8) & 0xFF)
    c.append(d[0])  # first byte is uncompressed
    pos = 1

    control_bits = 0  # free bits left in the current control byte
    control_pos = 0   # index in c of the current control byte

    def control_bit(b):
        # Append one bit to the control stream. A fresh control byte is
        # emitted into c whenever the current one is full.
        nonlocal control_bits
        nonlocal control_pos
        if control_bits < 1:
            control_pos = len(c)
            c.append(0)  # new control byte
            control_bits = 8
        # fill in current working control byte
        control_bits -= 1
        c[control_pos] = (c[control_pos] << 1) | b

    while pos < len(d):
        # search for longest match in last $2000 bytes
        # (overlap, i.e. self-repetition allowed)
        ms = max(0, pos - 0x2000)
        max_len = min(0x100, len(d) - pos)  # longest match that can be encoded here
        best_len = 0
        best_p = 0
        for p in range(pos-1, ms-1, -1):
            plen = 0
            while plen < max_len and d[p+plen] == d[pos+plen]:
                plen += 1
            if plen > best_len:
                best_p = p
                best_len = plen
                if best_len == max_len:
                    # Only a strictly longer match could replace this one,
                    # and none can exist, so the search is finished.
                    break

        # choose best encoding for match
        if 2 <= best_len <= 5 and best_p >= pos - 0x100:  # 8-bit short copy
            control_bit(0)
            control_bit(0)
            lb = best_len - 2
            control_bit((lb >> 1) & 1)
            control_bit((lb >> 0) & 1)
            c.append(best_p - (pos - 0x100))
            pos += best_len
        elif best_len <= 2:  # literal
            control_bit(1)
            c.append(d[pos])
            pos += 1
        else:  # best_len > 2: long copy
            op = best_p - (pos - 0x2000)  # 13-bit long offset
            control_bit(0)
            control_bit(1)
            c.append((op >> 5) & 0xFF)
            if best_len <= 9:  # short length
                c.append(((op << 3) & 0xFF) | (best_len - 2))
            else:  # long length
                c.append((op << 3) & 0xFF)
                c.append(best_len - 1)
            pos += best_len

    # terminal data entry
    control_bit(0)
    control_bit(1)
    c.append(0)
    c.append(0)
    c.append(0)
    # conclude final control bit: left-align the bits of the last control byte
    c[control_pos] <<= control_bits
    print("Output size: $%04X" % len(c))
    if debug:
        print("Verify compression:")
        # Explicit raise instead of assert so the check survives python -O.
        if decompress(c, 0) != d:
            raise AssertionError("Compressed data does not decompress to the original data.")
    return c
def _read_file(path):
    """Return the whole content of the binary file at path."""
    with open(path, "rb") as f:
        return f.read()


def _write_file(path, data):
    """Write data to the binary file at path, replacing any previous content."""
    with open(path, "wb") as f:
        f.write(data)


def _main(argv):
    """Run the command line tool; argv has the same layout as sys.argv.

    Commands (argv[1], case-insensitive):
        d IN OUT          decompress the file IN into OUT
        c IN OUT          compress the file IN into OUT
        e ROM ADDR OUT    decompress the packet at hex address ADDR of ROM
        i ROM ADDR IN     compress IN and write it into ROM at hex address ADDR

    Returns:
        0 once a command has completed.

    Raises:
        Exception: from usage() on invalid arguments, or when the inject
            address lies beyond the end of the ROM.
    """
    if len(argv) < 4:
        usage()
    command = argv[1].lower()
    # Input is always read completely before the output file is opened, so
    # using the same path for input and output cannot destroy the input.
    if command == "d":
        _write_file(argv[3], decompress(_read_file(argv[2]), 0))
        return 0
    if command == "c":
        _write_file(argv[3], compress(_read_file(argv[2])))
        return 0
    if len(argv) < 5:
        usage()
    if command == "e":
        _write_file(argv[4], decompress(_read_file(argv[2]), int(argv[3], base=16)))
        return 0
    if command == "i":
        r = bytearray(_read_file(argv[2]))
        a = int(argv[3], base=16)
        if a > len(r):
            # A slice assignment past the end would silently append the
            # packet at the wrong offset and the ROM is rewritten in place.
            raise Exception("Inject address %06X is past the end of the ROM (%06X)." % (a, len(r)))
        d = compress(_read_file(argv[4]))
        r[a:a+len(d)] = d
        _write_file(argv[2], r)
        return 0
    usage()


if __name__ == "__main__":
    sys.exit(_main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment