Created
April 15, 2012 17:30
-
-
Save lifthrasiir/2394028 to your computer and use it in GitHub Desktop.
quick and dirty text compressor for DCPU-16
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import heapq, sys | |
p = open(sys.argv[1],'rb').read() | |
p += '\0' | |
freq = {} | |
for i in p: freq[i] = freq.get(i,0) + 1 | |
l = [(v, [('', k)]) for k, v in freq.items()] | |
heapq.heapify(l) | |
while len(l) > 1: | |
a = heapq.heappop(l) | |
b = heapq.heappop(l) | |
heapq.heappush(l, (a[0]+b[0], [('0'+i,j) for i, j in a[1]] + [('1'+i,j) for i,j in b[1]])) | |
tree = l[0][1] | |
print ' ;', hex(len(p)), 'words uncompressed' | |
print ' ;', hex((len(p)+1)/2), 'words packed' | |
print ' ;', hex((sum(freq[j]*len(i) for i,j in tree)+15)//16), 'words compressed' | |
maxcodelen = max(len(i) for i,j in tree) | |
unsort = [] | |
codestart = [] | |
codeend = [] | |
remap = {} | |
c = 0 | |
for l in xrange(maxcodelen+1): | |
unsort.append([]) | |
codestart.append(c) | |
for i, j in tree: | |
if len(i) == l: | |
unsort[l].append(j) | |
remap[j] = bin(c)[2:].zfill(l) | |
c += 1 | |
codeend.append(c) | |
c <<= 1 | |
for v, k in sorted((v,k) for k,v in remap.items()): | |
print >>sys.stderr, '%-20s -> %r' % (v, k) | |
print ' SET SP, compressed' | |
print ' SET C, 16' | |
print ' SET Y, 0' | |
print ':consume' | |
print ' SHR B, C' | |
print ':consume_bit' | |
print ' IFE C, 0' | |
print ' SET PC, consume_end' | |
print ' IFN Y, 0' | |
print ' SET PC, consume_skip' | |
print ' SET X, POP' | |
print ' SET Y, 16' | |
print ':consume_skip' | |
print ' SHL B, 1' | |
print ' SUB Y, 1' | |
print ' SUB C, 1' | |
print ' SHL X, 1' | |
print ' BOR B, O' | |
print ' SET PC, consume_bit' | |
print ':consume_end' | |
print ' SET A, B' | |
for l, c in enumerate(codeend): | |
if not unsort[l]: continue | |
c <<= 16-l | |
if c != 0x10000: | |
print ' IFG %#x, A' % c | |
print ' SET PC, decode_len%d' % l | |
bodys = list(enumerate(codestart)) | |
for l, c in bodys[-1:] + bodys[:-1]: # reorder statements... | |
if not unsort[l]: continue | |
if l != maxcodelen: | |
print ':decode_len%d' % l | |
if 16-l > 0: | |
print ' SHR A, %d' % (16-l) | |
print ' SET B, O' | |
if c > 0: | |
print ' SUB A, %#x' % c | |
print ' SET A, [A+unsort_len%d]' % l | |
print ' JSR putc' | |
print ' SET C, %d' % l | |
print ' SET PC, consume' | |
print ''' | |
:screenoff | |
DAT 0 | |
:newline | |
SET C, [screenoff] | |
AND C, 0xffe0 | |
ADD C, 0x20 | |
SET [screenoff], C | |
IFG 0x180, C | |
SET PC, POP | |
:scroll | |
SET C, 0 | |
:scroll_copy | |
SET [C+0x8000], [C+0x8020] | |
ADD C, 1 | |
IFG 0x160, C | |
SET PC, scroll_copy | |
:scroll_fill | |
SET [C+0x8000], 0 | |
ADD C, 1 | |
IFG 0x180, C | |
SET PC, scroll_fill | |
SUB [screenoff], 0x20 | |
SET PC, POP | |
:putc | |
IFE A, 10 | |
SET PC, newline | |
IFE A, 0 | |
SUB PC, 1 ; temporary hack! | |
IFG [screenoff], 0x17e | |
JSR scroll | |
SET C, [screenoff] | |
BOR A, 0xf000 | |
SET [C+0x8000], A | |
ADD C, 1 | |
SET [screenoff], C | |
SET PC, POP | |
''' | |
for l, chs in enumerate(unsort): | |
if chs: | |
print ':unsort_len%d' % l | |
print ' DAT %s' % ', '.join(hex(ord(c)) for c in chs) | |
print ':compressed' | |
bits = ''.join(remap[c] for c in p) | |
for i in xrange(0, len(bits), 128): | |
parts = bits[i:i+128] | |
if len(parts) % 16 != 0: parts += '0' * (16 - len(parts) % 16) | |
print ' DAT %s' % ', '.join('%#06x' % int(parts[j:j+16],2) for j in xrange(0,len(parts),16)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment