Created
October 13, 2023 03:04
-
-
Save luckytyphlosion/120fe0791fde82b5bcc2286caeaed2d8 to your computer and use it in GitHub Desktop.
Faster LZ77UnCompWRAM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ---------------------------------------------------------------------------
// LZ77UnCompWRAMOptimized
//
// Decompresses an LZ77 stream byte-by-byte into the destination buffer.
// Written for ARM state: the computed jump below relies on PC reading as
// "current instruction + 8" and on every instruction being 4 bytes.
//
// In:   r0 = source: 32-bit header followed by the compressed stream.
//            The header is fetched with a word load (ldr).
//       r1 = destination, written with byte stores.
// Out:  nothing is explicitly returned; r0/r1 are left advanced past the
//       data consumed/produced.
// Saved:    r4, r5, r6, lr (pushed on entry, popped on exit)
// Clobbers: r0, r1, r2, r3, flags
//
// Register roles inside the routine:
//       r2 = decompressed bytes still to produce
//       r3 = length of the current back-reference copy / jump offset
//       r4 = blocks left in the current group of 8
//       r5 = scratch (header word, block byte, copied byte)
//       r6 = scratch (literal byte, back-reference distance)
//       lr = flag byte, shifted so the next flag is always in bit 31
// ---------------------------------------------------------------------------
LZ77UnCompWRAMOptimized: @ 0x000010FC
    push    {r4, r5, r6, lr}

    // Data header (32bit)
    //   Bit 0-3   Reserved
    //   Bit 4-7   Compressed type (must be 1 for LZ77)
    //   Bit 8-31  Size of decompressed data
    // Note: the type field is not validated here.
    ldr     r5, [r0], #4
    // r2 = decompressed size (number of bytes still to write)
    lsr     r2, r5, #8
    // nothing to do for an empty payload
    cmp     r2, #0
    ble     LZ77_Done

LZ77_MainLoop:
    // Flag data (8bit)
    //   Bit 0-7   Type flags for the next 8 blocks, MSB first
    ldrb    lr, [r0], #1
    // move the flags to the top byte so each "lsls #1" pops one flag
    // into the carry
    lsl     lr, lr, #24
    // 8 blocks per flag byte
    mov     r4, #8
    b       LZ77_EightBlockLoop

LZ77_HandleCompressedData:
    // Block Type 1 Part 1 - Compressed - Copy N+3 bytes from Dest-Disp-1 to Dest
    //   Bit 0-3   Disp MSBs
    //   Bit 4-7   Number of bytes to copy (minus 3)
    // byte copy range: [3, 18]
    ldrb    r5, [r0], #1
    // r3 = copy length = (byte >> 4) + 3; the byte is zero-extended by ldrb
    lsr     r3, r5, #4
    add     r3, r3, #3
    // keep only the displacement high bits
    and     r5, r5, #0xf
    // Block Type 1 Part 2
    //   Bit 0-7   Disp LSBs
    ldrb    r6, [r0], #1
    // r6 = 12-bit displacement
    orr     r6, r6, r5, lsl #8
    // the encoded displacement is (distance - 1), so the copy source is
    // Dest - Disp - 1
    add     r6, r6, #1
    // charge the copy against the remaining size; the flags from this
    // subtraction are consumed by the "ble LZ77_Done" after the copy
    subs    r2, r2, r3
    // If the block asks for more bytes than remain (r2 went negative),
    // clamp the copy to what was left: r3 + r2 = old r2, which is >= 1
    // because this point is only reached with bytes remaining.
    // Without this the copy would write up to 17 bytes past the end of
    // the destination. No S suffix, so the flags are untouched.
    addlt   r3, r3, r2
    // Duff's device: enter the unrolled copy so exactly r3 copies remain.
    // https://en.wikipedia.org/wiki/Duff%27s_device
    // r3 = number of unrolled copies to skip (18 must match the .rept count)
    rsb     r3, r3, #18
    // Each unrolled copy is 2 instructions = 8 bytes, hence lsl #3.
    // PC reads as this instruction + 8, i.e. the first ldrb after the nop,
    // so the nop exists purely to pad that slot.
    add     pc, pc, r3, lsl #3
    nop
    .rept 18
    ldrb    r5, [r1, -r6]
    strb    r5, [r1], #1
    .endr
    // flags are still those of "subs r2, r2, r3": none of rsb/add/ldrb/strb
    // (or the conditional add) update them
    // stop once the whole output has been produced
    ble     LZ77_Done
    // one block of the current group consumed; fetch a new flag byte when
    // the group is exhausted, otherwise fall through to the next block
    subs    r4, r4, #1
    ble     LZ77_MainLoop

LZ77_EightBlockLoop:
    // pop the next flag into the carry: set = compressed block
    lsls    lr, lr, #1
    bcs     LZ77_HandleCompressedData
    // Block Type 0 - uncompressed data is always exactly 1 byte
    ldrb    r6, [r0], #1
    strb    r6, [r1], #1
    subs    r2, r2, #1
    ble     LZ77_Done

LZ77_EightBlockLoop_HandleLoop:
    // continue with the next block while this group of 8 has blocks left
    subs    r4, r4, #1
    bgt     LZ77_EightBlockLoop
    // group exhausted: read the next flag byte. r2 needs no re-check here,
    // every path that decrements it already branched to LZ77_Done at <= 0
    b       LZ77_MainLoop

LZ77_Done:
    pop     {r4, r5, r6, lr}
    bx      lr
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment