Created
December 19, 2023 21:46
-
-
Save pascaldekloe/1d3e5e407dffb781fc2a31e2033441ea to your computer and use it in GitHub Desktop.
Experimental Integer Compression (Status Unknown)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.global _pack32dec64 | |
.text | |
.align 2 | |
// Decode64 reads X2 amount of bytes at address X1 and it writes the 32
// encoded-values to address X0. The first delta is applied against X3.
//
// In:	X0 = destination for the 32 decoded 64-bit values
//	X1 = source address of the packed input
//	X2 = input size in bytes, which is 4 × the bit width per value
//	X3 = offset which the first delta is applied against
//
// X2 is rounded down to a multiple of 4 and reduced modulo 256. Only the
// sizes 0, 4, 8 and 12 (bit widths 0 to 3) have a decoder in the table. Any
// other size returns right away, without writing to X0, rather than
// branching past the end of the table.
// Clobbers X2 and X5, plus whatever the selected decoder uses.
_pack32dec64:
	and	x2, x2, 252		// multiple of 4, below 256
	cmp	x2, 12			// size of the last table entry (3 bits)
	b.hi	1f			// unsigned: no decoder for this size
	adr	x5, decode_table
	add	x5, x5, x2		// 4 input bytes per bit ≡ 4 bytes per entry
	br	x5
decode_table:				// one branch instruction per bit width
	b	dec0b64
	b	dec1b64
	b	dec2b64
	b	dec3b64
1:	ret				// unsupported size; nothing decoded
.align 2 | |
// dec0b64 handles the 0-bit width: no value differs from the offset in X3,
// so X3 is written 32 times to X0. The input at X1 is not read.
// X0 is left 256 bytes further. No other register is modified.
dec0b64:
	.rept	16			// 16 pairs make the 32 values
	stp	x3, x3, [x0], 16	// two copies, then advance
	.endr
	ret
// dec1b64 decodes the 1-bit reverse delta zig-zag encoding: 32 values from
// the 4 bytes at X1, least-significant bit first.
//
// In:	X0 = destination for the 32 decoded 64-bit values
//	X1 = source address of the packed input
//	X3 = offset which the first delta is applied against
// Clobbers X2, X3 and X4. X0 is left 256 bytes further.
//
// A 1-bit zig-zag encoding of reverse delta is either 0 for no change
// against the previous, or 1 for an increment. Sign-extension of the bit
// gives the zig-zag decoded delta (0 or −1), which is subtracted from the
// previous value, just like dec2b64 does with its deltas.

// Dec1b64_pair decodes the values at bit position lsb and lsb + 1 of X4.
// X3 holds the previous value on entry, and the last one decoded on exit.
.macro dec1b64_pair lsb
	sbfx	x2, x4, \lsb, 1		// delta: 0 or −1
	sub	x2, x3, x2		// previous − delta
	sbfx	x3, x4, \lsb+1, 1
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
.endm

dec1b64:
	ldr	w4, [x1]		// fetch all 4 bytes of input
	dec1b64_pair 0
	dec1b64_pair 2
	dec1b64_pair 4
	dec1b64_pair 6
	dec1b64_pair 8
	dec1b64_pair 10
	dec1b64_pair 12
	dec1b64_pair 14
	dec1b64_pair 16
	dec1b64_pair 18
	dec1b64_pair 20
	dec1b64_pair 22
	dec1b64_pair 24
	dec1b64_pair 26
	dec1b64_pair 28
	dec1b64_pair 30
	ret
// dec2b64 decodes the 2-bit reverse delta zig-zag encoding: 32 values from
// the 8 bytes at X1, least-significant bits first.
//
// In:	X0 = destination for the 32 decoded 64-bit values
//	X1 = source address of the packed input
//	X3 = offset which the first delta is applied against
// Clobbers X2, X3, X4 and X5. X0 is left 256 bytes further.
//
// Each value v of 2 bits decodes to delta (v >> 1) XOR −(v & 1), which gives
// 0, −1, 1 and −2 for v = 0, 1, 2 and 3. The delta is subtracted from the
// previous value.

// Dec2b64_pair decodes the two values in the 4 bits of X4 at position lsb.
// X3 holds the previous value on entry, and the last one decoded on exit.
.macro dec2b64_pair lsb
	sbfx	x5, x4, \lsb, 1		// sign-extend first bit (equal to NEG)
	ubfx	x2, x4, \lsb+1, 1	// shift extract remaining bit
	eor	x2, x2, x5		// zig-zag decoded delta
	sub	x2, x3, x2		// against previous
	sbfx	x5, x4, \lsb+2, 1	// again …
	ubfx	x3, x4, \lsb+3, 1
	eor	x3, x3, x5
	sub	x3, x2, x3
	stp	x2, x3, [x0], 16
.endm

dec2b64:
	ldr	x4, [x1]		// fetch all 8 bytes of input
	dec2b64_pair 0
	dec2b64_pair 4
	dec2b64_pair 8
	dec2b64_pair 12
	dec2b64_pair 16
	dec2b64_pair 20
	dec2b64_pair 24
	dec2b64_pair 28
	dec2b64_pair 32
	dec2b64_pair 36
	dec2b64_pair 40
	dec2b64_pair 44
	dec2b64_pair 48
	dec2b64_pair 52
	dec2b64_pair 56
	dec2b64_pair 60
	ret
// dec3b64 is the decode_table target for the 3-bit encoding.
// NOTE(review): the body is a bare return. Nothing is read from X1 and
// nothing is written to X0. Presumably a placeholder for a decoder that is
// yet to be written — confirm before relying on 3-bit input.
dec3b64: // 3-bit reverse delta zig-zag encoding | |
ret | |
// Whitespace for the dump output: newline, horizontal tab and space.
NLHTSP:
	.ascii	"\n\t "
	.align	2

// Dump64 writes the (32) 64-bit values from the stack in hexadecimal.
//
// The values are read from the stack of the caller, starting at the address
// in SP on entry. They go to standard output as one line: a tab indent, then
// 16 hex digits plus a space for each value, and a newline at the end.
// Results of the write syscalls are not checked.
// Clobbers X0, X1, X2, X7, X13, X14 and X16, plus whatever print_x7_hex uses.
_dump64:
	sub	sp, sp, #16		// grow stack
	str	lr, [sp]		// persist procedure call link register

	// print tab indent
	mov	x0, 1			// file descriptor 1 is standard output
	adr	x1, NLHTSP+1		// data pointer
	mov	x2, #1			// data size
	mov	x16, #4			// write defined in <syscall.h>
	svc	#0x80			// invoke syscall

	add	x13, sp, #16		// first 64-bit integer (SP on entry)
	add	x14, sp, #16+32*8	// end: one past the last integer
dump_next:
	ldr	x7, [x13], 8		// X13 points to the next one after this
	bl	print_x7_hex

	// print space suffix
	mov	x0, 1			// file descriptor 1 is standard output
	adr	x1, NLHTSP+2		// data pointer
	mov	x2, #1			// data size
	mov	x16, #4			// write defined in <syscall.h>
	svc	#0x80			// invoke syscall

	// X13 was advanced already, so continue only while strictly before the
	// end. Addresses compare unsigned.
	cmp	x13, x14
	b.lo	dump_next

	// print newline end
	mov	x0, 1			// file descriptor 1 is standard output
	adr	x1, NLHTSP		// data pointer
	mov	x2, #1			// data size
	mov	x16, #4			// write defined in <syscall.h>
	svc	#0x80			// invoke syscall

	ldr	lr, [sp]		// restore procedure call link register
	add	sp, sp, #16		// free stack
	ret
// Digit lookup for hexadecimal output, indexed by nibble value. The index
// is masked to 4 bits, so only the first 16 characters are ever reached.
hex_dict:
	.ascii	"0123456789abcdefgh"
	.align	2

// Print_x7_hex writes the value of X7 to standard output as 16 hexadecimal
// digits, most-significant nibble first. X7 itself is left as is.
// The result of the write syscall is not checked.
// Clobbers X0, X1, X2, X5, X6, X8, X9 and X16.
print_x7_hex:
	sub	sp, sp, #16		// stack space for the 16 digits
	adr	x6, hex_dict		// digits
	mov	x9, sp			// write cursor into the digit buffer
	mov	x8, #60			// bit position of the nibble in turn

1:	lsr	x5, x7, x8		// nibble down to the low 4 bits
	and	x5, x5, 0xf
	ldrb	w5, [x6, x5]		// map to digit
	strb	w5, [x9], 1		// append
	subs	x8, x8, #4		// next less-significant nibble
	b.pl	1b			// bit position 0 is the last one

	mov	x0, 1			// file descriptor 1 is standard output
	mov	x1, sp			// data pointer
	mov	x2, #16			// data size
	mov	x16, #4			// write defined in <syscall.h>
	svc	#0x80			// invoke syscall

	add	sp, sp, #16		// free stack
	ret
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment