// adler32 generic SIMD calculation method
// GitHub gist sh1boot/ca35223a67637a83ef5f8689d05255ff, created March 19, 2024 16:43.
// Note: GitHub flagged this file as containing hidden or bidirectional Unicode
// text that may be interpreted or compiled differently than it appears; review
// it in an editor that reveals hidden Unicode characters.
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cstdint>
// Number of lanes in every vector type below; dut() consumes the input in
// chunks of VECLEN bytes.
#define VECLEN 16
// Modulus applied to both Adler-32 running sums.
#define ADLER_MOD 65521
// Clang extended-vector types with VECLEN lanes of 8-, 16- and 32-bit
// unsigned integers.
typedef uint8_t vuint8_t __attribute__((ext_vector_type(VECLEN)));
typedef uint16_t vuint16_t __attribute__((ext_vector_type(VECLEN)));
typedef uint32_t vuint32_t __attribute__((ext_vector_type(VECLEN)));
uint32_t dut(uint32_t sum_in, uint8_t const* buffer, size_t count) { | |
vuint16_t asum16 = { 0 }; | |
vuint16_t bsum16 = { 0 }; | |
vuint16_t binc16 = { 0 }; | |
size_t head = count % VECLEN; | |
if (head > 0) { | |
uint8_t tmp[VECLEN] = { 0 }; | |
memcpy(tmp + VECLEN - head, buffer, head); | |
vuint8_t in; | |
memcpy(&in, tmp, VECLEN); | |
vuint16_t in16 = __builtin_convertvector(in, vuint16_t); | |
asum16 = in16; | |
binc16 = in16; | |
} | |
for (size_t i = head; i + VECLEN <= count; i += VECLEN) { | |
vuint8_t in; | |
memcpy(&in, buffer + i, VECLEN); | |
vuint16_t in16 = __builtin_convertvector(in, vuint16_t); | |
auto old = bsum16; // carry possible every iteration | |
bsum16 += binc16; | |
for (int i = 0; i < VECLEN; ++i) bsum16[i] -= (bsum16[i] < old[i]) ? ADLER_MOD : 0; | |
old = asum16; // carry possible once per 256 iterations | |
asum16 += in16; | |
for (int i = 0; i < VECLEN; ++i) asum16[i] -= (asum16[i] < old[i]) ? ADLER_MOD : 0; | |
old = binc16; // carry possible once per 256 iterations, but we need to know which iteration for the cumulative effect on bsum16. | |
binc16 += in16; | |
for (int i = 0; i < VECLEN; ++i) binc16[i] -= (binc16[i] < old[i]) ? ADLER_MOD : 0; | |
} | |
vuint32_t asum32 = __builtin_convertvector(asum16, vuint32_t); | |
vuint32_t bsum32 = __builtin_convertvector(bsum16, vuint32_t); | |
vuint32_t binc32 = __builtin_convertvector(binc16, vuint32_t); | |
constexpr vuint32_t off = { 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 }; // TODO: don't hard-code VECLEN | |
bsum32 = binc32 * off + bsum32 * VECLEN; | |
asum16 = __builtin_convertvector(asum32 % ADLER_MOD, vuint16_t); | |
bsum16 = __builtin_convertvector(bsum32 % ADLER_MOD, vuint16_t); | |
uint32_t a = sum_in & 0xffff; | |
uint32_t b = ((sum_in >> 16) + a * (count % ADLER_MOD)) % ADLER_MOD; | |
for (int i = 0; i < VECLEN; ++i) { | |
a += asum16[i]; | |
b += bsum16[i]; | |
} | |
a %= ADLER_MOD; | |
b %= ADLER_MOD; | |
return (b << 16) | a; | |
} | |
uint32_t ref(uint32_t sum_in, uint8_t const* buffer, size_t count) { | |
uint64_t a = sum_in & 0xffff; | |
uint64_t b = sum_in >> 16; | |
for (size_t i = 0; i < count; ++i) { | |
a += buffer[i] & 255; | |
b += a; | |
a %= ADLER_MOD; | |
b %= ADLER_MOD; | |
} | |
return (b << 16) | a; | |
} | |
int main(void) { | |
uint8_t testbuf[4096] = { 0 }; | |
int max_fail = 10; | |
uint32_t sum_start = 1; | |
uint32_t buffer_sum = 0; | |
for (int i = 0; i < 1000000; ++i) { | |
size_t len = rand() % 4096 + 1; | |
len -= len % VECLEN; | |
uint32_t ck_dut = dut(sum_start, testbuf, len); | |
uint32_t ck_ref = ref(sum_start, testbuf, len); | |
if (ck_dut != ck_ref) { | |
printf("i:%d, bs: 0x%04x len:%zu(r:%zu) 0x%08x != 0x%08x\n", i, buffer_sum & 0xffff, len, len % VECLEN, ck_dut, ck_ref); | |
if (--max_fail <= 0) break; | |
} | |
int r = rand() & 255; | |
testbuf[i % 4096] += r; | |
buffer_sum += r; | |
sum_start = ck_ref; | |
} | |
return 0; | |
} |
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.