Last active
September 11, 2022 18:07
-
-
Save amonakov/66ecb476991804291de0da6747d2db23 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
/*
 * A pair of 16-bit coordinates.  x is stored first and y second; the SSE
 * helpers copy the whole struct to and from the low bytes of a vector, so
 * the member order matters.
 */
struct xy {
    uint16_t x, y;
};
uint32_t xy2z(struct xy *xy) | |
{ | |
uint64_t r = xy->x | ((uint64_t)xy->y << 32); | |
r |= r << 8; | |
r &= -1/0x101ull; | |
r |= r << 4; | |
r &= -1/0x11ull; | |
r |= r << 2; | |
r &= -1/0x5ull; | |
r |= r << 1; | |
r &= -1/0x3ull; | |
return r | (r >> 31); | |
} | |
struct xy z2xy(uint32_t z) | |
{ | |
uint64_t r = z | ((uint64_t)z << 33); | |
r &= ~(-1/3ull); | |
r += r << 1; | |
r &= ~(-1/5ull); | |
r += r << 2; | |
r &= ~(-1/0x11ull); | |
r += r << 4; | |
r &= ~(-1/0x101ull); | |
r += r << 8; | |
return (struct xy){ r >> 48, r >> 16 }; | |
} | |
#ifdef __SSE4_2__ | |
#include <immintrin.h> | |
#include <string.h> | |
/* It would be more efficient to encode 4 structs at a time with an additional | |
* _mm_clmulepi64_si128(r, r, 0x11). */ | |
uint32_t xy2z_sse(struct xy *xy) | |
{ | |
__m128i r = { 0 }; | |
memcpy(&r, xy, sizeof *xy); | |
r = _mm_clmulepi64_si128(r, r, 0); | |
__v4su u = (__v4su)r; | |
return u[0] + (u[1] << 1); | |
} | |
/* As above, this should be used for decoding 4 structs at a time instead. */ | |
struct xy z2xy_sse(uint32_t z) | |
{ | |
union { | |
__m128i m; | |
__v16qu qu; | |
__v8hu hu; | |
__v4su su; | |
} lo, hi; | |
__v16qu acbd = { | |
0, 1, 4, 5, | |
2, 3, 6, 7, | |
8, 9, 12, 13, | |
10, 11, 14, 15 | |
}; | |
__v16qu pack_lo = { | |
0, 2, -1, -1, | |
4, 6, -1, -1, | |
8, 10, -1, -1, | |
12, 14, -1, -1 | |
}; | |
__v16qu pack_hi = { | |
-1, -1, 0, 2, | |
-1, -1, 4, 6, | |
-1, -1, 8, 10, | |
-1, -1, 12, 14 | |
}; | |
lo.su = (__v4su){ z, 0, 0, 0 }; | |
hi.su = lo.su >> 1; | |
// ..DdCcBbAa -> ..0d0c0b0a | |
lo.su &= -1/3u; | |
// ..0d0c0b0a -> ..?d?cdbca | |
lo.su |= lo.su >> 3; | |
// -> ..0000dbca | |
lo.su &= 0x0f0f0f0f; | |
// -> ..0000dcba | |
lo.m = _mm_shuffle_epi8((__m128i)acbd, lo.m); | |
// -> ..0000hgfehgfedcba | |
lo.hu |= lo.hu >> 4; | |
lo.m = _mm_shuffle_epi8(lo.m, (__m128i)pack_lo); | |
hi.su &= -1/3u; | |
hi.su |= hi.su >> 3; | |
hi.su &= 0x0f0f0f0f; | |
hi.m = _mm_shuffle_epi8((__m128i)acbd, hi.m); | |
hi.hu |= hi.hu >> 4; | |
hi.m = _mm_shuffle_epi8(hi.m, (__m128i)pack_hi); | |
lo.hu |= hi.hu; | |
struct xy r; | |
memcpy(&r, &lo, sizeof r); | |
return r; | |
} | |
#endif | |
int main(void) | |
{ | |
struct xy xy; | |
uint32_t z; | |
while (scanf("%hx%hx", &xy.x, &xy.y) == 2) { | |
z = xy2z(&xy); | |
printf("%x\n", z); | |
xy = z2xy(z); | |
printf("%x %x\n", xy.x, xy.y); | |
#ifdef __SSE4_2__ | |
z = xy2z_sse(&xy); | |
printf("%x\n", z); | |
xy = z2xy_sse(z); | |
printf("%x %x\n", xy.x, xy.y); | |
#endif | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment