Created
September 30, 2021 06:19
-
-
Save JarkkoPFC/0e4e599320b0cc7ea92df45fb416d79a to your computer and use it in GitHub Desktop.
Faster 16/32bit 2D Morton Code encode/decode functions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
uint16_t encode16_morton2(uint8_t x_, uint8_t y_) | |
{ | |
uint32_t res=x_|(uint32_t(y_)<<16); | |
res=(res|(res<<4))&0x0f0f0f0f; | |
res=(res|(res<<2))&0x33333333; | |
res=(res|(res<<1))&0x55555555; | |
return uint16_t(res|(res>>15)); | |
} | |
//---- | |
uint32_t encode32_morton2(uint16_t x_, uint16_t y_) | |
{ | |
uint64_t res=x_|(uint64_t(y_)<<32); | |
res=(res|(res<<8))&0x00ff00ff00ff00ff; | |
res=(res|(res<<4))&0x0f0f0f0f0f0f0f0f; | |
res=(res|(res<<2))&0x3333333333333333; | |
res=(res|(res<<1))&0x5555555555555555; | |
return uint32_t(res|(res>>31)); | |
} | |
//---- | |
void decode16_morton2(uint8_t &x_, uint8_t &y_, uint16_t mc_) | |
{ | |
uint32_t res=(mc_|(uint32_t(mc_)<<15))&0x55555555; | |
res=(res|(res>>1))&0x33333333; | |
res=(res|(res>>2))&0x0f0f0f0f; | |
res=res|(res>>4); | |
x_=uint8_t(res); | |
y_=uint8_t(res>>16); | |
} | |
//---- | |
void decode32_morton2(uint16_t &x_, uint16_t &y_, uint32_t mc_) | |
{ | |
uint64_t res=(mc_|(uint64_t(mc_)<<31))&0x5555555555555555; | |
res=(res|(res>>1))&0x3333333333333333; | |
res=(res|(res>>2))&0x0f0f0f0f0f0f0f0f; | |
res=(res|(res>>4))&0x00ff00ff00ff00ff; | |
res=res|(res>>8); | |
x_=uint16_t(res); | |
y_=uint16_t(res>>32); | |
} |
[pvrtc] These formulas work for sequential scans:
twiddle1 = ((twiddle1 | 0xAAAAAAAAu) + 1u) & 0x55555555u;
twiddle2 = ((twiddle2 | 0x55555555u) + 2u) & 0xAAAAAAAAu;
twiddle = twiddle1 + twiddle2;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Based on some micro benchmarking, these functions are ~50% faster than classic bit twiddling implementations using separate2/compact2 for each coordinate. It's also better to use 16-bit version for performance if xy-coordinates are in range 0-255, which gains further ~30% boost.