Last active
September 15, 2024 11:25
-
-
Save imaami/a4bf6ad52fa51360432250f45b53299c to your computer and use it in GitHub Desktop.
Expand subsequences
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <errno.h> | |
#include <inttypes.h> | |
#include <limits.h> | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#ifdef __aarch64__ | |
#include <arm_neon.h> | |
#endif // __aarch64__ | |
#ifdef __x86_64__ | |
#include <immintrin.h> | |
#endif // __x86_64__ | |
/*
 * Forward declarations for the helpers used by main(); both are defined
 * further down in this file.
 */

/* Converts the string `arg` to a 16-bit value; `*err` receives 0 or an errno code. */
static uint16_t parse_uint16(char *arg, int *err);

/* Expands the 16-bit value `seq` into a 64-bit word. */
static uint64_t b24_expand(uint16_t seq);
/**
 * Program entry point.
 *
 * Each command-line argument after the program name is parsed with
 * parse_uint16(); on success the result of b24_expand() is printed to
 * stdout as "0x" followed by 16 zero-padded hex digits. When parsing fails,
 * the strerror() text for the reported error code is printed to stderr and
 * processing continues with the next argument.
 *
 * @param argc  Number of entries in argv.
 * @param argv  Argument vector; argv[1..argc-1] are the values to expand.
 * @return EXIT_SUCCESS when every argument was converted (or none were
 *         given), EXIT_FAILURE when at least one argument was rejected.
 */
int
main (int    argc,
      char **argv)
{
	int status = EXIT_SUCCESS;

	for (int i = 1; i < argc; ++i) {
		int e = 0;
		/* The loop condition guarantees 1 <= i < argc, so argv[i] is
		 * always a real argument; no NULL substitution is needed. */
		uint16_t seq = parse_uint16(argv[i], &e);

		if (e) {
			fprintf(stderr, "%s\n", strerror(e));
			/* Remember the failure so the exit status reflects it. */
			status = EXIT_FAILURE;
			continue;
		}

		printf("0x%016" PRIx64 "\n", b24_expand(seq));
	}

	return status;
}
#ifdef __aarch64__
/**
 * NEON implementation of b24_expand().
 *
 * Steps, as performed by the code below:
 *   1. Build `rot`, the 16-bit right-rotation of `seq` by 4, and place
 *      `seq` in 16-bit lane 0 and `rot` in lane 1 of a zeroed vector.
 *   2. Replicate source bytes 0, 2, 1, 3 four times each (first table
 *      lookup), giving one 32-bit word per source byte.
 *   3. Within every 32-bit word keep bits 0-3 of byte 0, bits 1-4 of
 *      byte 1, bits 2-5 of byte 2 and bits 3-6 of byte 3, and pack those
 *      four nibbles into the low 16 bits of the word (two mask/shift/or
 *      rounds: shift by 5, then by 10).
 *   4. Gather the low 16 bits of the four words into one 64-bit lane
 *      (second table lookup) and return that lane.
 *
 * @param seq  16-bit input value.
 * @return 64-bit word assembled from lane 0 of the final vector.
 */
static uint64_t
b24_expand (uint16_t seq)
{
	/* First lookup: each of the four source bytes repeated four times,
	 * in the order seq-low, rot-low, seq-high, rot-high. */
	const uint8_t spread_idx[16] = {
		0x00, 0x00, 0x00, 0x00,
		0x02, 0x02, 0x02, 0x02,
		0x01, 0x01, 0x01, 0x01,
		0x03, 0x03, 0x03, 0x03
	};
	/* Second lookup: low two bytes of every 32-bit word go to bytes 0-7.
	 * The remaining entries (0x80) only feed lane 1, which is never
	 * stored. */
	const uint8_t gather_idx[16] = {
		0x00, 0x01, 0x04, 0x05,
		0x08, 0x09, 0x0c, 0x0d,
		0x80, 0x80, 0x80, 0x80,
		0x80, 0x80, 0x80, 0x80
	};

	uint16_t rot = seq << 12U | seq >> 4U;
	uint64_t ret = 0;

	/* Lane 0 <- seq, lane 1 <- rot, all other lanes zero. */
	uint16x8_t halves = vdupq_n_u16(0);
	halves = vld1q_lane_u16(&seq, halves, 0);
	halves = vld1q_lane_u16(&rot, halves, 1);

	uint64x2_t x = vreinterpretq_u64_u8(
		vqtbl1q_u8(vreinterpretq_u8_u16(halves),
		           vld1q_u8(spread_idx)));

	/* Round 1: nibbles from bytes 1 and 3 move down by 5 bits so they
	 * sit directly above the nibbles kept from bytes 0 and 2. */
	uint64x2_t keep = vandq_u64(
		x, vdupq_n_u64(UINT64_C(0x003c000f003c000f)));
	uint64x2_t move = vandq_u64(
		x, vdupq_n_u64(UINT64_C(0x78001e0078001e00)));
	x = vorrq_u64(keep, vshrq_n_u64(move, 5));

	/* Round 2: the upper nibble pair moves down by 10 bits, leaving
	 * 16 contiguous result bits in the low half of every 32-bit word. */
	keep = vandq_u64(
		x, vdupq_n_u64(UINT64_C(0x000000ff000000ff)));
	move = vandq_u64(
		x, vdupq_n_u64(UINT64_C(0x03fc000003fc0000)));
	x = vorrq_u64(keep, vshrq_n_u64(move, 10));

	uint64x2_t packed = vreinterpretq_u64_u8(
		vqtbl1q_u8(vreinterpretq_u8_u64(x),
		           vld1q_u8(gather_idx)));

	vst1q_lane_u64(&ret, packed, 0);
	return ret;
}
#endif // __aarch64__
#if defined(__x86_64__) && defined(__SSSE3__) && \
    defined(__SSE4_1__) && defined(__BMI2__)
/**
 * x86-64 implementation of b24_expand() using SSSE3 (_mm_shuffle_epi8),
 * SSE4.1 (_mm_extract_epi64) and BMI2 (_pext_u64).
 *
 * The result holds sixteen 4-bit fields; field i (bits 4*i .. 4*i+3) is the
 * 4-bit window of `seq` starting at bit i, with bit positions wrapping
 * around from bit 15 back to bit 0.
 *
 * This path is only compiled when the compiler reports all three ISA
 * extensions as enabled; otherwise the portable version below is used.
 *
 * @param seq  16-bit input value.
 * @return 64-bit word containing the sixteen cyclic 4-bit windows of `seq`.
 */
static uint64_t
b24_expand (uint16_t seq)
{
	/* seq rotated right by 4 within 16 bits, so that windows crossing a
	 * byte boundary of seq fall inside a single byte of rot. */
	const uint16_t rot = (uint16_t)(seq << 12U | seq >> 4U);

	/* Source bytes: 0 = seq low, 1 = seq high, 2 = rot low, 3 = rot high. */
	const uint32_t packed = (uint32_t)rot << 16U | seq;

	/* Per byte of each 32-bit group: bits 0-3, 1-4, 2-5, 3-6. */
	const uint64_t window_mask = UINT64_C(0x783c1e0f783c1e0f);

	/* Replicate source bytes 0, 2, 1, 3 four times each. */
	const __m128i spread = _mm_shuffle_epi8(
		_mm_set_epi64x(0, (long long)packed),
		_mm_set_epi64x((long long)UINT64_C(0x0303030301010101),
		               (long long)UINT64_C(0x0202020200000000)));

	/* Each 64-bit half yields 32 extracted bits. */
	return _pext_u64((uint64_t)_mm_extract_epi64(spread, 1), window_mask) << 32U
	     | _pext_u64((uint64_t)_mm_extract_epi64(spread, 0), window_mask);
}
#elif !defined(__aarch64__)
/**
 * Portable implementation of b24_expand().
 *
 * Used on x86-64 builds where SSSE3/SSE4.1/BMI2 are not all enabled at
 * compile time, and on any architecture other than aarch64 (which has its
 * own NEON implementation in this file).
 *
 * The result holds sixteen 4-bit fields; field i (bits 4*i .. 4*i+3) is the
 * 4-bit window of `seq` starting at bit i, with bit positions wrapping
 * around from bit 15 back to bit 0.
 *
 * @param seq  16-bit input value.
 * @return 64-bit word containing the sixteen cyclic 4-bit windows of `seq`.
 */
static uint64_t
b24_expand (uint16_t seq)
{
	/* Duplicating seq above itself makes a plain right shift behave like
	 * a cyclic window for start positions 0..15. */
	const uint32_t wrapped = (uint32_t)seq << 16U | seq;
	uint64_t out = 0;

	for (unsigned i = 0; i < 16U; ++i) {
		out |= (uint64_t)((wrapped >> i) & 0xfU) << (4U * i);
	}

	return out;
}
#endif // x86_64 intrinsics / portable fallback
/**
 * Parse a string as an unsigned 16-bit integer.
 *
 * The conversion is done by strtoull() with base 0, so the usual C prefixes
 * select the radix. The entire string must be consumed by the conversion.
 *
 * @param arg  NUL-terminated string to parse; may be NULL.
 * @param err  Receives 0 on success, otherwise an errno-style code:
 *             - EFAULT  `arg` is NULL;
 *             - ENODATA `arg` is the empty string;
 *             - EINVAL  the string was not fully consumed by the conversion;
 *             - ERANGE  the value does not fit in 16 bits, or is a non-zero
 *                       negative number;
 *             - any other errno value set by strtoull().
 *             Must not be NULL.
 * @return The low 16 bits of the converted value. Only meaningful when
 *         `*err` is 0.
 */
static uint16_t
parse_uint16 (char *arg,
              int  *err)
{
	unsigned long long value = 0;
	int e = 0;

	if (!arg) {
		e = EFAULT;
	} else if (!*arg) {
		e = ENODATA;
	} else {
		char *end = arg;

		errno = 0;
		value = strtoull(arg, &end, 0);
		e = errno;

		if (!e && *end) {
			e = EINVAL;
		}

		/* strtoull() accepts a leading '-' and negates the result in the
		 * unsigned type without reporting an error, so a large negative
		 * input can wrap around to a small positive value. Once the
		 * whole string has been consumed, a '-' anywhere in it can only
		 * be that sign. "-0" stays acceptable. */
		if (!e && value != 0 && strchr(arg, '-')) {
			e = ERANGE;
		}

		if (!e && value > UINT16_MAX) {
			e = ERANGE;
		}
	}

	*err = e;
	return (uint16_t)(value & 0xffffU);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment