Created
December 12, 2024 15:46
-
-
Save jevinskie/d604dcaa38a09f4d14d09c167149dbc8 to your computer and use it in GitHub Desktop.
AArch64 Advanced SIMD SHA1 digest to ASCII hex string
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ; digest_to_hex: writes a 20-byte digest as 40 lowercase ASCII hex characters | |
| ; followed by a NUL byte. | |
| ; Assumes x0 = digest pointer and x1 = output buffer, i.e. the two arguments of | |
| ; the C++ digest_to_hex shown further down - confirm against the build. | |
| digest_to_hex: | |
| ; --- bytes 0..15: split every byte into its high and low nibble --- | |
| ldr q0, [x0] | |
| ushr.16b v1, v0, #4 | |
| movi.16b v2, #15 | |
| and.16b v0, v0, v2 | |
| ; nibble_hex_lut = { '0', ... '9', 'a' ... 'f' } | |
| adrp x8, nibble_hex_lut@PAGE | |
| ldr q2, [x8, nibble_hex_lut@PAGEOFF] | |
| ; Table lookups turn each nibble value (0..15) into its ASCII digit. | |
| tbl.16b v1, { v2 }, v1 | |
| tbl.16b v0, { v2 }, v0 | |
| ; Interleave high/low digits so each input byte yields two adjacent characters. | |
| zip1.16b v2, v1, v0 | |
| zip2.16b v3, v1, v0 | |
| ; Store 32 characters; the post-increment leaves x8 = x1 + 32. | |
| mov x8, x1 | |
| st1.16b { v2, v3 }, [x8], #32 | |
| ; --- bytes 16..19: same split, but the digits are computed arithmetically --- | |
| ldr s0, [x0, #16] | |
| ushr.8b v1, v0, #4 | |
| movi.8b v2, #15 | |
| and.8b v0, v0, v2 | |
| zip1.8b v0, v1, v0 | |
| ; Each lane is at most 15, so OR-ing in '0' (48) is the same as adding it. | |
| movi.8b v1, #48 ; '0' | |
| orr.8b v0, v0, v1 | |
| ; Lanes that ended up above '9' get another 39 added, which maps ':'..'?' to 'a'..'f'. | |
| movi.8b v1, #57 ; '9' | |
| cmhi.8b v1, v0, v1 | |
| movi.8b v2, #39 | |
| and.8b v1, v1, v2 | |
| add.8b v0, v1, v0 | |
| ; Store the last 8 characters at output offset 32, then the terminator at offset 40. | |
| str d0, [x8] | |
| strb wzr, [x1, #40] ; NUL | |
| ret | |
| ; Throughput summary for 100 iterations of the routine above. | |
| ; NOTE(review): the layout resembles llvm-mca output; the tool and the target CPU | |
| ; are not recorded here - confirm before comparing against other numbers. | |
| ; Iterations: 100 | |
| ; Instructions: 2600 | |
| ; Total Cycles: 620 | |
| ; Total uOps: 2800 | |
| ; | |
| ; Dispatch Width: 6 | |
| ; uOps Per Cycle: 4.52 | |
| ; IPC: 4.19 | |
| ; Block RThroughput: 6.0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Renders the four bytes of `value` as eight lowercase ASCII hex digits.
///
/// Bytes are consumed in NEON lane order (the in-memory order of `value` on a
/// little-endian target); no byte reversal takes place. Each byte produces its
/// high-nibble digit followed by its low-nibble digit.
///
/// @param value  32-bit word whose bytes are converted.
/// @return The eight characters packed into a uint64_t exactly as the vector
///         store laid them out, so copying the result to memory yields the
///         digits in order.
uint64_t u32_to_hex_ascii_u64(const uint32_t value) {
    // Broadcast the word into both 32-bit lanes; only byte lanes 0..3 are used.
    const uint8x8_t raw = vreinterpret_u8_u32(vdup_n_u32(value));

    // Zip (byte >> 4) with (byte & 0x0F); the first half of the zip is
    // [H0,L0,H1,L1,H2,L2,H3,L3], which is all eight nibbles we need.
    const uint8x8_t digits = vzip_u8(vshr_n_u8(raw, 4), vand_u8(raw, vdup_n_u8(0x0F))).val[0];

    // Every nibble is at most 15, so OR-ing in '0' (0x30) equals adding it:
    // 0..9 become '0'..'9' and 10..15 become ':'..'?'.
    const uint8x8_t decimal = vorr_u8(digits, vdup_n_u8('0'));

    // Nibbles above 9 need a further 0x27 to move from ':'..'?' to 'a'..'f'.
    const uint8x8_t is_alpha = vcgt_u8(digits, vdup_n_u8(9));
    const uint8x8_t alpha_fixup = vand_u8(is_alpha, vdup_n_u8(0x27));
    const uint8x8_t ascii = vadd_u8(decimal, alpha_fixup);

    uint64_t packed;
    vst1_u8(reinterpret_cast<uint8_t *>(&packed), ascii);
    return packed;
}
/// Writes a SHA-1 digest as lowercase ASCII hex followed by a NUL terminator.
///
/// The first 16 bytes are converted with a table lookup on full 128-bit
/// vectors; the trailing 4 bytes go through u32_to_hex_ascii_u64().
///
/// @param digest   Source bytes; 20 bytes are read (16 + 4). No alignment is
///                 required.
/// @param hex_str  Destination; bytes 0..39 receive the hex digits and index
///                 SHA1_OUTPUT_SIZE * 2 receives '\0'. No alignment is
///                 required.
///                 NOTE(review): this expects SHA1_OUTPUT_SIZE == 20 so that
///                 the terminator directly follows the digits - confirm
///                 against its definition.
void digest_to_hex(const uint8_t *__restrict digest,
                   char *__restrict hex_str) {
    const uint8x16_t mask_lo = vdupq_n_u8(0x0F); // Mask for low 4 bits

    // Load the first 16 bytes of the digest and split them into nibbles.
    const uint8x16_t input = vld1q_u8(digest);
    const uint8x16_t hi    = vshrq_n_u8(input, 4);    // Shift high nibbles down
    const uint8x16_t lo    = vandq_u8(input, mask_lo); // Isolate low nibbles

    // Convert nibble values to ASCII hex characters via table lookup.
    constexpr uint8x16_t lut = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
    const uint8x16_t hex_hi  = vqtbl1q_u8(lut, hi);
    const uint8x16_t hex_lo  = vqtbl1q_u8(lut, lo);

    // Interleave so each input byte yields "high digit, low digit", then
    // store all 32 characters.
    const uint8x16x2_t hex_chars_interleaved = vzipq_u8(hex_hi, hex_lo);
    vst1q_u8_x2((to_from_cast<uint8_t *, char *>(hex_str)), hex_chars_interleaved);

    // Handle the remaining 4 bytes. Both buffers are byte arrays with no
    // alignment guarantee, so the word-sized accesses go through memcpy
    // rather than a dereferenced reinterpret_cast (which would be undefined
    // behaviour: misaligned access and a strict-aliasing violation). The
    // builtin form is used so no additional header is needed; it still
    // compiles down to a single load and a single store.
    uint32_t remaining_bytes;
    __builtin_memcpy(&remaining_bytes, digest + 16, sizeof(remaining_bytes));
    const uint64_t hex_packed = u32_to_hex_ascii_u64(remaining_bytes);
    __builtin_memcpy(hex_str + 32, &hex_packed, sizeof(hex_packed));

    hex_str[SHA1_OUTPUT_SIZE * 2] = '\0';
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment