Last active
October 17, 2023 23:17
-
-
Save recmo/389339a2e71825eadad1e56106b2e24b to your computer and use it in GitHub Desktop.
Rust aarch64 FEAT_SHA3 implementation of keccak_f1600.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![cfg(all(target_arch = "aarch64", target_feature = "sha3"))] | |
use core::arch::asm; | |
/// Round constants XORed into lane 0 at the end of each of the 24
/// Keccak-f[1600] rounds (the iota step), in round order.
const RC: [u64; 24] = [
    0x0000_0000_0000_0001,
    0x0000_0000_0000_8082,
    0x8000_0000_0000_808a,
    0x8000_0000_8000_8000,
    0x0000_0000_0000_808b,
    0x0000_0000_8000_0001,
    0x8000_0000_8000_8081,
    0x8000_0000_0000_8009,
    0x0000_0000_0000_008a,
    0x0000_0000_0000_0088,
    0x0000_0000_8000_8009,
    0x0000_0000_8000_000a,
    0x0000_0000_8000_808b,
    0x8000_0000_0000_008b,
    0x8000_0000_0000_8089,
    0x8000_0000_0000_8003,
    0x8000_0000_0000_8002,
    0x8000_0000_0000_0080,
    0x0000_0000_0000_800a,
    0x8000_0000_8000_000a,
    0x8000_0000_8000_8081,
    0x8000_0000_0000_8080,
    0x0000_0000_8000_0001,
    0x8000_0000_8000_8008,
];
/// Keccak-f1600 on ARMv8-A with FEAT_SHA3. | |
/// | |
/// See p. K12.2.2 p. 11,749 of the ARM Reference manual. | |
/// Adapted from the Keccak-f1600 implementation in the XKCP/K12. | |
/// see <https://github.com/XKCP/K12/blob/df6a21e6d1f34c1aa36e8d702540899c97dba5a0/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S#L69> | |
pub fn keccak_f1600(state: &mut [u64; 25]) { | |
unsafe { | |
asm!(" | |
// Read state | |
ld1 {{ v0.1d- v3.1d}}, [x0], #32 | |
ld1 {{ v4.1d- v7.1d}}, [x0], #32 | |
ld1 {{ v8.1d-v11.1d}}, [x0], #32 | |
ld1 {{v12.1d-v15.1d}}, [x0], #32 | |
ld1 {{v16.1d-v19.1d}}, [x0], #32 | |
ld1 {{v20.1d-v23.1d}}, [x0], #32 | |
ld1 {{v24.1d}}, [x0] | |
sub x0, x0, #192 | |
// Loop 24 rounds | |
// NOTE: This loop actually computes two f1600 functions in | |
// parallel, in both the lower and the upper 64-bit of the | |
// 128-bit registers v0-v24. | |
mov x8, #24 | |
0: sub x8, x8, #1 | |
// Theta Calculations | |
eor3.16b v25, v20, v15, v10 | |
eor3.16b v26, v21, v16, v11 | |
eor3.16b v27, v22, v17, v12 | |
eor3.16b v28, v23, v18, v13 | |
eor3.16b v29, v24, v19, v14 | |
eor3.16b v25, v25, v5, v0 | |
eor3.16b v26, v26, v6, v1 | |
eor3.16b v27, v27, v7, v2 | |
eor3.16b v28, v28, v8, v3 | |
eor3.16b v29, v29, v9, v4 | |
rax1.2d v30, v25, v27 | |
rax1.2d v31, v26, v28 | |
rax1.2d v27, v27, v29 | |
rax1.2d v28, v28, v25 | |
rax1.2d v29, v29, v26 | |
// Rho and Phi | |
eor.16b v0, v0, v29 | |
xar.2d v25, v1, v30, #64 - 1 | |
xar.2d v1, v6, v30, #64 - 44 | |
xar.2d v6, v9, v28, #64 - 20 | |
xar.2d v9, v22, v31, #64 - 61 | |
xar.2d v22, v14, v28, #64 - 39 | |
xar.2d v14, v20, v29, #64 - 18 | |
xar.2d v26, v2, v31, #64 - 62 | |
xar.2d v2, v12, v31, #64 - 43 | |
xar.2d v12, v13, v27, #64 - 25 | |
xar.2d v13, v19, v28, #64 - 8 | |
xar.2d v19, v23, v27, #64 - 56 | |
xar.2d v23, v15, v29, #64 - 41 | |
xar.2d v15, v4, v28, #64 - 27 | |
xar.2d v28, v24, v28, #64 - 14 | |
xar.2d v24, v21, v30, #64 - 2 | |
xar.2d v8, v8, v27, #64 - 55 | |
xar.2d v4, v16, v30, #64 - 45 | |
xar.2d v16, v5, v29, #64 - 36 | |
xar.2d v5, v3, v27, #64 - 28 | |
xar.2d v27, v18, v27, #64 - 21 | |
xar.2d v3, v17, v31, #64 - 15 | |
xar.2d v30, v11, v30, #64 - 10 | |
xar.2d v31, v7, v31, #64 - 6 | |
xar.2d v29, v10, v29, #64 - 3 | |
// Chi and Iota | |
bcax.16b v20, v26, v22, v8 | |
bcax.16b v21, v8, v23, v22 | |
bcax.16b v22, v22, v24, v23 | |
bcax.16b v23, v23, v26, v24 | |
bcax.16b v24, v24, v8, v26 | |
ld1r.2d {{v26}}, [x1], #8 | |
bcax.16b v17, v30, v19, v3 | |
bcax.16b v18, v3, v15, v19 | |
bcax.16b v19, v19, v16, v15 | |
bcax.16b v15, v15, v30, v16 | |
bcax.16b v16, v16, v3, v30 | |
bcax.16b v10, v25, v12, v31 | |
bcax.16b v11, v31, v13, v12 | |
bcax.16b v12, v12, v14, v13 | |
bcax.16b v13, v13, v25, v14 | |
bcax.16b v14, v14, v31, v25 | |
bcax.16b v7, v29, v9, v4 | |
bcax.16b v8, v4, v5, v9 | |
bcax.16b v9, v9, v6, v5 | |
bcax.16b v5, v5, v29, v6 | |
bcax.16b v6, v6, v4, v29 | |
bcax.16b v3, v27, v0, v28 | |
bcax.16b v4, v28, v1, v0 | |
bcax.16b v0, v0, v2, v1 | |
bcax.16b v1, v1, v27, v2 | |
bcax.16b v2, v2, v28, v27 | |
eor.16b v0,v0,v26 | |
// Rounds loop | |
cbnz w8, 0b | |
// Write state | |
st1 {{ v0.1d- v3.1d}}, [x0], #32 | |
st1 {{ v4.1d- v7.1d}}, [x0], #32 | |
st1 {{ v8.1d-v11.1d}}, [x0], #32 | |
st1 {{v12.1d-v15.1d}}, [x0], #32 | |
st1 {{v16.1d-v19.1d}}, [x0], #32 | |
st1 {{v20.1d-v23.1d}}, [x0], #32 | |
st1 {{v24.1d}}, [x0] | |
", | |
in("x0") state.as_mut_ptr(), | |
in("x1") &RC, | |
clobber_abi("C"), | |
options(nostack) | |
); | |
} | |
} | |
pub fn keccak256(mut bytes: &[u8]) -> [u8; 32] { | |
const RATE: usize = 1088 / 8; | |
assert_eq!(RATE % 8, 0); | |
let mut state = [0u64; 25]; | |
// Intermediate whole blocks | |
while bytes.len() >= RATE { | |
for (b, s) in bytes[..RATE].chunks_exact(8).zip(state.iter_mut()) { | |
*s ^= u64::from_le_bytes(b.try_into().unwrap()); | |
} | |
bytes = &bytes[RATE..]; | |
keccak_f1600(&mut state); | |
} | |
debug_assert!(bytes.len() < RATE); | |
// Final block with padding | |
let (words, bytes) = bytes.split_at(bytes.len() & !7); | |
for (b, s) in words.chunks_exact(8).zip(state.iter_mut()) { | |
*s ^= u64::from_le_bytes(b.try_into().unwrap()); | |
} | |
let mut partial_word = [0u8; 8]; | |
partial_word[..bytes.len()].copy_from_slice(&bytes); | |
partial_word[bytes.len()] = 0x01; // Or 0x06 for SHA3-256 | |
state[words.len() / 8] ^= u64::from_le_bytes(partial_word); | |
state[(RATE / 8) - 1] ^= 0x8000000000000000; | |
keccak_f1600(&mut state); | |
// Output | |
let mut output = [0_u8; 32]; | |
for (o, s) in output.chunks_exact_mut(8).zip(state.iter()) { | |
o.copy_from_slice(&s.to_le_bytes()); | |
} | |
output | |
} | |
/// Checks `keccak_f1600` against two consecutive permutations of the
/// all-zero state.
#[test]
fn test_keccak_f1600() {
    // Test vectors are copied from XKCP (eXtended Keccak Code Package)
    // https://github.com/XKCP/XKCP/blob/master/tests/TestVectors/KeccakF-1600-IntermediateValues.txt
    //
    // EXPECTED[0] is the state after one permutation of the zero state,
    // EXPECTED[1] the state after permuting that result once more.
    const EXPECTED: [[u64; 25]; 2] = [
        [
            0xF125_8F79_40E1_DDE7,
            0x84D5_CCF9_33C0_478A,
            0xD598_261E_A65A_A9EE,
            0xBD15_4730_6F80_494D,
            0x8B28_4E05_6253_D057,
            0xFF97_A42D_7F8E_6FD4,
            0x90FE_E5A0_A446_47C4,
            0x8C5B_DA0C_D619_2E76,
            0xAD30_A6F7_1B19_059C,
            0x3093_5AB7_D08F_FC64,
            0xEB5A_A93F_2317_D635,
            0xA9A6_E626_0D71_2103,
            0x81A5_7C16_DBCF_555F,
            0x43B8_31CD_0347_C826,
            0x01F2_2F1A_11A5_569F,
            0x05E5_635A_21D9_AE61,
            0x64BE_FEF2_8CC9_70F2,
            0x6136_7095_7BC4_6611,
            0xB87C_5A55_4FD0_0ECB,
            0x8C3E_E88A_1CCF_32C8,
            0x940C_7922_AE3A_2614,
            0x1841_F924_A2C5_09E4,
            0x16F5_3526_E704_65C2,
            0x75F6_44E9_7F30_A13B,
            0xEAF1_FF7B_5CEC_A249,
        ],
        [
            0x2D5C_954D_F96E_CB3C,
            0x6A33_2CD0_7057_B56D,
            0x093D_8D12_70D7_6B6C,
            0x8A20_D9B2_5569_D094,
            0x4F9C_4F99_E5E7_F156,
            0xF957_B9A2_DA65_FB38,
            0x8577_3DAE_1275_AF0D,
            0xFAF4_F247_C3D8_10F7,
            0x1F1B_9EE6_F79A_8759,
            0xE4FE_CC0F_EE98_B425,
            0x68CE_61B6_B9CE_68A1,
            0xDEEA_66C4_BA8F_974F,
            0x33C4_3D83_6EAF_B1F5,
            0xE006_5404_2719_DBD9,
            0x7CF8_A9F0_0983_1265,
            0xFD54_49A6_BF17_4743,
            0x97DD_AD33_D899_4B40,
            0x48EA_D5FC_5D0B_E774,
            0xE3B8_C8EE_55B7_B03C,
            0x91A0_226E_649E_42E9,
            0x900E_3129_E7BA_DD7B,
            0x202A_9EC5_FAA3_CCE8,
            0x5B34_0246_4E1C_3DB6,
            0x609F_4E62_A44C_1059,
            0x20D0_6CD2_6A8F_BF5C,
        ],
    ];

    let mut state = [0u64; 25];
    for expected in EXPECTED.iter() {
        keccak_f1600(&mut state);
        assert_eq!(state, *expected);
    }
}
/// Checks `keccak256` against a known digest of the ASCII string "testing".
#[test]
fn test_keccak256() {
    // 5f16f4c7f149ac4f9510d9cf8cf384038ad348b3bcdc01915f95de12df9d1b02
    let expected: [u8; 32] = [
        0x5f, 0x16, 0xf4, 0xc7, 0xf1, 0x49, 0xac, 0x4f, 0x95, 0x10, 0xd9, 0xcf, 0x8c, 0xf3, 0x84,
        0x03, 0x8a, 0xd3, 0x48, 0xb3, 0xbc, 0xdc, 0x01, 0x91, 0x5f, 0x95, 0xde, 0x12, 0xdf, 0x9d,
        0x1b, 0x02,
    ];
    let digest = keccak256(b"testing");
    assert_eq!(digest, expected);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment