Last active
March 29, 2025 08:43
-
-
Save Arnavion/3c0128afec19a9699f0cbb904d0fc88c to your computer and use it in GitHub Desktop.
RISC-V misc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! <https://rust.godbolt.org/z/z78Gh8j47>
/// Reads a `u64` (in native byte order) from `ptr`, which may have any alignment,
/// by delegating to the standard raw-pointer `read_unaligned` method.
///
/// # Safety
///
/// `ptr` must point to an allocation that contains at least `size_of::<u64>()` bytes
/// at `ptr`'s address.
#[inline(never)]
pub unsafe fn core_read_unaligned(ptr: *const u8) -> u64 {
    // Codegen notes:
    //
    // - With `target-feature=+unaligned-scalar-mem` this compiles to a single `ld`, as desired.
    // - Without that feature it falls back to loading the bytes one at a time and shifting
    //   each of them into place, so the custom impl below ends up being better.
    // - The standard feature `target-feature=+zicclsm` (hardware supports misaligned loads
    //   and stores) does not make a difference either way.
    let wide = ptr.cast::<u64>();

    // SAFETY: the caller guarantees that `size_of::<u64>()` bytes are readable at `ptr`,
    // and `read_unaligned` places no alignment requirement on the pointer.
    unsafe { wide.read_unaligned() }
}
/// # Safety | |
/// | |
/// `ptr` must point to an allocation that contains at least `size_of::<u64>()` bytes | |
/// at `ptr`'s address. | |
#[inline(never)] | |
pub unsafe fn custom_read_unaligned(ptr: *const u8) -> u64 { | |
let (first_ptr, second_ptr, shamt) = unsafe { custom_read_unaligned_inner(ptr) }; | |
unsafe { | |
// Both pointers are guaranteed to point to valid u64-sized memory within | |
// our address space (assuming the caller satisfied our safety critera). | |
// We can load them, then shift the two u64s to keep just the parts we care about, | |
// then OR them to get the result. | |
// | |
// However dereferencing the pointers in Rust code might read partially outside | |
// the allocation that `ptr` was from, which is UB. The official alternative is | |
// to use inline assembly to dereference the pointers instead. | |
// This alternative is also used by compiler_builtins' x86_64 SWAR impl of `strlen`, | |
// for the same reason. | |
// | |
// This code ends up compiling to a functionally equivalent version of | |
// <https://old.reddit.com/r/RISCV/comments/1ezbyr4/performance_of_misaligned_loads/ljkbx95/> | |
// As mentioned there, it is possible to do only one load by checking | |
// if the pointer is already aligned, but doing two loads is also cheap and | |
// avoids branching. | |
let first: u64; | |
core::arch::asm!( | |
"ld {value}, ({ptr})", | |
ptr = in(reg) first_ptr, | |
value = lateout(reg) first, | |
); | |
let second: u64; | |
core::arch::asm!( | |
"ld {value}, ({ptr})", | |
ptr = in(reg) second_ptr, | |
value = lateout(reg) second, | |
); | |
if cfg!(target_endian = "little") { | |
// Eg shamt == 2 * 8 | |
// __abcdef:gh______ | |
// -> (fedcba__, ______hg) | |
// -> hgfedcba | |
(second << ((64 - shamt) % 64)) | (first >> shamt) | |
} | |
else { | |
// Eg shamt == 2 * 8 | |
// __abcdef:gh______ | |
// -> (__abcdef, gh______) | |
// -> abcdefgh | |
(first << shamt) | (second >> ((64 - shamt) % 64)) | |
} | |
} | |
} | |
/// Computes the two aligned `u64` words that cover the 8 bytes starting at `ptr`.
///
/// Returns `(first, second, shamt)` where:
///
/// - `first` is `ptr` rounded down to a multiple of `size_of::<u64>()`,
/// - `second` is `ptr` rounded up to a multiple of `size_of::<u64>()`,
/// - `shamt` is the misalignment of `ptr` expressed in bits (`0`, `8`, ..., `56`).
///
/// When `ptr` is already aligned, `first` and `second` are both equal to `ptr`
/// and `shamt` is `0`.
///
/// # Safety
///
/// This function only performs wrapping address arithmetic and never dereferences
/// anything, so it has no preconditions of its own. It is kept as an `unsafe fn`
/// so that existing call sites continue to compile unchanged.
#[inline(always)]
unsafe fn custom_read_unaligned_inner(ptr: *const u8) -> (*const u64, *const u64, usize) {
    const WORD: usize = core::mem::size_of::<u64>();

    let misalignment = ptr.addr() % WORD;

    // `wrapping_byte_sub` rather than `byte_sub`: rounding down may step outside the
    // allocation `ptr` belongs to, and `byte_sub` makes that out-of-bounds offset UB
    // on its own, before any load happens. The wrapping form has no in-bounds
    // requirement, matching the `map_addr` used for the second pointer.
    let first_ptr = ptr.wrapping_byte_sub(misalignment);
    let second_ptr = ptr.map_addr(|addr| addr.next_multiple_of(WORD));

    (first_ptr.cast::<u64>(), second_ptr.cast::<u64>(), misalignment * 8)
}
#[cfg(test)]
mod tests {
    use super::custom_read_unaligned_inner;

    /// Checks the pointer split for one aligned address and one misaligned address.
    #[test]
    fn test_custom_read_unaligned_inner() {
        // (input address, expected first word, expected second word, misalignment in bytes)
        let cases: [(usize, usize, usize, usize); 2] = [(8, 8, 8, 0), (11, 8, 16, 3)];

        for (input, want_first, want_second, want_offset) in cases {
            let ptr = input as *const u8;
            let (first, second, shamt) = unsafe { custom_read_unaligned_inner(ptr) };
            assert_eq!(first.addr(), want_first);
            assert_eq!(second.addr(), want_second);
            // The helper reports the misalignment in bits, not bytes.
            assert_eq!(shamt, want_offset * 8);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment