RISC-V misc
//! <https://rust.godbolt.org/z/z78Gh8j47>
/// # Safety
///
/// `ptr` must point to an allocation that contains at least `size_of::<u64>()` bytes
/// at `ptr`'s address.
#[inline(never)]
pub unsafe fn core_read_unaligned(ptr: *const u8) -> u64 {
    // With `target-feature=+unaligned-scalar-mem` this does a single `ld` as desired.
    // Without that feature it falls back to loading each byte one-by-one and shifting them into place,
    // so the custom impl below ends up being better.
    //
    // The standard feature `target-feature=+zicclsm` (hardware supports misaligned loads and stores)
    // does not make a difference either way.
    unsafe { ptr.cast::<u64>().read_unaligned() }
}
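
// Illustrative only (not part of the original gist): one way to enable the
// `unaligned-scalar-mem` LLVM target feature when building for a riscv64 target is
// through RUSTFLAGS, e.g.:
//
//     RUSTFLAGS='-C target-feature=+unaligned-scalar-mem' \
//         cargo build --target riscv64gc-unknown-linux-gnu
//
// The same `-C target-feature=+unaligned-scalar-mem` flag can be passed in the
// compiler options on godbolt to reproduce the single-`ld` codegen mentioned above.
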
/// # Safety
///
/// `ptr` must point to an allocation that contains at least `size_of::<u64>()` bytes
/// at `ptr`'s address.
#[inline(never)]
pub unsafe fn custom_read_unaligned(ptr: *const u8) -> u64 {
    let (first_ptr, second_ptr, shamt) = unsafe { custom_read_unaligned_inner(ptr) };
    unsafe {
        // Both pointers are guaranteed to point to valid u64-sized memory within
        // our address space (assuming the caller satisfied our safety criteria).
        // We can load them, then shift the two u64s to keep just the parts we care about,
        // then OR them to get the result.
        //
        // However, dereferencing the pointers in Rust code might read partially outside
        // the allocation that `ptr` was from, which is UB. The official alternative is
        // to use inline assembly to dereference the pointers instead.
        // This alternative is also used by compiler_builtins' x86_64 SWAR impl of `strlen`,
        // for the same reason.
        //
        // This code ends up compiling to a functionally equivalent version of
        // <https://old.reddit.com/r/RISCV/comments/1ezbyr4/performance_of_misaligned_loads/ljkbx95/>.
        // As mentioned there, it is possible to do only one load by checking
        // if the pointer is already aligned, but doing two loads is also cheap and
        // avoids branching. (A sketch of that branching alternative follows this function.)
        let first: u64;
        core::arch::asm!(
            "ld {value}, ({ptr})",
            ptr = in(reg) first_ptr,
            value = lateout(reg) first,
        );
        let second: u64;
        core::arch::asm!(
            "ld {value}, ({ptr})",
            ptr = in(reg) second_ptr,
            value = lateout(reg) second,
        );
        if cfg!(target_endian = "little") {
            // E.g. shamt == 2 * 8
            // __abcdef:gh______
            // -> (fedcba__, ______hg)
            // -> hgfedcba
            (second << ((64 - shamt) % 64)) | (first >> shamt)
        }
        else {
            // E.g. shamt == 2 * 8
            // __abcdef:gh______
            // -> (__abcdef, gh______)
            // -> abcdefgh
            (first << shamt) | (second >> ((64 - shamt) % 64))
        }
    }
}
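
// A minimal sketch (not part of the original gist) of the branching alternative
// mentioned in the comment above: when `ptr` happens to be aligned a single `ld`
// suffices, otherwise fall back to the two-load path. The function name is made up
// for illustration; it has the same safety contract as `custom_read_unaligned`.
///
/// # Safety
///
/// `ptr` must point to an allocation that contains at least `size_of::<u64>()` bytes
/// at `ptr`'s address.
#[inline(never)]
pub unsafe fn custom_read_unaligned_branching(ptr: *const u8) -> u64 {
    if ptr.addr() % core::mem::size_of::<u64>() == 0 {
        // Aligned: one `ld` is enough.
        let value: u64;
        unsafe {
            core::arch::asm!(
                "ld {value}, ({ptr})",
                ptr = in(reg) ptr,
                value = lateout(reg) value,
            );
        }
        value
    } else {
        // Misaligned: reuse the branch-free two-load implementation above.
        unsafe { custom_read_unaligned(ptr) }
    }
}
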
#[inline(always)]
unsafe fn custom_read_unaligned_inner(ptr: *const u8) -> (*const u64, *const u64, usize) {
    unsafe {
        // Number of bytes `ptr` is past the previous 8-byte boundary.
        let shamt = ptr.addr() % core::mem::size_of::<u64>();
        // The aligned word containing the first bytes of the read.
        let first_ptr = ptr.byte_sub(shamt);
        // The aligned word containing the remaining bytes
        // (equal to `first_ptr` when `ptr` is already aligned).
        let second_ptr = ptr.map_addr(|addr| addr.next_multiple_of(core::mem::size_of::<u64>()));
        // The shift amount is returned in bits.
        (first_ptr.cast::<u64>(), second_ptr.cast::<u64>(), shamt * 8)
    }
}
#[cfg(test)]
mod tests {
    use super::custom_read_unaligned_inner;

    #[test]
    fn test_custom_read_unaligned_inner() {
        let ptr: *const u8 = 8_usize as _;
        let result = unsafe { custom_read_unaligned_inner(ptr) };
        assert_eq!(result.0.addr(), 8);
        assert_eq!(result.1.addr(), 8);
        assert_eq!(result.2, 0 * 8);

        let ptr: *const u8 = 11_usize as _;
        let result = unsafe { custom_read_unaligned_inner(ptr) };
        assert_eq!(result.0.addr(), 8);
        assert_eq!(result.1.addr(), 16);
        assert_eq!(result.2, 3 * 8);
    }
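
    // An additional sanity check (a sketch, not part of the original gist): compare
    // both read implementations against `u64::from_ne_bytes` for every offset within
    // an 8-byte window. Like the rest of this file it only builds for riscv64 targets,
    // because of the RV64 inline assembly used by `custom_read_unaligned`.
    #[test]
    fn test_read_unaligned_matches_from_ne_bytes() {
        let buf: [u8; 16] = core::array::from_fn(|i| i as u8);
        for offset in 0..=8 {
            let mut bytes = [0_u8; 8];
            bytes.copy_from_slice(&buf[offset..offset + 8]);
            let expected = u64::from_ne_bytes(bytes);

            let ptr = buf[offset..].as_ptr();
            assert_eq!(unsafe { super::core_read_unaligned(ptr) }, expected);
            assert_eq!(unsafe { super::custom_read_unaligned(ptr) }, expected);
        }
    }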
}