Skip to content

Instantly share code, notes, and snippets.

@lu-zero
lu-zero / bench_memcpy_x86.cpp
Last active May 22, 2026 11:32
memcpy streaming-store benchmark: x86 AVX2 vs AArch64 STNP, with/without software prefetch
// Standalone memcpy-to-WC benchmark — x86 and AArch64.
// Compile: clang++ -O3 -march=native bench_memcpy_x86.cpp -o bench && ./bench
//
// Tests streaming-store variants vs libc memcpy.
// On x86 the comparison is: AVX2 stream with/without prefetch.
// On AArch64 the comparison is: STNP with/without software prefetch.
// Also: NEON intrinsics and inline assembly variants.
// Run multiple times; the best-of-7 is reported.
#include <cstdint>
enum Error<T> {
..
Again(T),
}
fn send<T: AsRef<[u8]>>(buf: T, ...) -> Result<(), Error<T>> { ... }
let mut buf = ...;
# install the system toolchain (see https://docs.brew.sh/Installation)
xcode-select --install
# install homebrew
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
# get the rust base toolchain
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
# make sure all the PATHs are set; how to do this depends on your shell
5dc771f6c999eda25ec182720d27c78716f08376 refs/heads/0.1
bda611057e18b2a4302745c5679b22c7e8438ae0 refs/heads/0.2
727571e9e88068cbdcf6158006be0671da7a6071 refs/heads/0.3
bfaeeeb01f6341756193904830b45bc94ff8ada5 refs/heads/0.4
d40304b3a13b7f5165e31c5ab35e36a513f37e8a refs/heads/bugfix/build-grep
31fc0a4da6f35dc3bdcc0ea3b1b0cdb7f121edbd refs/heads/bugfix/cdef-disabled-cost
2c97c0fcb0ebc17ed3a19456bfcfb9c0dd8984c8 refs/heads/bugfix/non-fatal-distclean
b42905636adadce4cfcacfe6bca7ed3a9ac94c56 refs/heads/bugfix/reference
aef01444ad5e5091817105adefcf99d1e54c3058 refs/heads/cdef-rebase-f5890e3
91af948fdbd26b2f86bd87ac6ae3ec0bf1c86bb6 refs/heads/cleanup/no-wx
./src/libdav1d.5.dylib.p/mc16_avx2.obj: file format Mach-O 64-bit x86-64
Disassembly of section __TEXT,__text:
0000000000000000 _dav1d_put_8tap_regular_16bpc_avx2:
0: 41 ba 2d 00 00 00 movl $45, %r10d
6: 41 bb 2d 00 00 00 movl $45, %r11d
c: e9 8f 00 00 00 jmp 143 <_dav1d_put_8tap_16bpc_avx2>
diff --git a/src/unix/linux_like/linux/musl/b64/aarch64/align.rs b/src/unix/linux_like/linux/musl/b64/aarch64/align.rs
index e114eaecd..637ea4e5a 100644
--- a/src/unix/linux_like/linux/musl/b64/aarch64/align.rs
+++ b/src/unix/linux_like/linux/musl/b64/aarch64/align.rs
@@ -1,25 +1,27 @@
s_no_extra_traits! {
#[allow(missing_debug_implementations)]
#[repr(align(16))]
pub struct max_align_t {
priv_: [f32; 8]
/* automatically generated by rust-bindgen */
/// Struct with C-compatible field layout (`#[repr(C)]`) wrapping a single C `int`.
///
/// Values are trivially copyable and printable via `Debug`.
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct FooA {
    /// The only field, typed as the platform's C `int`.
    pub a: ::std::os::raw::c_int,
}
#[test]
fn bindgen_test_layout_FooA() {
assert_eq!(
@lu-zero
lu-zero / main.rs
Created October 4, 2020 10:43
testcase
use rayon::prelude::*;
use std::{thread, time};
fn process(data: usize) -> usize {
(0..100usize)
.into_par_iter()
.map(|v| {
thread::sleep(time::Duration::from_millis(1));
data + v
})
https://github.com/sapir/gcc-rust/issues
https://github.com/philberty/gccrs
warning: function cannot return without recursing
--> src/main.rs:8:5
|
8 | fn default() -> Self {
| ^^^^^^^^^^^^^^^^^^^^ cannot return without recursing
...
11 | .. Default::default()
| ------------------ recursive call site
|
= note: `#[warn(unconditional_recursion)]` on by default