Skip to content

Instantly share code, notes, and snippets.

@folkertdev
Created May 21, 2025 14:29
Show Gist options
  • Save folkertdev/0561efe5f779f5b5cd57d14cc57fbe18 to your computer and use it in GitHub Desktop.
Save folkertdev/0561efe5f779f5b5cd57d14cc57fbe18 to your computer and use it in GitHub Desktop.
benchmark `compare256` performance
# Manifest for the crate that benchmarks the `compare256` implementations.
[package]
name = "compare256-benchmark"
version = "0.1.0"
edition = "2024"
[dev-dependencies]
# Benchmark framework: the bench source uses `#[divan::bench]` and calls `divan::main()`.
divan = "0.1.21"
[[bench]]
name = "compare256"
# The bench source defines its own `fn main`, so the built-in test/bench harness is turned off.
harness = false
use core::arch::x86_64::{__m128i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8};
/// Benchmark entry point: verifies the CPU can run the SIMD benchmark, then hands control
/// to divan.
///
/// # Panics
///
/// Panics if the running CPU does not report both the `sse2` and `bmi1` features.
fn main() {
    // `compare256_new` unconditionally calls a function compiled with
    // `#[target_feature(enable = "sse2,bmi1")]`. Executing that on a CPU without those
    // features is undefined behaviour, so fail loudly once, up front, instead of adding a
    // feature check inside the code that is being timed.
    assert!(
        is_x86_feature_detected!("sse2") && is_x86_feature_detected!("bmi1"),
        "the compare256 benchmarks require a CPU with SSE2 and BMI1 support"
    );

    // Run registered benchmarks.
    divan::main();
}
/// Reference table built at compile time: the byte at index `i` holds the value `i`
/// (i.e. `0, 1, ..., 255`).
const X: [u8; 256] = {
    let mut table = [0u8; 256];
    let mut idx = 0usize;
    // Manual counting loop: this initializer is evaluated in a const context.
    loop {
        if idx >= table.len() {
            break;
        }
        table[idx] = idx as u8;
        idx += 1;
    }
    table
};
/// First benchmark input: a reference to the unmodified table `X`.
const A: &[u8; 256] = &X;

/// Second benchmark input: a copy of `X` whose byte at index 128 is incremented by one,
/// so it agrees with `A` on the first 128 bytes and differs at index 128.
const B: &[u8; 256] = &{
    let mut altered = X;
    altered[128] += 1;
    altered
};

/// Input pairs passed to the `#[divan::bench(args = ARGS)]` functions: two identical arrays,
/// followed by two arrays that first differ at index 128.
const ARGS: [(&[u8; 256], &[u8; 256]); 2] = [
    (A, A),
    (A, B),
];
#[divan::bench(args = ARGS)]
/// Scalar baseline: counts how many leading bytes of `src0` and `src1` are equal.
///
/// Walks both arrays in lockstep one byte at a time and stops at the first differing pair.
/// Returns the index of that first difference, or 256 when the arrays are identical.
pub fn compare256_old((src0, src1): (&[u8; 256], &[u8; 256])) -> usize {
    // The iterator chain is kept exactly as-is: it is the implementation being measured.
    src0.iter()
        .zip(src1)
        .take_while(|(lhs, rhs)| lhs == rhs)
        .count()
}
#[divan::bench(args = ARGS)]
/// SIMD benchmark entry: forwards `args` unchanged to `compare256_new_helper` and returns
/// its result.
pub fn compare256_new(args: (&[u8; 256], &[u8; 256])) -> usize {
// SAFETY: `compare256_new_helper` is compiled with the `sse2` and `bmi1` target features,
// so this call is only sound on a CPU that supports both.
// NOTE(review): nothing in this function checks for those features at runtime; confirm
// the benchmark host provides them.
unsafe { compare256_new_helper(args) }
}
/// Counts how many leading bytes of `src0` and `src1` are equal, comparing 16 bytes per
/// step with SSE2 intrinsics.
///
/// Returns the index of the first differing byte, or 256 when the arrays are identical.
///
/// # Safety
///
/// This function is compiled with the `sse2` and `bmi1` target features enabled; it must
/// only be called on a CPU that supports both.
#[target_feature(enable = "sse2,bmi1")]
pub unsafe fn compare256_new_helper((src0, src1): (&[u8; 256], &[u8; 256])) -> usize {
    /// Number of bytes held by one 128-bit vector.
    const LANES: usize = 16;

    // Bytes confirmed equal so far; always a multiple of `LANES`.
    let mut matched = 0;

    for (lhs, rhs) in src0.chunks_exact(LANES).zip(src1.chunks_exact(LANES)) {
        // SAFETY: `chunks_exact(16)` only yields slices of exactly 16 bytes, so each
        // unaligned 128-bit load reads in-bounds memory. The intrinsics need SSE2, which
        // this function's `#[target_feature]` attribute enables.
        let equal_lanes = unsafe {
            // Load the next 16 bytes of each input into a SIMD register.
            let lhs_vec = _mm_loadu_si128(lhs.as_ptr() as *const __m128i);
            let rhs_vec = _mm_loadu_si128(rhs.as_ptr() as *const __m128i);
            // Byte-wise equality: a lane becomes 0xFF where the bytes match, 0x00 otherwise.
            let eq = _mm_cmpeq_epi8(lhs_vec, rhs_vec);
            // Collapse the 16 lanes into 16 bits: bit `i` is the top bit of lane `i`,
            // i.e. set exactly when byte `i` matched.
            _mm_movemask_epi8(eq) as u16
        };

        // Any cleared bit marks a mismatch; the run of set low bits is the number of
        // bytes that still matched inside this chunk.
        if equal_lanes != u16::MAX {
            return matched + equal_lanes.trailing_ones() as usize;
        }

        matched += LANES;
    }

    // Every chunk compared equal.
    256
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment