Skip to content

Instantly share code, notes, and snippets.

@folkertdev
Created June 14, 2024 14:48
Show Gist options
  • Save folkertdev/d7aec8980934c07988fcb80ed2709639 to your computer and use it in GitHub Desktop.
Save folkertdev/d7aec8980934c07988fcb80ed2709639 to your computer and use it in GitHub Desktop.
PEXT experiment for lzma/xz variable-width integer decoding
/// Gathers the low seven bits of each of the eight bytes in `word` into the
/// low 56 bits of the result (byte 0 ends up in bits 0..7, byte 1 in bits
/// 7..14, and so on).
///
/// On x86_64 CPUs that report BMI2 support this is a single `PEXT`
/// instruction; everywhere else a portable shift-and-mask loop produces the
/// same value.
fn gather_payload_bits(word: u64) -> u64 {
    #[cfg(target_arch = "x86_64")]
    {
        /// Selects the seven payload bits of every byte.
        const PAYLOAD_MASK: u64 = 0x7F7F_7F7F_7F7F_7F7F;

        if is_x86_feature_detected!("bmi2") {
            // SAFETY: `_pext_u64` only requires the BMI2 target feature, and
            // its presence on the running CPU was verified just above.
            return unsafe { core::arch::x86_64::_pext_u64(word, PAYLOAD_MASK) };
        }
    }

    // Portable fallback: move the seven payload bits of byte `i` to bit `7 * i`.
    (0..8).fold(0u64, |acc, i| acc | (((word >> (8 * i)) & 0x7F) << (7 * i)))
}

/// Branch-free variable-width integer decoder built around `PEXT`.
///
/// The first eight bytes of `buf` are loaded as one little-endian word, their
/// seven payload bits each are packed together, and the payload of the ninth
/// byte (`buf[8]`) is placed at bit 56. The packed value is stored in `*num`.
///
/// The return value is the number of bytes among `buf[..8]` that have their
/// high (continuation) bit set, plus one when `size_max` is non-zero.
/// `size_max` is only tested against zero; it does not limit how many bytes
/// are inspected.
///
/// No validation is performed: all nine bytes always contribute to `*num`, so
/// the result is only meaningful when every byte after the terminating byte
/// is zero (as is the case for a zero-initialised buffer filled by `encode`).
///
/// # Panics
///
/// Panics if `buf` holds fewer than nine bytes. The length is checked up
/// front so that no out-of-bounds read can ever happen.
pub fn decode2(buf: &[u8], size_max: usize, num: &mut u64) -> usize {
    /// Selects the continuation (high) bit of every byte.
    const CONTINUATION_MASK: u64 = 0x8080_8080_8080_8080;

    assert!(
        buf.len() >= 9,
        "decode2 needs at least 9 readable bytes, got {}",
        buf.len()
    );

    // Little-endian load keeps byte 0 in the least significant position on
    // every target, matching the layout the bit gathering relies on.
    let mut head = [0u8; 8];
    head.copy_from_slice(&buf[..8]);
    let number = u64::from_le_bytes(head);

    let bits = gather_payload_bits(number);
    let bytes_used = (number & CONTINUATION_MASK).count_ones();

    // Eight bytes contribute 56 payload bits; the ninth byte supplies the rest.
    *num = bits | (u64::from(buf[8]) << 56);
    usize::from(size_max > 0) + bytes_used as usize
}
/// Writes `num` into `buf` as a little-endian base-128 variable-width integer.
///
/// Every byte carries seven payload bits, least significant group first. All
/// bytes except the last have their high bit set to signal that more bytes
/// follow.
///
/// Returns the number of bytes written (1 to 9). Values with bit 63 set do not
/// fit into nine 7-bit groups; for those the function returns 0 and leaves
/// `buf` untouched. Bytes of `buf` past the returned length are not modified.
pub fn encode(buf: &mut [u8; 9], num: u64) -> usize {
    // Nine groups of seven bits hold at most 63 bits.
    if num >> 63 != 0 {
        return 0;
    }

    let mut remaining = num;
    let mut written = 0;
    loop {
        let group = (remaining & 0x7F) as u8;
        remaining >>= 7;

        if remaining == 0 {
            // Final group: high bit stays clear to terminate the sequence.
            buf[written] = group;
            return written + 1;
        }

        buf[written] = group | 0x80;
        written += 1;
    }
}
/// Byte-at-a-time reference decoder for the variable-width integer format
/// produced by `encode`.
///
/// Reads at most `size_max` bytes (never more than 9, and never more than
/// `buf` actually holds) and accumulates their seven payload bits into `*num`,
/// least significant group first. Decoding stops at the first byte whose high
/// bit is clear.
///
/// Returns the number of bytes consumed, or 0 on failure. Failure cases:
///
/// * `size_max` is 0 or `buf` is empty (`*num` is left untouched);
/// * the byte limit is reached while the continuation bit is still set, which
///   includes input that is truncated before its terminating byte;
/// * a continuation byte is followed by `0x00`, i.e. the encoding is padded
///   with a useless zero group.
///
/// When a failure is detected part-way through, `*num` may hold a partial
/// value.
pub fn decode(buf: &[u8], size_max: usize, num: &mut u64) -> usize {
    // Clamp to what the format allows and to what the slice actually holds,
    // so that truncated input yields the error result instead of a panic.
    let limit = size_max.min(9).min(buf.len());
    if limit == 0 {
        return 0;
    }

    *num = u64::from(buf[0] & 0x7F);
    let mut i = 0;
    while (buf[i] & 0x80) != 0 {
        i += 1;
        // Out of bytes, or a non-minimal encoding ending in a zero group.
        if i >= limit || buf[i] == 0x00 {
            return 0;
        }
        *num |= u64::from(buf[i] & 0x7F) << (i * 7);
    }
    i + 1
}
/// Micro-benchmark: encodes every integer in `0..2^24` once, then times how
/// long `decode` and `decode2` each take to decode the whole set.
///
/// Both timings are reported on stderr via `dbg!`, `decode` first.
fn main() {
    use std::hint::black_box;
    use std::time::Instant;

    let data = (0..(1 << 24))
        .map(|num| {
            let mut buf = [0u8; 9];
            let encoded_size = encode(&mut buf, num);
            (buf, encoded_size)
        })
        .collect::<Vec<_>>();

    let before = Instant::now();
    for (buf, encoded_size) in data.iter() {
        let mut decoded_num = 0u64;
        // `black_box` hides the input from the optimizer and marks the result
        // as used; otherwise the whole loop could be removed as dead code and
        // the measurement would be meaningless.
        let decoded_size = decode(black_box(&buf[..]), *encoded_size, &mut decoded_num);
        black_box((decoded_size, decoded_num));
    }
    dbg!(Instant::now().duration_since(before));

    let before = Instant::now();
    for (buf, encoded_size) in data.iter() {
        let mut decoded_num = 0u64;
        let decoded_size = decode2(black_box(&buf[..]), *encoded_size, &mut decoded_num);
        black_box((decoded_size, decoded_num));
    }
    dbg!(Instant::now().duration_since(before));
}
/// Property test: for arbitrary `u64` inputs, `decode` and `decode2` must
/// report the same size for whatever `encode` produced.
#[test]
fn test() {
    /// Encodes `num`, decodes it with both decoders and compares the sizes.
    fn helper(num: u64) -> bool {
        let mut encoded = [0u8; 9];
        let len = encode(&mut encoded, num);
        println!("Encoded size: {}", len);
        println!("Encoded bytes: {:?}", &encoded[..len]);

        let mut value_scalar = 0u64;
        let size_scalar = decode(&encoded, len, &mut value_scalar);

        let mut value_pext = 0u64;
        let size_pext = decode2(&encoded, len, &mut value_pext);

        // Only the reported sizes are compared; the decoded values are not
        // checked by this property.
        assert_eq!(size_scalar, size_pext);
        size_scalar == size_pext
    }

    ::quickcheck::quickcheck(helper as fn(_) -> bool);
}
/// Round-trips 2^56 (72057594037927936) through `encode` and `decode2` and
/// checks that the decoded value matches.
#[test]
fn smallest_without_final_byte() {
    const VALUE: u64 = 72057594037927936u64;

    let mut buf = [0u8; 9];
    // The encoded length is not needed here; `decode2` is given a size of 8.
    let _ = encode(&mut buf, VALUE);
    dbg!(buf);

    let mut decoded = 0u64;
    let _ = decode2(&buf, 8, &mut decoded);

    println!("{:b}", VALUE);
    assert_eq!(72057594037927936, decoded);
}
/// Encodes 0 and checks that `decode2` yields 0 and that both decoders report
/// the same size.
#[test]
fn zero() {
    let mut encoded = [0u8; 9];
    let _ = encode(&mut encoded, 0);

    let mut value_scalar = 0u64;
    let size_scalar = decode(&encoded, 8, &mut value_scalar);

    let mut value_pext = 0u64;
    let size_pext = decode2(&encoded, 8, &mut value_pext);

    assert_eq!(0, value_pext);
    assert_eq!(size_scalar, size_pext);
}
/// Feeds 2^63 (9223372036854775808) to `encode`, which is above the
/// `u64::MAX / 2` limit that `encode` accepts, and checks that both decoders
/// agree on value and size for the resulting buffer.
#[test]
fn large() {
    let mut buf = [0u8; 9];
    let encoded_size = encode(&mut buf, 9223372036854775808u64);
    dbg!(buf, encoded_size);

    let mut value_scalar = 0u64;
    let size_scalar = decode(&buf, encoded_size, &mut value_scalar);

    let mut value_pext = 0u64;
    let size_pext = decode2(&buf, encoded_size, &mut value_pext);

    assert_eq!(value_scalar, value_pext);
    assert_eq!(size_scalar, size_pext);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment