Created
June 14, 2024 14:48
-
-
Save folkertdev/d7aec8980934c07988fcb80ed2709639 to your computer and use it in GitHub Desktop.
PEXT experiment for lzma/xz variable-width integer decoding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Branch-free decoder experiment for the variable-length integer format
/// produced by `encode`, based on the BMI2 `PEXT` instruction.
///
/// The first eight bytes of `buf` are loaded as one little-endian word, the
/// low seven bits of each byte are packed together, and the ninth byte (if
/// present) is OR-ed in above them. The number of set continuation bits in the
/// first eight bytes determines the reported size.
///
/// This routine does not stop at the terminating byte, so it only yields a
/// meaningful value when the bytes following the encoded integer are zero
/// (as they are for a freshly zeroed buffer filled by `encode`).
///
/// `size_max` is only consulted for being non-zero: the return value is
/// `(size_max > 0) + number_of_continuation_bits`.
///
/// Buffers shorter than nine bytes are treated as if padded with zeros, so no
/// out-of-bounds read or panic can occur.
pub fn decode2(buf: &[u8], size_max: usize, num: &mut u64) -> usize {
    /// Selects the seven payload bits of every byte in a 64-bit word.
    const PAYLOAD_MASK: u64 = 0x7F7F_7F7F_7F7F_7F7F;

    /// Packs the low seven bits of each of the eight bytes of `word` into the
    /// low 56 bits of the result.
    fn extract_payload(word: u64) -> u64 {
        #[cfg(target_arch = "x86_64")]
        {
            if std::arch::is_x86_feature_detected!("bmi2") {
                // SAFETY: BMI2 support was confirmed at run time just above,
                // which is the only requirement of `_pext_u64`.
                return unsafe { core::arch::x86_64::_pext_u64(word, PAYLOAD_MASK) };
            }
        }
        // Portable equivalent of PEXT with `PAYLOAD_MASK`.
        (0..8u32).fold(0u64, |acc, i| acc | ((word >> (8 * i)) & 0x7F) << (7 * i))
    }

    // Bounded copy instead of an unchecked 8-byte pointer read: missing bytes
    // stay zero, which is what the algorithm expects from padding anyway.
    let mut head = [0u8; 8];
    let available = buf.len().min(8);
    head[..available].copy_from_slice(&buf[..available]);
    let number = u64::from_le_bytes(head);

    let bits = extract_payload(number);
    // Every set high bit marks a byte that is followed by another byte.
    let bytes_used = (number & !PAYLOAD_MASK).count_ones();

    // The ninth byte cannot be covered by the 64-bit load; it supplies the
    // bits from position 56 upwards.
    let ninth = buf.get(8).copied().unwrap_or(0);
    *num = bits | (ninth as u64) << 56;

    (size_max > 0) as usize + bytes_used as usize
}
/// Writes `num` into `buf` as a little-endian base-128 integer.
///
/// Each output byte carries seven payload bits; the high bit is set on every
/// byte except the last one. Bytes of `buf` past the encoded length are left
/// untouched.
///
/// Returns the number of bytes written (1 to 9), or 0 without touching `buf`
/// when `num` has its top bit set and therefore does not fit in nine bytes.
pub fn encode(buf: &mut [u8; 9], num: u64) -> usize {
    // Only 63 bits fit into nine 7-bit groups.
    if num >> 63 != 0 {
        return 0;
    }

    let mut remaining = num;
    let mut written = 0;
    loop {
        let low = (remaining & 0x7F) as u8;
        remaining >>= 7;
        if remaining == 0 {
            // Final byte: continuation bit stays clear.
            buf[written] = low;
            return written + 1;
        }
        buf[written] = low | 0x80;
        written += 1;
    }
}
/// Byte-at-a-time reference decoder for the variable-length integer format
/// produced by `encode`.
///
/// At most `size_max` bytes (capped at 9, and at `buf.len()`) are examined.
/// The decoded value is accumulated into `*num` as bytes are consumed.
///
/// Returns the number of bytes consumed, or 0 on failure:
/// * `size_max` is 0 or `buf` is empty,
/// * the integer is not terminated within the allowed number of bytes
///   (this includes a truncated `buf`),
/// * a byte after the first is `0x00`, i.e. the encoding is not minimal.
///
/// On failure `*num` may hold a partially decoded value.
pub fn decode(buf: &[u8], size_max: usize, num: &mut u64) -> usize {
    // Never look past nine bytes, the caller's limit, or the end of the
    // slice; the last bound turns a truncated input into a failure return
    // instead of an index panic.
    let limit = size_max.min(9).min(buf.len());
    if limit == 0 {
        return 0;
    }

    *num = (buf[0] & 0x7F) as u64;
    let mut i = 0;
    while (buf[i] & 0x80) != 0 {
        i += 1;
        // The bound is checked first so `buf[i]` is only read when in range.
        if i >= limit || buf[i] == 0x00 {
            return 0;
        }
        *num |= ((buf[i] & 0x7F) as u64) << (i * 7);
    }
    i + 1
}
fn main() { | |
let mut data = (0..(1 << 24)) | |
.map(|num| { | |
let mut buf = [0u8; 9]; | |
let encoded_size = encode(&mut buf, num); | |
(buf, encoded_size) | |
}) | |
.collect::<Vec<_>>(); | |
use std::time::Instant; | |
let before = Instant::now(); | |
for (buf, encoded_size) in data.iter() { | |
let mut decoded_num = 0u64; | |
let decoded_size = decode(buf, *encoded_size, &mut decoded_num); | |
} | |
dbg!(Instant::now().duration_since(before)); | |
let before = Instant::now(); | |
for (buf, encoded_size) in data.iter() { | |
let mut decoded_num = 0u64; | |
let decoded_size = decode2(buf, *encoded_size, &mut decoded_num); | |
} | |
dbg!(Instant::now().duration_since(before)); | |
} | |
/// Property test: for arbitrary `u64` inputs, `decode` and `decode2` must
/// report the same size for the bytes produced by `encode`.
#[test]
fn test() {
    /// Encodes `num`, decodes it with both decoders and compares the sizes
    /// they report.
    fn helper(num: u64) -> bool {
        let mut encoded = [0u8; 9];
        let encoded_size = encode(&mut encoded, num);
        println!("Encoded size: {}", encoded_size);
        println!("Encoded bytes: {:?}", &encoded[..encoded_size]);

        let (mut value_loop, mut value_pext) = (0u64, 0u64);
        let size_loop = decode(&encoded, encoded_size, &mut value_loop);
        let size_pext = decode2(&encoded, encoded_size, &mut value_pext);

        // Only the reported sizes are checked here; the decoded values are
        // deliberately not compared in this property.
        assert_eq!(size_loop, size_pext);
        size_loop == size_pext
    }

    ::quickcheck::quickcheck(helper as fn(_) -> bool);
}
/// Round-trips 2^56 through `encode` and `decode2` and checks the decoded
/// value. `decode2` is called with a `size_max` of 8.
#[test]
fn smallest_without_final_byte() {
    // 2^56 == 72057594037927936
    const VALUE: u64 = 1u64 << 56;

    let mut buf = [0u8; 9];
    let _written = encode(&mut buf, VALUE);
    dbg!(buf);

    let mut decoded = 0u64;
    let _size = decode2(&buf, 8, &mut decoded);

    println!("{:b}", VALUE);
    assert_eq!(VALUE, decoded);
}
/// Encodes 0 and checks that `decode2` yields 0 and reports the same size as
/// `decode`.
#[test]
fn zero() {
    let mut encoded = [0u8; 9];
    let _written = encode(&mut encoded, 0);

    let (mut value_loop, mut value_pext) = (0u64, 0u64);
    let size_loop = decode(&encoded, 8, &mut value_loop);
    let size_pext = decode2(&encoded, 8, &mut value_pext);

    assert_eq!(0, value_pext);
    assert_eq!(size_loop, size_pext);
}
/// Feeds 2^63 to `encode` (which rejects values above `u64::MAX / 2` by
/// returning 0) and checks that both decoders agree on value and size for the
/// resulting buffer.
#[test]
fn large() {
    // 2^63 == 9223372036854775808
    const VALUE: u64 = 1u64 << 63;

    let mut buf = [0u8; 9];
    let encoded_size = encode(&mut buf, VALUE);
    dbg!(buf, encoded_size);

    let (mut value_loop, mut value_pext) = (0u64, 0u64);
    let size_loop = decode(&buf, encoded_size, &mut value_loop);
    let size_pext = decode2(&buf, encoded_size, &mut value_pext);

    assert_eq!(value_loop, value_pext);
    assert_eq!(size_loop, size_pext);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment