Created
March 29, 2025 11:18
-
-
Save klauspost/7908a012728cb4f7037d3983546a897a to your computer and use it in GitHub Desktop.
MinLZ Machine Converted (Grok) Rust block decode/encode.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::error::Error; | |
use std::fmt; | |
#[derive(Debug, PartialEq, Eq)] | |
pub enum MinLzError { | |
BlockTooLarge, | |
CorruptInput, | |
} | |
impl fmt::Display for MinLzError { | |
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | |
match self { | |
MinLzError::BlockTooLarge => write!(f, "minlz: block too large"), | |
MinLzError::CorruptInput => write!(f, "minlz: corrupt input"), | |
} | |
} | |
} | |
impl Error for MinLzError {} | |
const MAX_BLOCK_SIZE: usize = 8 * 1024 * 1024; | |
const MIN_NON_LITERAL_BLOCK_SIZE: usize = 16; | |
const TABLE_BITS: u8 = 15; | |
const TABLE_SIZE: usize = 1 << TABLE_BITS; | |
const SKIP_LOG: u32 = 6; | |
const INPUT_MARGIN: usize = 8; | |
const MAX_COPY1_OFFSET: u32 = 1024; | |
const MIN_COPY2_OFFSET: u32 = 64; | |
const MAX_COPY2_OFFSET: u32 = MIN_COPY2_OFFSET + 65535; | |
const COPY2_LIT_MAX_LEN: u32 = 7 + 4; | |
const MAX_COPY3_LITS: u32 = (1 << 2) - 1; | |
const TAG_LITERAL: u8 = 0x00; | |
const TAG_REPEAT: u8 = 0x04; // 0x00 | (1 << 2) | |
const TAG_COPY1: u8 = 0x01; | |
const TAG_COPY2: u8 = 0x02; | |
const TAG_COPY3: u8 = 0x07; // 0x03 | 4 | |
const TAG_COPY2_FUSED: u8 = 0x03; | |
const PRIME_6BYTES: u64 = 227718039650203; | |
fn load8(src: &[u8], i: usize) -> u8 { | |
src[i] | |
} | |
fn load16(src: &[u8], i: usize) -> u16 { | |
u16::from_le_bytes(src[i..i + 2].try_into().unwrap()) | |
} | |
fn load32(src: &[u8], i: usize) -> u32 { | |
u32::from_le_bytes(src[i..i + 4].try_into().unwrap()) | |
} | |
fn load64(src: &[u8], i: usize) -> u64 { | |
u64::from_le_bytes(src[i..i + 8].try_into().unwrap()) | |
} | |
fn uvarint_encode(n: u64) -> Vec<u8> { | |
let mut buf = Vec::with_capacity(10); | |
let mut x = n; | |
while x >= 0x80 { | |
buf.push((x as u8) | 0x80); | |
x >>= 7; | |
} | |
buf.push(x as u8); | |
buf | |
} | |
fn uvarint_decode(src: &[u8]) -> Option<(u64, usize)> { | |
let mut x: u64 = 0; | |
let mut s: u32 = 0; | |
for i in 0..10 { | |
if i >= src.len() { | |
return None; | |
} | |
let b = src[i]; | |
if b < 0x80 { | |
if i == 9 && b > 1 { | |
return None; // overflow | |
} | |
x |= (b as u64) << s; | |
return Some((x, i + 1)); | |
} | |
x |= ((b & 0x7f) as u64) << s; | |
s += 7; | |
} | |
None | |
} | |
fn emit_literal(dst: &mut Vec<u8>, lit: &[u8]) { | |
let n = lit.len(); | |
if n == 0 { | |
return; | |
} | |
if n < 29 { | |
dst.push((n as u8) << 3 | TAG_LITERAL); | |
} else if n < 256 + 29 { | |
dst.push(29 << 3 | TAG_LITERAL); | |
dst.push((n - 29) as u8); | |
} else if n < 65536 + 29 { | |
dst.push(30 << 3 | TAG_LITERAL); | |
let nn = n - 29; | |
dst.push(nn as u8); | |
dst.push((nn >> 8) as u8); | |
} else if n < 1 << 24 + 29 { | |
dst.push(31 << 3 | TAG_LITERAL); | |
let nn = n - 29; | |
dst.push(nn as u8); | |
dst.push((nn >> 8) as u8); | |
dst.push((nn >> 16) as u8); | |
} else { | |
panic!("literal block too long"); | |
} | |
dst.extend_from_slice(lit); | |
} | |
fn emit_repeat(dst: &mut Vec<u8>, length: usize) { | |
let length = length - 1; | |
if length < 30 { | |
dst.push((length as u8) << 3 | TAG_REPEAT); | |
} else { | |
let length = length - 30; | |
if length < 256 { | |
dst.push(29 << 3 | TAG_REPEAT); | |
dst.push(length as u8); | |
} else if length < 65536 { | |
dst.push(30 << 3 | TAG_REPEAT); | |
dst.push(length as u8); | |
dst.push((length >> 8) as u8); | |
} else { | |
dst.push(31 << 3 | TAG_REPEAT); | |
dst.push(length as u8); | |
dst.push((length >> 8) as u8); | |
dst.push((length >> 16) as u8); | |
} | |
} | |
} | |
fn emit_copy(dst: &mut Vec<u8>, offset: u32, length: usize) { | |
if offset > MAX_COPY2_OFFSET { | |
let mut encoded = (offset - 65536) << 11 | TAG_COPY3 as u32; | |
let length = length - 4; | |
if length <= 60 { | |
encoded |= (length as u32) << 5; | |
dst.extend_from_slice(&encoded.to_le_bytes()); | |
} else { | |
let length = length - 60; | |
encoded |= 61 << 5; | |
dst.extend_from_slice(&encoded.to_le_bytes()); | |
if length < 256 { | |
dst.push(length as u8); | |
} else if length < 65536 { | |
dst.push(62); | |
dst.extend_from_slice(&(length as u16).to_le_bytes()); | |
} else { | |
dst.push(63); | |
dst.extend_from_slice(&((length as u32) << 8).to_le_bytes()[1..]); | |
} | |
} | |
} else if offset <= MAX_COPY1_OFFSET { | |
let offset = offset - 1; | |
let length = length - 4; | |
if length < 15 { | |
let x = (offset << 6) | (length as u32) << 2 | TAG_COPY1 as u32; | |
dst.extend_from_slice(&x.to_le_bytes()[..2]); | |
} else { | |
let x = (offset << 6) | (14 << 2) | TAG_COPY1 as u32; | |
dst.extend_from_slice(&x.to_le_bytes()[..2]); | |
let length = length - 14; | |
if length < 256 { | |
dst.push(length as u8); | |
} else { | |
emit_repeat(dst, length - 18); | |
} | |
} | |
} else { | |
let offset = offset - MIN_COPY2_OFFSET; | |
let length = length - 4; | |
if length <= 60 { | |
dst.push((length as u8) << 2 | TAG_COPY2); | |
dst.extend_from_slice(&(offset as u16).to_le_bytes()); | |
} else { | |
let length = length - 60; | |
dst.push(61 << 2 | TAG_COPY2); | |
dst.extend_from_slice(&(offset as u16).to_le_bytes()); | |
if length < 256 { | |
dst.push(length as u8); | |
} else if length < 65536 { | |
dst.push(62); | |
dst.extend_from_slice(&(length as u16).to_le_bytes()); | |
} else { | |
dst.push(63); | |
dst.extend_from_slice(&((length as u32) << 8).to_le_bytes()[1..]); | |
} | |
} | |
} | |
} | |
fn encode_min_lz_block_internal(dst: &mut Vec<u8>, src: &[u8], dst_limit: usize) -> usize { | |
let mut table = vec![0u32; TABLE_SIZE]; | |
let s_limit = src.len() - INPUT_MARGIN; | |
let mut next_emit = 0; | |
let mut s = 1; | |
let mut cv = load64(src, s); | |
let mut repeat = 1; | |
let mut d = dst.len(); | |
fn hash_value(u: u64) -> u32 { | |
(((u << 16) * PRIME_6BYTES) >> 49) as u32 | |
} | |
loop { | |
let mut candidate = 0; | |
loop { | |
let next_s = s + (s - next_emit) / (1 << SKIP_LOG) + 4; | |
if next_s > s_limit { | |
if next_emit < src.len() { | |
if dst.len() + (src.len() - next_emit) > dst_limit { | |
return 0; | |
} | |
emit_literal(dst, &src[next_emit..]); | |
} | |
return dst.len() - d; | |
} | |
let min_src_pos = if s > 2 << 20 + 65535 { | |
s - (2 << 20 + 65535) | |
} else { | |
0 | |
}; | |
let hash0 = hash_value(cv); | |
let hash1 = hash_value(cv >> 8); | |
candidate = table[hash0] as usize; | |
let candidate2 = table[hash1] as usize; | |
table[hash0] = s as u32; | |
table[hash1] = (s + 1) as u32; | |
let hash2 = hash_value(cv >> 16); | |
if (cv >> 8) as u32 == load32(src, s - repeat + 1) { | |
let mut base = s + 1; | |
while base > next_emit && base - repeat > 0 && src[base - repeat - 1] == src[base - 1] { | |
base -= 1; | |
} | |
if dst.len() + (base - next_emit) > dst_limit { | |
return 0; | |
} | |
emit_literal(dst, &src[next_emit..base]); | |
let mut candidate = s - repeat + 5; | |
s += 5; | |
while s <= s_limit { | |
let diff = load64(src, s) ^ load64(src, candidate); | |
if diff != 0 { | |
s += diff.trailing_zeros() as usize / 8; | |
break; | |
} | |
s += 8; | |
candidate += 8; | |
} | |
emit_repeat(dst, s - base); | |
next_emit = s; | |
if s >= s_limit { | |
if next_emit < src.len() { | |
if dst.len() + (src.len() - next_emit) > dst_limit { | |
return 0; | |
} | |
emit_literal(dst, &src[next_emit..]); | |
} | |
return dst.len() - d; | |
} | |
cv = load64(src, s); | |
continue; | |
} | |
if candidate >= min_src_pos && cv as u32 == load32(src, candidate) { | |
break; | |
} | |
candidate = table[hash2] as usize; | |
if candidate2 >= min_src_pos && (cv >> 8) as u32 == load32(src, candidate2) { | |
table[hash2] = (s + 2) as u32; | |
candidate = candidate2; | |
s += 1; | |
break; | |
} | |
table[hash2] = (s + 2) as u32; | |
if candidate >= min_src_pos && (cv >> 16) as u32 == load32(src, candidate) { | |
s += 2; | |
break; | |
} | |
cv = load64(src, next_s); | |
s = next_s; | |
} | |
while candidate > 0 && s > next_emit && src[candidate - 1] == src[s - 1] { | |
candidate -= 1; | |
s -= 1; | |
} | |
let base = s; | |
repeat = base - candidate; | |
s += 4; | |
candidate += 4; | |
while s <= src.len() - 8 { | |
let diff = load64(src, s) ^ load64(src, candidate); | |
if diff != 0 { | |
s += diff.trailing_zeros() as usize / 8; | |
break; | |
} | |
s += 8; | |
candidate += 8; | |
} | |
let length = s - base; | |
if next_emit != base { | |
if base - next_emit > MAX_COPY3_LITS as usize || repeat < MIN_COPY2_OFFSET { | |
if dst.len() + (s - next_emit) > dst_limit { | |
return 0; | |
} | |
emit_literal(dst, &src[next_emit..base]); | |
emit_copy(dst, repeat, length); | |
} else if repeat <= MAX_COPY2_OFFSET { | |
// emitCopyLits2 simplified | |
let lits = &src[next_emit..base]; | |
let offset = repeat - MIN_COPY2_OFFSET; | |
let length = length - 4; | |
if length > (COPY2_LIT_MAX_LEN - 4) as usize { | |
dst.push(TAG_COPY2_FUSED | ((COPY2_LIT_MAX_LEN - 4) as u8) << 5 | (lits.len() - 1) as u8 << 3); | |
dst.extend_from_slice(&(offset as u16).to_le_bytes()); | |
dst.extend_from_slice(lits); | |
emit_repeat(dst, length - (COPY2_LIT_MAX_LEN - 4) as usize); | |
} else { | |
dst.push(TAG_COPY2_FUSED | (length as u8) << 5 | (lits.len() - 1) as u8 << 3); | |
dst.extend_from_slice(&(offset as u16).to_le_bytes()); | |
dst.extend_from_slice(lits); | |
} | |
} else { | |
// emitCopyLits3 simplified | |
let lits = &src[next_emit..base]; | |
let mut encoded = ((repeat - 65536) << 11) | TAG_COPY3 as u32 | (lits.len() as u32) << 3; | |
let length = length - 4; | |
if length <= 60 { | |
encoded |= (length as u32) << 5; | |
dst.extend_from_slice(&encoded.to_le_bytes()); | |
} else { | |
let length = length - 60; | |
encoded |= 61 << 5; | |
dst.extend_from_slice(&encoded.to_le_bytes()); | |
dst.push(length as u8); | |
} | |
dst.extend_from_slice(lits); | |
} | |
} else { | |
emit_copy(dst, repeat, length); | |
} | |
next_emit = s; | |
if s >= s_limit { | |
if next_emit < src.len() { | |
if dst.len() + (src.len() - next_emit) > dst_limit { | |
return 0; | |
} | |
emit_literal(dst, &src[next_emit..]); | |
} | |
return dst.len() - d; | |
} | |
// Simplified immediate match check | |
if s + 2 < src.len() && (load32(src, s) >> 8) as u32 == load32(src, s - repeat) { | |
s += 2; | |
candidate = s - repeat; | |
while s <= src.len() - 8 { | |
let diff = load64(src, s) ^ load64(src, candidate); | |
if diff != 0 { | |
s += diff.trailing_zeros() as usize / 8; | |
break; | |
} | |
s += 8; | |
candidate += 8; | |
} | |
emit_copy(dst, repeat, s - (s - repeat)); | |
next_emit = s; | |
if s >= s_limit { | |
if next_emit < src.len() { | |
if dst.len() + (src.len() - next_emit) > dst_limit { | |
return 0; | |
} | |
emit_literal(dst, &src[next_emit..]); | |
} | |
return dst.len() - d; | |
} | |
} | |
cv = load64(src, s); | |
} | |
} | |
pub fn encode_min_lz_block(src: &[u8]) -> Result<Vec<u8>, MinLzError> { | |
if src.is_empty() { | |
return Ok(vec![0]); | |
} | |
if src.len() > MAX_BLOCK_SIZE { | |
return Err(MinLzError::BlockTooLarge); | |
} | |
if src.len() < MIN_NON_LITERAL_BLOCK_SIZE { | |
let mut dst = vec![0, 0]; | |
dst.extend_from_slice(src); | |
return Ok(dst); | |
} | |
let mut dst = Vec::with_capacity(src.len() + 10); | |
dst.push(0); | |
dst.extend_from_slice(&uvarint_encode(src.len() as u64)); | |
let dst_limit = src.len() - (src.len() >> 5) - 6; | |
let n = encode_min_lz_block_internal(&mut dst, src, dst_limit); | |
if n > 0 { | |
Ok(dst) | |
} else { | |
let mut dst = vec![0, 0]; | |
dst.extend_from_slice(src); | |
Ok(dst) | |
} | |
} | |
fn min_lz_decode_simple(dst: &mut [u8], src: &[u8]) -> usize { | |
let mut d = 0; | |
let mut s = 0; | |
while s < src.len() - 11 { | |
match src[s] & 0x03 { | |
TAG_LITERAL => { | |
let v = src[s]; | |
let x = v >> 3; | |
let (length, advance) = match x { | |
x if x < 29 => (x as usize + 1, 1), | |
29 => (30 + src[s + 1] as usize, 2), | |
30 => (30 + load16(src, s + 1) as usize, 3), | |
_ => (30 + (load32(src, s) >> 8) as usize, 4), | |
}; | |
s += advance; | |
if v & 4 != 0 { | |
// Repeat, handled below | |
} else { | |
if length > dst.len() - d || length > src.len() - s { | |
return 1; | |
} | |
dst[d..d + length].copy_from_slice(&src[s..s + length]); | |
d += length; | |
s += length; | |
continue; | |
} | |
} | |
TAG_COPY1 => { | |
let length = (src[s] >> 2) & 15; | |
let offset = (load16(src, s) >> 6) as usize + 1; | |
if length == 15 { | |
s += 3; | |
if s > src.len() { | |
return 1; | |
} | |
let length = src[s - 1] as usize + 18; | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
let src_start = d - offset; | |
for i in 0..length { | |
dst[d + i] = dst[src_start + i]; | |
} | |
d += length; | |
} else { | |
s += 2; | |
let length = length as usize + 4; | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
dst[d..d + length].copy_from_slice(&dst[d - offset..d - offset + length]); | |
d += length; | |
} | |
continue; | |
} | |
TAG_COPY2 => { | |
let length = (src[s] >> 2) as usize; | |
let offset = load16(src, s + 1) as usize + MIN_COPY2_OFFSET as usize; | |
s += if length <= 60 { | |
let length = length + 4; | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
dst[d..d + length].copy_from_slice(&dst[d - offset..d - offset + length]); | |
d += length; | |
3 | |
} else { | |
match length { | |
61 => { | |
s += 4; | |
let length = src[s - 1] as usize + 64; | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
let src_start = d - offset; | |
for i in 0..length { | |
dst[d + i] = dst[src_start + i]; | |
} | |
d += length; | |
0 | |
} | |
62 => { | |
s += 5; | |
let length = load16(src, s - 2) as usize + 64; | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
let src_start = d - offset; | |
for i in 0..length { | |
dst[d + i] = dst[src_start + i]; | |
} | |
d += length; | |
0 | |
} | |
63 => { | |
s += 6; | |
let length = (load32(src, s - 4) >> 8) as usize + 64; | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
let src_start = d - offset; | |
for i in 0..length { | |
dst[d + i] = dst[src_start + i]; | |
} | |
d += length; | |
0 | |
} | |
_ => return 1, | |
} | |
}; | |
continue; | |
} | |
0x03 => { | |
let val = load32(src, s); | |
let is_copy3 = val & 4 != 0; | |
let lit_len = ((val >> 3) & 3) as usize; | |
if !is_copy3 { | |
let length = 4 + ((val >> 5) & 7) as usize; | |
let offset = ((val >> 8) & 65535) as usize + MIN_COPY2_OFFSET as usize; | |
s += 3; | |
if lit_len > 0 { | |
let lit_len = lit_len + 1; | |
if lit_len > dst.len() - d || s + lit_len > src.len() { | |
return 1; | |
} | |
dst[d..d + lit_len].copy_from_slice(&src[s..s + lit_len]); | |
d += lit_len; | |
s += lit_len; | |
} | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
dst[d..d + length].copy_from_slice(&dst[d - offset..d - offset + length]); | |
d += length; | |
} else { | |
let length_tmp = (val >> 5) & 63; | |
let offset = (val >> 11) as usize + 65536; | |
let (length, advance) = if length_tmp < 61 { | |
(length_tmp as usize + 4, 4) | |
} else { | |
match length_tmp { | |
61 => (src[s + 4] as usize + 64, 5), | |
62 => (load16(src, s + 4) as usize + 64, 6), | |
63 => ((load32(src, s + 3) >> 8) as usize + 64, 7), | |
_ => return 1, | |
} | |
}; | |
s += advance; | |
if lit_len > 0 { | |
if lit_len > dst.len() - d || s + lit_len > src.len() { | |
return 1; | |
} | |
dst[d..d + lit_len].copy_from_slice(&src[s..s + lit_len]); | |
d += lit_len; | |
s += lit_len; | |
} | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
let src_start = d - offset; | |
for i in 0..length { | |
dst[d + i] = dst[src_start + i]; | |
} | |
d += length; | |
} | |
continue; | |
} | |
_ => return 1, | |
} | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
let src_start = d - offset; | |
for i in 0..length { | |
dst[d + i] = dst[src_start + i]; | |
} | |
d += length; | |
} | |
while s < src.len() { | |
match src[s] & 0x03 { | |
TAG_LITERAL => { | |
let v = src[s]; | |
let x = v >> 3; | |
let (length, advance) = match x { | |
x if x < 29 => (x as usize + 1, 1), | |
29 => (src[s + 1] as usize + 30, 2), | |
30 => (load16(src, s + 1) as usize + 30, 3), | |
_ => ((load32(src, s) >> 8) as usize + 30, 4), | |
}; | |
s += advance; | |
if s > src.len() { | |
return 1; | |
} | |
if v & 4 != 0 { | |
// Repeat, handled below | |
} else { | |
if length > dst.len() - d || length > src.len() - s { | |
return 1; | |
} | |
dst[d..d + length].copy_from_slice(&src[s..s + length]); | |
d += length; | |
s += length; | |
continue; | |
} | |
} | |
TAG_COPY1 => { | |
s += 2; | |
if s > src.len() { | |
return 1; | |
} | |
let length = (src[s - 2] >> 2) & 15; | |
let offset = (load16(src, s - 2) >> 6) as usize + 1; | |
let length = if length == 15 { | |
s += 1; | |
if s > src.len() { | |
return 1; | |
} | |
src[s - 1] as usize + 18 | |
} else { | |
length as usize + 4 | |
}; | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
dst[d..d + length].copy_from_slice(&dst[d - offset..d - offset + length]); | |
d += length; | |
continue; | |
} | |
TAG_COPY2 => { | |
s += 3; | |
if s > src.len() { | |
return 1; | |
} | |
let length = (src[s - 3] >> 2) as usize; | |
let offset = load16(src, s - 2) as usize + MIN_COPY2_OFFSET as usize; | |
let length = if length <= 60 { | |
length + 4 | |
} else { | |
match length { | |
61 => { | |
s += 1; | |
if s > src.len() { | |
return 1; | |
} | |
src[s - 1] as usize + 64 | |
} | |
62 => { | |
s += 2; | |
if s > src.len() { | |
return 1; | |
} | |
load16(src, s - 2) as usize + 64 | |
} | |
63 => { | |
s += 3; | |
if s > src.len() { | |
return 1; | |
} | |
(load32(src, s - 4) >> 8) as usize + 64 | |
} | |
_ => return 1, | |
} | |
}; | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
let src_start = d - offset; | |
for i in 0..length { | |
dst[d + i] = dst[src_start + i]; | |
} | |
d += length; | |
continue; | |
} | |
0x03 => { | |
s += 4; | |
if s > src.len() { | |
return 1; | |
} | |
let val = load32(src, s - 4); | |
let is_copy3 = val & 4 != 0; | |
let lit_len = ((val >> 3) & 3) as usize; | |
if !is_copy3 { | |
let length = 4 + ((val >> 5) & 7) as usize; | |
let offset = ((val >> 8) & 65535) as usize + MIN_COPY2_OFFSET as usize; | |
s -= 1; | |
if lit_len > 0 { | |
let lit_len = lit_len + 1; | |
if lit_len > dst.len() - d || s + lit_len > src.len() { | |
return 1; | |
} | |
dst[d..d + lit_len].copy_from_slice(&src[s..s + lit_len]); | |
d += lit_len; | |
s += lit_len; | |
} | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
dst[d..d + length].copy_from_slice(&dst[d - offset..d - offset + length]); | |
d += length; | |
} else { | |
let length_tmp = (val >> 5) & 63; | |
let offset = (val >> 11) as usize + 65536; | |
let (length, advance) = if length_tmp < 61 { | |
(length_tmp as usize + 4, 0) | |
} else { | |
match length_tmp { | |
61 => { | |
s += 1; | |
if s > src.len() { | |
return 1; | |
} | |
(src[s - 1] as usize + 64, 1) | |
} | |
62 => { | |
s += 2; | |
if s > src.len() { | |
return 1; | |
} | |
(load16(src, s - 2) as usize + 64, 2) | |
} | |
63 => { | |
s += 3; | |
if s > src.len() { | |
return 1; | |
} | |
((load32(src, s - 4) >> 8) as usize + 64, 3) | |
} | |
_ => return 1, | |
} | |
}; | |
s += advance; | |
if lit_len > 0 { | |
if lit_len > dst.len() - d || s + lit_len > src.len() { | |
return 1; | |
} | |
dst[d..d + lit_len].copy_from_slice(&src[s..s + lit_len]); | |
d += lit_len; | |
s += lit_len; | |
} | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
let src_start = d - offset; | |
for i in 0..length { | |
dst[d + i] = dst[src_start + i]; | |
} | |
d += length; | |
} | |
continue; | |
} | |
_ => return 1, | |
} | |
if d < offset || length > dst.len() - d { | |
return 1; | |
} | |
let src_start = d - offset; | |
for i in 0..length { | |
dst[d + i] = dst[src_start + i]; | |
} | |
d += length; | |
} | |
if d != dst.len() { | |
return 1; | |
} | |
0 | |
} | |
pub fn decode_min_lz_block(src: &[u8]) -> Result<Vec<u8>, MinLzError> { | |
if src.is_empty() { | |
return Err(MinLzError::CorruptInput); | |
} | |
if src[0] == 0 && src.len() == 1 { | |
return Ok(vec![]); | |
} | |
if src[0] != 0 { | |
return Err(MinLzError::CorruptInput); | |
} | |
let (d_len, n) = uvarint_decode(&src[1..]).ok_or(MinLzError::CorruptInput)?; | |
if d_len > MAX_BLOCK_SIZE as u64 { | |
return Err(MinLzError::CorruptInput); | |
} | |
if d_len == 0 { | |
let data = &src[1 + n..]; | |
return Ok(data.to_vec()); | |
} | |
let block = &src[1 + n..]; | |
let mut dst = vec![0; d_len as usize]; | |
if min_lz_decode_simple(&mut dst, block) == 0 { | |
Ok(dst) | |
} else { | |
Err(MinLzError::CorruptInput) | |
} | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
#[test] | |
fn test_empty() { | |
let data = b""; | |
let compressed = encode_min_lz_block(data).unwrap(); | |
assert_eq!(compressed, vec![0]); | |
let decompressed = decode_min_lz_block(&compressed).unwrap(); | |
assert_eq!(decompressed, data); | |
} | |
#[test] | |
fn test_small() { | |
let data = b"hello"; | |
let compressed = encode_min_lz_block(data).unwrap(); | |
assert_eq!(compressed, vec![0, 0, b'h', b'e', b'l', b'l', b'o']); | |
let decompressed = decode_min_lz_block(&compressed).unwrap(); | |
assert_eq!(decompressed, data); | |
} | |
#[test] | |
fn test_repetitive() { | |
let data = vec![b'a'; 100]; | |
let compressed = encode_min_lz_block(&data).unwrap(); | |
let decompressed = decode_min_lz_block(&compressed).unwrap(); | |
assert_eq!(decompressed, data); | |
} | |
#[test] | |
fn test_too_large() { | |
let data = vec![0; MAX_BLOCK_SIZE + 1]; | |
let result = encode_min_lz_block(&data); | |
assert!(matches!(result, Err(MinLzError::BlockTooLarge))); | |
} | |
#[test] | |
fn test_corrupt() { | |
let data = vec![1, 2, 3]; | |
let result = decode_min_lz_block(&data); | |
assert!(matches!(result, Err(MinLzError::CorruptInput))); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment