Last active
September 16, 2025 11:42
-
-
Save lifthrasiir/df47509caac2f065032ef72e70f2ec05 to your computer and use it in GitHub Desktop.
Disfilter: Fabian Giesen's x86-32 transformer, reworked for x86-64 in Rust (WIP)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![cfg_attr( | |
not(all(feature = "encode", feature = "decode")), | |
allow(dead_code, unused_imports) | |
)] | |
// TODO: | |
// - Better error checking | |
// - Better code structure | |
// - Detect function boundary (skip CC or multi-byte nops) | |
use alloc::{vec, vec::Vec}; | |
use core::array; | |
use core::cell::Cell; | |
use core::mem; | |
use core::ops; | |
use bytemuck::must_cast_slice; | |
// Tracing macros: `if_trace_encode!`/`if_trace_decode!` expand to their body
// only when the matching crate feature is enabled together with the
// `trace_encode`/`trace_decode` cfg flag (presumably set via `--cfg` in
// RUSTFLAGS — TODO confirm); otherwise they expand to nothing, so trace
// statements compile away entirely in normal builds.
#[cfg(all(feature = "encode", trace_encode))]
macro_rules! if_trace_encode {
    ($($t:tt)*) => {{$($t)*}};
}
// Silent variant: swallows its tokens when encode tracing is off.
#[cfg(all(feature = "encode", not(trace_encode)))]
macro_rules! if_trace_encode {
    ($($t:tt)*) => {};
}
#[cfg(all(feature = "decode", trace_decode))]
macro_rules! if_trace_decode {
    ($($t:tt)*) => {{$($t)*}};
}
// Silent variant: swallows its tokens when decode tracing is off.
#[cfg(all(feature = "decode", not(trace_decode)))]
macro_rules! if_trace_decode {
    ($($t:tt)*) => {};
}
/// Decodes a little-endian `u16` from `s`; panics unless `s.len() == 2`.
fn to_u16(s: &[u8]) -> u16 {
    let bytes: [u8; 2] = s.try_into().unwrap();
    u16::from_le_bytes(bytes)
}
/// Decodes a little-endian `u32` from `s`; panics unless `s.len() == 4`.
fn to_u32(s: &[u8]) -> u32 {
    let bytes: [u8; 4] = s.try_into().unwrap();
    u32::from_le_bytes(bytes)
}
/// Decodes a little-endian `u64` from `s`; panics unless `s.len() == 8`.
fn to_u64(s: &[u8]) -> u64 {
    let bytes: [u8; 8] = s.try_into().unwrap();
    u64::from_le_bytes(bytes)
}
// Per-opcode flag nibble: describes which operand bytes follow the opcode.
const N_: u8 = 0b0000; // no immediate
const N1: u8 = 0b0001; // 8-bit immediate
const N2: u8 = 0b0010; // 16-bit immediate
const N4: u8 = 0b0011; // 32/16-bit immediate, depending on operand size prefix
const NZ: u8 = 0b0100; // 32/64-bit immediate, depending on REX.W
const J1: u8 = 0b0101; // 8-bit relative jump target
const J4: u8 = 0b0110; // 32-bit relative jump target
const JA: u8 = 0b0111; // 32-bit absolute jump target
const A_: u8 = 0b1000; // 16/32/64-bit absolute address that is not a jump target,
                       // exact bit size depends on the instruction and context
const R1: u8 = 0b1001; // modR/M + 8-bit immediate
const XX: u8 = 0b1010; // invalid, has to be escaped
const R4: u8 = 0b1011; // modR/M + 32/16-bit immediate, depending on operand size prefix
const R_: u8 = 0b1100; // modR/M + no immediate
const BP: u8 = 0b1101; // 1-byte prefix that will be marked
const M2: u8 = 0b1110; // opcode byte + modR/M + no immediate (map 2)
const M3: u8 = 0b1111; // opcode byte + modR/M + 8-bit immediate (map 3)

/// True exactly for the two flags (M2/M3) that introduce a second opcode byte.
#[inline(always)]
const fn op3_followed(flags: u8) -> bool {
    matches!(flags, M2 | M3)
}

/// True for every flag strictly above A_ (R1 and up); those opcode formats
/// carry a ModR/M byte. (XX also lands in this range, but invalid opcodes
/// are escaped before this predicate is ever consulted.)
#[inline(always)]
const fn has_modrm(flags: u8) -> bool {
    flags > A_
}

/// Projects an R-flag (R_/R1/R4) onto its matching immediate flag (N_/N1/N4).
#[inline(always)]
const fn modrm_to_imm(flags: u8) -> u8 {
    flags % 4
}

// Compile-time layout checks that the helpers above rely on.
const _: () = {
    assert!(N1 == N_ + 1 && N2 == N1 + 1 && N4 == N2 + 1 && NZ == N4 + 1);
    assert!(modrm_to_imm(R_) == N_);
    assert!(modrm_to_imm(R1) == N1);
    assert!(modrm_to_imm(R4) == N4);
    assert!(M3 == M2 + 1);
};
// https://sandpile.org/ | |
// | |
// https://github.com/torvalds/linux/blob/master/arch/x86/lib/x86-opcode-map.txt | |
// - Placeholders are an argument made of one uppercase letter and one lowercase letter. | |
// - Immediate or address is present if some placeholder starts with AIJLO. | |
// - For -b/-w/-d/-z placeholders, its size is fixed to 8/16/32/32 bits. | |
// - For -v placeholders, its size is normally 32 bits but becomes 16 bits with 66 prefixed. | |
// - -p placeholder introduces an additional 16-bit segment selector before address. | |
// - L- placeholder introduces an additional 8-bit register selector. | |
// - ModR/M byte is present if some placeholder starts with CDEGMNPQRSTUVW. | |
// 1-byte opcodes (legacy map 0)
// Each entry is one of the flag nibbles defined above (N_/R_/J_/A_/BP/XX...),
// indexed by the opcode byte; rows are the high nibble, columns the low one.
const OPCODES0: [u8; 256] = [
    R_, R_, R_, R_, N1, N4, N_, N_, R_, R_, R_, R_, N1, N4, N_, XX, // 0
    R_, R_, R_, R_, N1, N4, N_, N_, R_, R_, R_, R_, N1, N4, N_, N_, // 1
    R_, R_, R_, R_, N1, N4, BP, N_, R_, R_, R_, R_, N1, N4, BP, N_, // 2
    R_, R_, R_, R_, N1, N4, BP, N_, R_, R_, R_, R_, N1, N4, BP, N_, // 3
    N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, // 4
    N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, // 5
    N_, N_, R_, R_, BP, BP, BP, BP, N4, R4, N1, R1, N_, N_, N_, N_, // 6
    J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, // 7
    R1, R4, R1, R1, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 8
    N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, JA, N_, N_, N_, N_, N_, // 9
    A_, A_, A_, A_, N_, N_, N_, N_, N1, N4, N_, N_, N_, N_, N_, N_, // A
    N1, N1, N1, N1, N1, N1, N1, N1, NZ, NZ, NZ, NZ, NZ, NZ, NZ, NZ, // B
    R1, R1, N2, N_, R_, R_, R1, R4, N2, N_, N2, N_, N_, N1, N_, N_, // C
    R_, R_, R_, R_, N1, N1, N_, N_, R_, R_, R_, R_, R_, R_, R_, R_, // D
    J1, J1, J1, J1, N1, N1, N1, N1, J4, J4, A_, J1, N_, N_, N_, N_, // E
    BP, N_, BP, BP, N_, N_, R1, R4, N_, N_, N_, N_, N_, N_, R_, R_, // F
];
// Some 1-byte opcodes are outright invalid in x86-64.
// Bitset indexed as `OPCODES0_I64[op >> 5] >> (op & 31) & 1`; a set bit means
// the opcode is invalid in 64-bit mode (e.g. all of 40..4F, which are reused
// as REX prefixes there — see the all-ones low half of the "5x, 4x" row).
const OPCODES0_I64: [u32; 8] = [
    //FEDCBA9876543210 FEDCBA9876543210
    0b0100000011000000_1100000011000000, // 1x, 0x
    0b1000000010000000_1000000010000000, // 3x, 2x
    0b0000000000000000_1111111111111111, // 5x, 4x
    0b0000000000000000_0000000000000111, // 7x, 6x
    0b0000010000000000_0000000000000100, // 9x, 8x
    0b0000000000000000_0000000000000000, // Bx, Ax
    0b0000000001110000_0100000000110000, // Dx, Cx
    0b0000000000000000_0000010000000000, // Fx, Ex
];
// Well-known prefix bytes.
const PRE_VEX3: u8 = 0xc4;
const PRE_VEX2: u8 = 0xc5;
const PRE_EVEX: u8 = 0x62;
const PRE_REX2: u8 = 0xd5;
const PRE_2BYTE: u8 = 0x0f;
const PRE_OSIZE: u8 = 0x66;
const PRE_REPNE: u8 = 0xf2;
const PRE_REP: u8 = 0xf3;
// Individual opcodes needing special handling. The map index lives in the
// high byte (`0x0_e8` = map 0, opcode E8), matching `(map as u16) << 8 | op`.
const OP_CALLN: u16 = 0x0_e8; // CALL Jz
// Opcodes in the map 0 that need an additional 16-bit immediate.
const OP_CALLF: u16 = 0x0_9a; // CALL Ap (32-bit only)
const OP_JMPF: u16 = 0x0_ea; // JMP Ap (32-bit only)
const OP_ENTER: u16 = 0x0_c8; // ENTER Iw,Ib
// Opcodes in the map 0 that have immediates only with /0 or /1.
const OP_GRP3_1: u16 = 0x0_f6; // TEST Eb,Ib; NOT/NEG/[I]MUL/[I]DIV Eb
const OP_GRP3_2: u16 = 0x0_f7; // TEST Ev,Iv; NOT/NEG/[I]MUL/[I]DIV Ev
// 2-byte opcodes, starting with 0F (legacy map 1)
// Same flag-nibble encoding as OPCODES0; XX entries must be escaped.
const OPCODES1: [u8; 256] = [
    R_, R_, N_, N_, XX, N_, N_, N_, N_, N_, XX, XX, XX, R_, N_, R1, // 0F 0
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, XX, R_, R_, // 0F 1
    R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 2
    N_, N_, N_, N_, N_, N_, XX, N_, M2, XX, M3, XX, XX, XX, XX, XX, // 0F 3
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 4
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 5
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 6
    R1, R1, R1, R1, R_, R_, R_, N_, N_, R_, R_, R_, R_, R_, R_, R_, // 0F 7
    J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, // 0F 8
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 9
    N_, N_, N_, R_, R1, R_, R_, R_, N_, N_, N_, R_, R1, R_, R_, R_, // 0F A
    R_, R_, R_, R_, R_, R_, R_, R_, N_, R_, R1, R_, R_, R_, R_, R_, // 0F B
    R_, R_, R1, R_, R1, R1, R1, R_, N_, N_, N_, N_, N_, N_, N_, N_, // 0F C
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F D
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F E
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, XX, // 0F F
];
// Opcode in the map 1 that has a ModR/M byte only with some prefixes.
const OP_JMPE_POPCNT: u16 = 0x1_b8; // JMPE Jz (IA-64 only); POPCNT Gv,Ev (with F3 prefix)
// EVEX opcode map 4 (largely a subset of the opcode map 0)
const OPCODES4: [u8; 256] = [
    R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, XX, XX, XX, XX, // 0
    R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, XX, XX, XX, XX, // 1
    R_, R_, R_, R_, R1, XX, XX, XX, R_, R_, R_, R_, R1, XX, XX, XX, // 2
    R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, XX, XX, XX, XX, // 3
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 4
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 5
    R_, R_, XX, XX, XX, R_, R_, XX, XX, R4, XX, R1, XX, XX, XX, XX, // 6
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 7
    R1, R4, XX, R1, N_, N_, XX, XX, R_, XX, XX, XX, XX, XX, XX, R_, // 8
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 9
    XX, XX, XX, XX, XX, R_, XX, XX, XX, XX, XX, XX, XX, R_, XX, R_, // A
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // B
    R1, R1, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // C
    R_, R_, R_, R_, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // D
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // E
    R_, R_, R_, XX, R_, R_, R1, R4, R_, R_, XX, XX, XX, XX, R_, R_, // F
];
// Map-4 analogue of the map-0 group 3 (immediate only with /0 or /1).
const OP_MAP4_GRP3_1: u16 = 0x4_f6;
const OP_MAP4_GRP3_2: u16 = 0x4_f7;
// EVEX opcode map 7 (only F8 is defined so far)
const OPCODES7: [u8; 256] = [
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 1
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 2
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 3
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 4
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 5
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 6
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 7
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 8
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 9
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // A
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // B
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // C
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // D
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // E
    XX, XX, XX, XX, XX, XX, XX, XX, R4, XX, XX, XX, XX, XX, XX, XX, // F
];
// Opcode in the map 7 which immediate isn't affected by the operand size prefix.
const OP_URDMSR_UWRMSR: u16 = 0x7_f8; // URDMSR Rq,Id; UWRMSR Id,Rq
// Special bytes in the ST_OP stream. They should be available in any operating modes
// and decode without any more operand, but yet have to be extremely unlikely to occur.
//
// - Since we regularize any *VEX & REX prefixes, a single byte REX prefix is
//   also rewritten as a uniform marker followed by a normalized REX data byte.
//   REX bytes themselves (40-4F) are used as an opcode map prefix instead,
//   so the original 2-byte prefix (0F) is repurposed for the REX marker instead.
//
// - Jump tables are indicated by INT1 (F1), which is meant to be used for hardware debugging
//   and therefore extremely unlikely to occur in the regular x86 opcode stream.
//
// - Verbatim bytes are indicated by HLT (F4), which is privileged and can only occur rarely
//   due to its semantics, making it a good opcode to steal.
//
// The original disfilter used to use INTO (CE) instead of HLT, but it is now invalid in
// the long mode and has a chance to be repurposed in the future.
const REX_MARKER: u8 = 0x0f; // marker preceding a normalized REX data byte
const JUMPTAB: u8 = 0xf1; // marker introducing a jump table run
const ESC: u8 = 0xf4; // marker introducing one verbatim (escaped) byte
// Returns the flag nibble for opcode byte `op` in opcode map `map`, honoring
// the 64-bit-mode invalid set for map 0. Maps with fixed instruction formats
// (2, 3, 5, 6) yield a constant flag; maps 8..15 are invalid.
#[inline(always)]
fn lookup_opcode(op: u8, map: u8, is64: bool) -> u8 {
    // The four per-opcode tables (maps 0, 1, 4, 7) are packed into a single
    // u16 per opcode, one nibble per map, so one load serves all four.
    const OPCODES: [u16; 256] = {
        let mut tab = [0u16; 256];
        let mut i = 0;
        while i < 256 {
            // Every flag must fit in a nibble for the packing to be lossless.
            assert!(OPCODES0[i] | OPCODES1[i] | OPCODES4[i] | OPCODES7[i] < 16);
            tab[i] = OPCODES0[i] as u16
                | (OPCODES1[i] as u16) << 4
                | (OPCODES4[i] as u16) << 8
                | (OPCODES7[i] as u16) << 12;
            i += 1;
        }
        // Override escape codes.
        // ESC (HLT) and JUMPTAB (INT1) are repurposed as stream markers, so
        // their original map-0 entries are forced to XX to keep them escaped.
        assert!(OPCODES0[ESC as usize] == N_);
        assert!(OPCODES0[JUMPTAB as usize] == N_);
        tab[ESC as usize] = tab[ESC as usize] & !0xf | XX as u16;
        tab[JUMPTAB as usize] = tab[JUMPTAB as usize] & !0xf | XX as u16;
        tab
    };
    // Per-map dispatch: values >= 0x20 encode 0x20 | shift into the packed
    // entry (map 0 -> 0, map 1 -> 4, map 4 -> 8, map 7 -> 12); smaller values
    // are a fixed flag for the whole map (2 -> R_, 3 -> R1, 5/6 -> R_).
    const MAP_FLAGS: [u8; 16] = [
        0x20, 0x24, R_, R1, 0x28, R_, R_, 0x2c, XX, XX, XX, XX, XX, XX, XX, XX,
    ];
    if map == 0 && is64 && OPCODES0_I64[op as usize >> 5] >> (op as u32 & 31) & 1 == 1 {
        XX
    } else {
        let map_flags = MAP_FLAGS[map as usize];
        if map_flags < 0x20 {
            map_flags
        } else {
            (OPCODES[op as usize] >> (map_flags & 0x1f)) as u8 & 15
        }
    }
}
/// Maps a legacy-prefix byte to a bit position in 0..32. The mixing function
/// is collision-free over the 11 recognized prefixes (checked at compile time
/// elsewhere in this file).
#[inline(always)]
const fn prefix_hash(b: u8) -> u32 {
    let b = b as u32;
    ((b * 4) ^ (b / 4)) % 32
}
#[inline(always)] | |
const fn has_osize_prefix(prefixes: u32) -> bool { | |
(prefixes >> prefix_hash(PRE_OSIZE)) & 1 != 0 | |
} | |
#[inline(always)] | |
const fn has_rep_prefix(prefixes: u32) -> bool { | |
prefixes & ((1 << prefix_hash(PRE_REPNE)) | (1 << prefix_hash(PRE_REP))) != 0 | |
} | |
// Compile-time check: the 11 recognized 1-byte prefixes hash to 11 distinct
// bit positions under `prefix_hash`, and each one is flagged BP in OPCODES0.
const _: () = {
    let prefixes = [
        0x26, 0x2e, 0x36, 0x3e, 0x64, 0x65, 0x66, 0x67, 0xf0, 0xf2, 0xf3,
    ];
    let mut bitset = 0;
    let mut i = 0;
    while i < prefixes.len() {
        let b = prefixes[i];
        bitset |= 1u32 << prefix_hash(b);
        assert!(OPCODES0[b as usize] == BP);
        i += 1;
    }
    // 11 distinct bits set <=> no hash collisions among the prefixes.
    assert!(bitset.count_ones() == 11);
};
// Declares the `ST_*` stream indices sequentially from 0, plus `NUM_STREAMS`,
// `STREAM_NAMES` (used by tracing) and `STREAM_SIZES` (fixed element size in
// bytes, derived from the declared bit width; 0 means variable-length).
macro_rules! define_streams {
    ($($i:ident $s:literal),* $(,)?) => (
        define_streams! { @0 $($i)* }
        #[allow(dead_code)] const STREAM_NAMES: [&str; NUM_STREAMS] = [$(stringify!($i)),*];
        const STREAM_SIZES: [usize; NUM_STREAMS] = [$($s / 8),*];
    );
    // Recursive step: bind the next index `$c` to the first ident, recurse.
    (@$c:tt $i:ident $($t:tt)*) => (
        #[allow(dead_code)] const $i: usize = $c;
        define_streams! { @(1 + $c) $($t)* }
    );
    // Base case: the accumulated count becomes NUM_STREAMS.
    (@$c:tt) => (
        const NUM_STREAMS: usize = $c;
    );
}
// The output streams; the number after each name is its element size in bits
// (0 = variable-length records, as used by the 16 alignment-keyed pad streams).
define_streams! {
    ST_OP 8,
    ST_EVEX 8, ST_VEX 8, ST_REX 8, ST_SIB 8,
    ST_CALL_IDX 8,
    ST_DISP8_R0 8, ST_DISP8_R1 8, ST_DISP8_R2 8, ST_DISP8_R3 8,
    ST_DISP8_R4 8, ST_DISP8_R5 8, ST_DISP8_R6 8, ST_DISP8_R7 8,
    ST_DISP32 32,
    ST_JUMP8 8, ST_JUMP32 32, ST_JUMP64 64,
    ST_IMM8 8, ST_IMM16 16, ST_IMM32 32, ST_IMM64 64,
    ST_ADDR16 16, ST_ADDR32 32, ST_ADDR64 64,
    ST_CALL32 32, ST_CALL64 64,
    ST_JUMPTAB64 64,
    ST_PAD0 0, ST_PAD1 0, ST_PAD2 0, ST_PAD3 0, ST_PAD4 0, ST_PAD5 0, ST_PAD6 0, ST_PAD7 0,
    ST_PAD8 0, ST_PAD9 0, ST_PAD10 0, ST_PAD11 0, ST_PAD12 0, ST_PAD13 0, ST_PAD14 0, ST_PAD15 0,
}
// Logical aliases: these share a physical stream so related bytes stay
// interleaved in order (ModR/M bytes and jump-table counts ride along ST_OP;
// absolute 32-bit jump targets share ST_JUMP32).
const ST_MODRM: usize = ST_OP;
const ST_AJUMP32: usize = ST_JUMP32;
const ST_JUMPTAB_COUNT: usize = ST_OP;
#[cfg(feature = "encode")]
#[derive(Debug)]
pub struct EncodedStreams {
    // Load address of the first encoded instruction.
    origin: u64,
    // NOTE(review): presumably the number of sentinel bytes appended to the
    // input during encoding (initialized to 0 in `new`) — confirm in `encode`.
    sentinel_size: u8,
    // One byte buffer per ST_* stream index.
    streams: [Vec<u8>; NUM_STREAMS],
}
#[cfg(feature = "encode")]
impl EncodedStreams {
    /// Creates an empty set of streams for code originally loaded at `origin`.
    fn new(origin: u64) -> Self {
        Self {
            origin,
            sentinel_size: 0,
            streams: array::from_fn(|_| Vec::new()),
        }
    }
    /// Debug-checks that a write of `size` bytes matches the stream's declared
    /// element size; streams declared with 0 bits are variable-length and skip
    /// the check.
    fn check(&self, st: usize, size: usize) {
        let expected = STREAM_SIZES[st];
        if expected > 0 {
            debug_assert_eq!(size, expected);
        }
    }
    /// Appends a single byte to stream `st`.
    /// (The `[3..]` in tracing strips the `ST_` prefix from the stream name.)
    fn put8(&mut self, st: usize, v: u8) {
        if_trace_encode! {
            print!("({}:{v:02X})", &STREAM_NAMES[st][3..]);
        }
        self.check(st, 1);
        self.streams[st].push(v);
    }
    /// Appends a 32-bit value in little-endian order to stream `st`.
    fn put32(&mut self, st: usize, v: u32) {
        if_trace_encode! {
            print!("({}:{v:08X})", &STREAM_NAMES[st][3..]);
        }
        self.check(st, 4);
        self.streams[st].extend_from_slice(&v.to_le_bytes());
    }
    /// Appends a 64-bit value in little-endian order to stream `st`.
    fn put64(&mut self, st: usize, v: u64) {
        if_trace_encode! {
            print!("({}:{v:016X})", &STREAM_NAMES[st][3..]);
        }
        self.check(st, 8);
        self.streams[st].extend_from_slice(&v.to_le_bytes());
    }
    /// Appends raw bytes to stream `st`; for fixed-size streams the slice
    /// length must match the element size.
    fn copy(&mut self, st: usize, v: &[u8]) {
        if_trace_encode! {
            print!("({}:{})", &STREAM_NAMES[st][3..], {
                use core::fmt;
                struct Hex<'a>(&'a [u8]);
                impl fmt::Display for Hex<'_> {
                    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                        for b in self.0 {
                            write!(f, "{b:02X}")?;
                        }
                        Ok(())
                    }
                }
                Hex(v)
            });
        }
        self.check(st, v.len());
        self.streams[st].extend_from_slice(v);
    }
    /// Emits an opcode byte, preceded by a map marker when needed. Bytes
    /// 40..4F double as map markers in ST_OP, so an opcode in that range is
    /// always disambiguated with an explicit marker, even for map 0.
    fn op(&mut self, map: u8, op: u8) {
        if map > 0 || op & 0xf0 == 0x40 {
            self.op_map(map);
        }
        self.put8(ST_OP, op);
    }
    /// Emits `b` as a verbatim byte: the ESC marker followed by `b` itself.
    fn op_esc(&mut self, b: u8) {
        if_trace_encode! {
            //print!("(OP:{ESC:02X}=ESC)(OP:{b:02X})");
            print!("(OP:{ESC:02X})(OP:{b:02X})");
        }
        self.streams[ST_OP].extend_from_slice(&[ESC, b]);
    }
    /// Emits the marker byte (0x40 + map index) selecting opcode map `m`.
    fn op_map(&mut self, m: u8) {
        let b = 0x40 + m;
        if_trace_encode! {
            //print!("(OP:{b:02X}=MAP{m:X})");
            print!("(OP:{b:02X})");
        }
        assert!(m < 16);
        self.streams[ST_OP].push(b);
    }
    /// Records a 32-bit call target: a cache hit stores only the index byte,
    /// a miss (idx == 0xFF) stores the index followed by the full target.
    fn call32(&mut self, idx: u8, target: u32) {
        if idx == 0xff {
            if_trace_encode! {
                print!("(CALL_IDX:{idx:02X})(CALL32:{target:08X})");
            }
            self.streams[ST_CALL_IDX].push(idx);
            self.streams[ST_CALL32].extend_from_slice(&target.to_le_bytes());
        } else {
            if_trace_encode! {
                print!("(CALL_IDX:{idx:02X})");
            }
            self.streams[ST_CALL_IDX].push(idx);
        }
    }
    /// 64-bit variant of `call32`.
    fn call64(&mut self, idx: u8, target: u64) {
        if idx == 0xff {
            if_trace_encode! {
                print!("(CALL_IDX:{idx:02X})(CALL64:{target:016X})");
            }
            self.streams[ST_CALL_IDX].push(idx);
            self.streams[ST_CALL64].extend_from_slice(&target.to_le_bytes());
        } else {
            if_trace_encode! {
                print!("(CALL_IDX:{idx:02X})");
            }
            self.streams[ST_CALL_IDX].push(idx);
        }
    }
    /// Emits a jump-table marker; `count` is the number of entries minus one,
    /// stored in ST_JUMPTAB_COUNT (an alias of ST_OP).
    fn jumptab(&mut self, count: u8) {
        if_trace_encode! {
            //print!("(OP:{JUMPTAB:02X}=JUMPTAB)(JUMPTAB_COUNT:{count:02X})");
            print!("(OP:{JUMPTAB:02X})(OP:{count:02X})");
        }
        self.streams[ST_OP].push(JUMPTAB);
        self.streams[ST_JUMPTAB_COUNT].push(count);
    }
    /// Serializes everything: a header (origin, sentinel size, a u64 bitmask
    /// of non-empty streams, then one u32 length per non-empty stream),
    /// followed by the raw stream bodies in index order.
    pub fn write_to(&self, w: &mut impl std::io::Write) -> std::io::Result<()> {
        let mut mask = 0u64;
        for (i, stream) in self.streams.iter().enumerate() {
            if !stream.is_empty() {
                mask |= 1 << i as u64;
            }
        }
        // NOTE(review): presumably the top mask bits are reserved for future
        // use — confirm why the limit is 60 rather than 64.
        assert!(NUM_STREAMS <= 60);
        let mut header = vec![];
        header.extend_from_slice(&self.origin.to_le_bytes());
        header.push(self.sentinel_size);
        header.extend_from_slice(&mask.to_le_bytes());
        for stream in &self.streams {
            if !stream.is_empty() {
                header.extend_from_slice(&(stream.len() as u32).to_le_bytes());
            }
        }
        w.write_all(must_cast_slice(&header))?;
        for stream in &self.streams {
            w.write_all(stream)?;
        }
        Ok(())
    }
}
/// Whether the W bit (bit 3) is set in a normalized 4-bit REX value.
#[inline(always)]
const fn rex_has_w(rex: u8) -> bool {
    (rex >> 3) & 1 == 1
}
// ___ ____ ____ | |
// VEX3 RXB0mmmm WvvvvLpp -> 1vvvvLpp 0000WRXB + map mmmm | |
// ^ | |
// The bit 4 of the second byte is technically the fifth map bit, | |
// which gets ignored by the current encoding scheme | |
// because it is currently completely unused. | |
/// Normalizes a 3-byte VEX payload `RXB0mmmm WvvvvLpp` into
/// `(1vvvvLpp, 0000WRXB)` plus the map index `mmmm`.
#[inline(always)]
fn shuffle_vex3([x, y]: [u8; 2]) -> Option<([u8; 2], u8)> {
    // Bit 4 of the first payload byte is the (currently unused) fifth map
    // bit; refuse to shuffle when it is set so the transform stays lossless.
    if x & 0x10 != 0 {
        return None;
    }
    let map = x & 0x0f;
    // W drops from bit 7 of `y` to bit 3; R/X/B arrive inverted in the
    // top bits of `x` and are un-inverted into bits 2..0.
    let w_bit = (y & 0x80) >> 4;
    let rxb = (!x) >> 5;
    let rex = w_bit | rxb;
    let vex = (y & 0x7f) | 0x80;
    Some(([vex, rex], map))
}
/// Inverse of `shuffle_vex3`: rebuilds the original 3-byte VEX payload,
/// rejecting any `(vex, rex, map)` combination the shuffle cannot produce.
#[inline(always)]
fn unshuffle_vex3([vex, rex]: [u8; 2], map: u8) -> Option<[u8; 2]> {
    let valid = vex & 0x80 != 0 && rex & 0xf0 == 0 && map < 16;
    if !valid {
        return None;
    }
    // Re-invert R/X/B back into the top bits; restore W to bit 7.
    let x = ((!rex & 0x07) << 5) | map;
    let y = ((rex & 0x08) << 4) | (vex & 0x7f);
    Some([x, y])
}
// _____ ____ | |
// VEX2 RvvvvLpp -> 1vvvvLpp 00000R00 + map 1 (implied) | |
/// Normalizes the 2-byte VEX payload `RvvvvLpp` into `(1vvvvLpp, 00000R00)`;
/// the map index is always 1 for this form.
#[inline(always)]
fn shuffle_vex2([x]: [u8; 1]) -> ([u8; 2], u8) {
    // Only R survives into the synthetic REX byte (un-inverted, at bit 2);
    // the rest of the payload is carried verbatim with the top bit forced on.
    let rex = if x & 0x80 == 0 { 0x04 } else { 0x00 };
    let vex = 0x80 | (x & 0x7f);
    ([vex, rex], 1)
}
/// Inverse of `shuffle_vex2`: rebuilds the 1-byte VEX payload, rejecting
/// anything the shuffle cannot produce (wrong map, stray REX bits, clear MSB).
#[inline(always)]
fn unshuffle_vex2([vex, rex]: [u8; 2], map: u8) -> Option<[u8; 1]> {
    if vex & 0x80 == 0 || rex & 0xfb != 0 || map != 1 {
        return None;
    }
    // Put the re-inverted R bit back at position 7 on top of the payload.
    let r = if rex & 0x04 == 0 { 0x80 } else { 0x00 };
    Some([r | (vex & 0x7f)])
}
//      _____ _____                   _        _____
// EVEX RXBrbmmm Wvvvvxpp **L*V*** -> **0*0*** VvvvvLpp 0rxbWRXB + map mmm
#[inline(always)]
fn shuffle_evex([x, y, z]: [u8; 3]) -> ([u8; 3], u8) {
    // Map index is the low 3 bits of the first payload byte.
    let map = x & 0x07;
    // Gather the REX-like bits into one byte (0rxbWRXB), un-inverting the
    // inverted fields: R/X/B from x's top bits, r/b from x bits 4/3,
    // W from y bit 7, and x from y bit 2 (per the diagram above).
    let rex = (!x & 0x10) << 2 | (!x & 0x08) << 1 | (!x >> 5) | (!y & 0x04) << 3 | (y >> 4) & 0x08;
    // VvvvvLpp: vvvv and pp stay in place from y (0x7b masks out W and x);
    // L comes from z bit 5, V from z bit 3.
    let vex = (y & 0x7b) | (z >> 3) & 0x04 | (z & 0x08) << 4;
    // Whatever remains of the third byte once L and V have been extracted.
    let evex = z & 0xd7;
    ([evex, vex, rex], map)
}
// Inverse of `shuffle_evex`: rebuilds the original 3-byte EVEX payload.
#[inline(always)]
fn unshuffle_evex([evex, vex, rex]: [u8; 3], map: u8) -> Option<[u8; 3]> {
    // Reject patterns shuffle_evex can never produce: the L/V slots (bits
    // 5 and 3) of `evex` must be clear, rex bit 7 is unused, and only
    // maps 0..7 exist in this 3-bit encoding.
    if evex & 0x28 != 0 || rex & 0x80 != 0 || map >= 8 {
        return None;
    }
    // Exact inverses of the bit moves in `shuffle_evex` (re-inverting the
    // fields that the EVEX prefix stores inverted).
    let x = (!rex & 0x07) << 5 | (!rex >> 2) & 0x10 | (!rex >> 1) & 0x08 | map;
    let y = (rex & 0x08) << 4 | (vex & 0x7b) | (!rex >> 3) & 0x04;
    let z = evex | (vex & 0x04) << 3 | (vex >> 4) & 0x08;
    Some([x, y, z])
}
/// Splits a ModR/M byte into its (mode, base) fields; the middle reg/opcode
/// field is extracted separately by `modrm_reg`.
#[inline(always)]
const fn parse_modrm(modrm: u8) -> (u8 /*mode*/, u8 /*base*/) {
    let mode = modrm >> 6;
    let base = modrm & 0b111;
    (mode, base)
}
/// Mode 0b11 selects a register operand, so no memory operand follows.
#[inline(always)]
const fn modrm_is_reg_only((mode, _base): (u8, u8)) -> bool {
    matches!(mode, 0b11)
}
/// Extracts the middle 3-bit reg/opcode-extension field of a ModR/M byte.
#[inline(always)]
const fn modrm_reg(modrm: u8) -> u8 {
    (modrm >> 3) & 0b111
}
/// A memory operand (mode < 0b11) with base 0b100 is followed by a SIB byte.
#[inline(always)]
const fn modrm_has_sib((mode, base): (u8, u8)) -> bool {
    base == 0b100 && mode < 0b11
}
/// Splits `0..count` into consecutive ranges of `chunk_size` elements, with
/// one shorter trailing range when `count` is not an exact multiple.
/// Panics if `chunk_size` is zero.
fn range_chunks(count: usize, chunk_size: usize) -> impl Iterator<Item = ops::Range<usize>> {
    // End of the last full-sized chunk.
    let full_end = count / chunk_size * chunk_size;
    let tail = (full_end < count).then(|| full_end..count);
    (0..full_end)
        .step_by(chunk_size)
        .map(move |lo| lo..lo + chunk_size)
        .chain(tail)
}
// Try to recognize common function boundary padding starting from `code[0]`: | |
// - `00` (ADD Eb,Gb) | |
// - `90` (NOP) | |
// - `0F 1F /0` (NOP E[bv]) | |
// `0F 1F 00'000'reg` | |
// `0F 1F 00'000'100 zz'zzz'zzz` | |
// `0F 1F 00'000'100 zz'zzz'101 XX XX XX XX` | |
// `0F 1F 00'000'101 XX XX XX XX` | |
// `0F 1F 01'000'reg XX` | |
// `0F 1F 01'000'100 ZZ XX` | |
// `0F 1F 10'000'reg XX XX XX XX` | |
// `0F 1F 10'000'100 ZZ XX XX XX XX` | |
// - `CC` (INT3) | |
// - `89 11'reg'reg` (MOV R#,R#) | |
// - `8D 00'reg'reg` (LEA R#,[R#]) where reg != 101 | |
// - `8D 01'reg'reg 00` (LEA R#,[R#+00h]) | |
// - `8D 10'reg'reg 00 00 00 00` (LEA R#,[R#+00000000h]) | |
// - `8D 00'reg'100 zz'100'reg (LEA R#,[R#*1]) where reg != 101 | |
// - `8D 01'reg'100 zz'100'reg 00` (LEA R#,[R#*1+00h]) where reg != 101 | |
// - `8D 10'reg'100 zz'100'reg 00 00 00 00` (LEA R#,[R#*1+00000000h]) where reg != 101 | |
// - Any 1-byte prefix besides from LOCK and REX: `26 2E 36 3E 64 65 66 67 F2 F3` | |
/// Returns how many bytes at the start of `code` look like inter-function
/// padding: leading zero bytes followed by any run of the recognized
/// NOP/INT3 filler encodings listed in the comment above.
fn scan_pad(code: &[u8]) -> usize {
    // Literal prefix of each recognized filler instruction, paired with its
    // total encoded length (literal bytes plus trailing don't-care bytes).
    // The literals are mutually non-prefixing, so match order is irrelevant.
    const FILLERS: [(&[u8], usize); 12] = [
        (&[0xcc], 1),                                      // INT3
        (&[0x90], 1),                                      // NOP
        (&[0x66, 0x90], 2),                                // 66 NOP
        (&[0x0f, 0x1f, 0x00], 3),                          // NOP [reg]
        (&[0x0f, 0x1f, 0x40], 4),                          // NOP [reg+disp8]
        (&[0x0f, 0x1f, 0x44, 0x00], 5),                    // NOP [reg+reg*1+disp8]
        (&[0x66, 0x0f, 0x1f, 0x44, 0x00], 6),
        (&[0x0f, 0x1f, 0x80], 7),                          // NOP [reg+disp32]
        (&[0x0f, 0x1f, 0x84, 0x00], 8),                    // NOP [reg+reg*1+disp32]
        (&[0x66, 0x0f, 0x1f, 0x84, 0x00], 9),
        (&[0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00], 10),
        (&[0x66, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00], 11),
    ];
    let len = code.len();
    // Zero bytes count as padding only at the very start of the run.
    let mut pos = code.iter().position(|&b| b != 0x00).unwrap_or(len);
    'outer: loop {
        for &(lit, total) in FILLERS.iter() {
            // Require the full encoded length (including don't-care operand
            // bytes) to be present before consuming it.
            if pos + total <= len && code[pos..].starts_with(lit) {
                pos += total;
                continue 'outer;
            }
        }
        break;
    }
    pos
}
/* | |
https://stackoverflow.com/questions/25545470/long-multi-byte-nops-commonly-understood-macros-or-other-notation | |
https://gist.github.com/stevemk14ebr/d117e8d0fd1432fb2a92354a034ce5b9 | |
if(code == OP_RETNI || code == OP_RETN || code == OP_INT3) // return/padding | |
nextIsFunc = sTRUE; // next opcode is likely to be first of a new function | |
CALL 9A E8 FF/2-3 | |
JMP E9 EA EB FF/4-5 | |
JMPE 0FB8(w/o F2/F3) 0F00/5 | |
RET C2 C3 CA CB CF | |
JMPABS REX2+A1 | |
REX2 invalid: 7x Ax Ex 13x | |
JMPABS transfers program control to the 64-bit absolute address target64 given as a quadword | |
immediate. JMPABS is in legacy map 0 and requires a REX2 prefix with REX2.M0 = 0 and REX2.W = 0. All | |
other REX2 payload bits are ignored, and code-generators should set these bits to 0. JMPABS does not | |
have a ModRM byte and target64 is placed immediately after the opcode byte, so the entire instruction is | |
11 bytes long. Prefixing JMPABS with 0x66, 0x67, 0xF0, 0xF2, or 0xF3 triggers #UD. Segment overrides are | |
allowed but ignored by JMPABS. | |
padding + function target | |
*/ | |
/// Whether opcode `op` (map index in the high byte) unconditionally transfers
/// control, so the bytes after it may be function-boundary padding.
const fn pad_followed(op: u16) -> bool {
    // TODO: extend per the notes above (JMPE, JMPABS, FF /4-5, ...)
    match op {
        // CALL Ap / CALL Jz / JMP Jz / JMP Ap / JMP Jb
        0x9a | 0xe8 | 0xe9 | 0xea | 0xeb => true,
        // RET / RETF / INT3 / IRET family
        0xc2 | 0xc3 | 0xca | 0xcb | 0xcc | 0xcf => true,
        _ => false,
    }
}
/// A 256-entry move-to-front cache of recent call targets.
/// The index 0xFF doubles as the "not cached" marker.
struct CallCache {
    cache: [u64; 0x100],
}
impl CallCache {
    fn new() -> Self {
        Self { cache: [0; 0x100] }
    }
    /// Looks `target` up, returning its current slot or 0xFF on a miss.
    #[inline(always)]
    fn find_index(&self, target: u64) -> u8 {
        for (slot, &cached) in self.cache.iter().enumerate() {
            if cached == target {
                return slot as u8;
            }
        }
        0xff
    }
    /// Resolves a slot index back to its cached target; the 0xFF miss marker
    /// yields None.
    #[inline(always)]
    fn find_target(&self, idx: u8) -> Option<u64> {
        if idx == 0xff {
            None
        } else {
            Some(self.cache[idx as usize])
        }
    }
    /// Moves `target` to the front, shifting down every entry before its old
    /// slot (`idx` is the result of `find_index`; on a miss the last entry is
    /// evicted).
    #[inline(always)]
    fn record(&mut self, idx: u8, target: u64) {
        let shifted = idx as usize;
        self.cache.copy_within(..shifted, 1);
        self.cache[0] = target;
    }
}
#[cfg(feature = "encode")] | |
pub fn encode(mut code: Vec<u8>, mut origin: u64, is64: bool) -> Option<EncodedStreams> { | |
let mut st = EncodedStreams::new(origin); | |
let mut call_cache = CallCache::new(); | |
let mut call_idx = |target: u64| { | |
let idx = call_cache.find_index(target); | |
call_cache.record(idx, target); | |
idx | |
}; | |
let code_len = code.len(); | |
let code_start = origin; | |
let code_end = origin + code_len as u64; | |
// should be enough for this encoding scheme | |
const SENTINEL: usize = 15; | |
code.extend_from_slice(&[0u8; SENTINEL]); | |
let mut prefixes = 0; | |
let mut pad = false; | |
let mut code = &code[..]; | |
while code.len() > SENTINEL { | |
if_trace_encode! { | |
println!(); | |
print!("{origin:06X}: "); | |
//print!("{:02X?} ", &code[..SENTINEL]); | |
} | |
// Try to skip any padding. | |
if pad { | |
let stream = ST_PAD0 + (origin & 0xf) as usize; | |
let pad_size = scan_pad(&code[..code.len() - SENTINEL]); | |
let mut padding; | |
(padding, code) = code.split_at(pad_size); | |
origin += pad_size as u64; | |
while padding.len() >= 0xff { | |
let chunk; | |
(chunk, padding) = padding.split_at(0xff); | |
st.put8(stream, 0xff); | |
st.copy(stream, chunk); | |
} | |
st.put8(stream, padding.len() as u8); | |
st.copy(stream, padding); | |
if false { | |
// Speculatively record the next instruction as a call target. | |
call_idx(origin); | |
} | |
pad = false; | |
continue; | |
} | |
// Detect a possible jump table of at least 3 entries. | |
const MIN_JUMPTAB: usize = 3; | |
if !is64 { | |
// TBW | |
let mut i = 0; | |
let min_addr = code_start as u32; | |
let max_addr = (code_end - 1) as u32; | |
while i < code.len() - SENTINEL - 4 { | |
let addr = to_u32(&code[i..i + 4]); | |
if addr < min_addr || max_addr < addr { | |
break; | |
} | |
i += 4; | |
} | |
if i >= MIN_JUMPTAB * 4 { | |
for range in range_chunks(i / 4, 0x100) { | |
st.jumptab((range.len() - 1) as u8); | |
for j in range { | |
let addr = to_u32(&code[j * 4..j * 4 + 4]); | |
st.call32(call_idx(addr as u64), addr); | |
} | |
} | |
code = &code[i..]; | |
origin += i as u64; | |
continue; | |
} | |
} else if origin % 8 == 0 { | |
// In x86-64, jump tables are typically 64-bit aligned offsets. | |
// Since we don't know where they will be used, | |
// we assume that they may be used anywhere within this `code`. | |
let min_offset = -(code_len as i64 - 1); | |
let max_offset = code_len as i64 - 1; | |
let mut i = 0; | |
while i < code.len() - SENTINEL - 8 { | |
let offset = to_u64(&code[i..i + 8]) as i64; | |
if offset < min_offset || max_offset < offset { | |
break; | |
} | |
i += 8; | |
} | |
if i >= MIN_JUMPTAB * 8 { | |
for range in range_chunks(i / 8, 0x100) { | |
st.jumptab((range.len() - 1) as u8); | |
for j in range { | |
st.put64(ST_JUMPTAB64, to_u64(&code[j * 8..j * 8 + 8])); | |
} | |
} | |
code = &code[i..]; | |
origin += i as u64; | |
continue; | |
} | |
} | |
let mut i = 0; | |
let mut pre = 0; | |
let mut evex = 0; | |
let mut vex = 0; | |
let mut rex = 0; | |
let mut map = 0; | |
let mut op = 0; | |
// Handle prefixes that cannot be combined first. | |
// They are all followed by ModR/M, where mode = 0b11 is required in x86-32. | |
if is64 || modrm_is_reg_only(parse_modrm(code[1])) { | |
match code[0] { | |
PRE_VEX3 => { | |
if let Some(ret) = shuffle_vex3([code[1], code[2]]) { | |
pre = PRE_VEX3; | |
([vex, rex], map) = ret; | |
op = code[3]; | |
i = 4; | |
} | |
} | |
PRE_VEX2 => { | |
pre = PRE_VEX2; | |
([vex, rex], map) = shuffle_vex2([code[1]]); | |
op = code[2]; | |
i = 3; | |
} | |
PRE_EVEX => { | |
pre = PRE_EVEX; | |
([evex, vex, rex], map) = shuffle_evex([code[1], code[2], code[3]]); | |
op = code[4]; | |
i = 5; | |
} | |
_ => {} | |
} | |
} | |
// *VEX cannot coexist with 0F or REX. | |
let has_vex = i > 0; | |
if !has_vex { | |
if is64 { | |
let c = code[0]; | |
if c & 0xf0 == 0x40 { | |
// REX (40..4F) | |
pre = REX_MARKER; | |
rex = c & 0x0f; | |
i = 1; | |
} else if c == PRE_REX2 { | |
pre = PRE_REX2; | |
rex = code[1]; | |
i = 2; | |
} | |
} | |
if code[i] == PRE_2BYTE { | |
map = 1; | |
op = code[i + 1]; | |
i += 2; | |
} else { | |
op = code[i]; | |
i += 1; | |
} | |
} | |
let i = Cell::new(i); | |
let fetch8 = || { | |
let ret = code[i.get()]; | |
i.set(i.get() + 1); | |
ret | |
}; | |
let fetch32 = || { | |
let ret = to_u32(&code[i.get()..i.get() + 4]); | |
i.set(i.get() + 4); | |
ret | |
}; | |
let copy = |n: usize, streams: &mut EncodedStreams, st: usize| { | |
debug_assert_eq!(STREAM_SIZES[st], n); | |
streams.copy(st, &code[i.get()..i.get() + n]); | |
i.set(i.get() + n); | |
}; | |
let rel_to_abs = |addr: u32, delta: usize| { | |
(origin + (i.get() + delta) as u64).wrapping_add(addr as i32 as u64) | |
}; | |
let mut flags = lookup_opcode(op, map, is64); | |
if flags == BP { | |
// 1-byte prefixes can't have any *VEX and REX prefix. | |
if i.get() != 1 { | |
flags = XX; | |
} else { | |
assert_eq!(map, 0); | |
prefixes |= 1 << prefix_hash(op); | |
st.op(map, op); | |
code = &code[1..]; | |
origin += 1; | |
continue; | |
} | |
} else if op3_followed(flags) { | |
// 3-byte opcode prefixes can't have any *VEX prefix which has its own map index. | |
if has_vex { | |
flags = XX; | |
} else { | |
assert_eq!(map, 1); | |
map = (flags - M2) + 2; | |
flags = (flags - M2) + R_; | |
op = fetch8(); | |
} | |
} | |
let mut prefixes = mem::replace(&mut prefixes, 0); | |
if flags == XX { | |
// Do NOT commit the current position if this instruction is invalid. | |
st.op_esc(code[0]); | |
code = &code[1..]; | |
origin += 1; | |
continue; | |
} | |
// Now we can commit any prefixes and opcode. | |
if pre != 0 { | |
st.put8(ST_OP, pre); | |
if pre == PRE_EVEX { | |
st.put8(ST_EVEX, evex); | |
st.put8(ST_VEX, vex); | |
} else if pre == PRE_VEX2 || pre == PRE_VEX3 { | |
st.put8(ST_VEX, vex); | |
} | |
st.put8(ST_REX, rex); | |
} | |
st.op(map, op); | |
let op = (map as u16) << 8 | op as u16; | |
match op { | |
// Parse an additional 16-bit immediate for these: | |
// | |
// 9A/EA: CALL/JMP Ap (16-bit segment + 32-bit address) | |
// C8: ENTER Iw,Ib (16-bit immediate + 8-bit immediate) | |
OP_CALLF | OP_JMPF | OP_ENTER => { | |
copy(16 / 8, &mut st, ST_IMM16); | |
} | |
// F6/F7: TEST E,I (/0-1) vs. NOT/NEG/[I]MUL/[I]DIV E (/2-7) | |
OP_GRP3_1 | OP_GRP3_2 | OP_MAP4_GRP3_1 | OP_MAP4_GRP3_2 | |
if modrm_reg(code[i.get()]) >= 2 => | |
{ | |
flags = R_; | |
} | |
// 0F B8: JMPE Jz (IA-64 only) vs. POPCNT Gv,Ev (F3) | |
OP_JMPE_POPCNT if has_rep_prefix(prefixes) => { | |
flags = R_; | |
} | |
// MAP7 F8: URDMSR Rq,Id; UWRMSR Id,Rq (immediate size doesn't depend on 66) | |
OP_URDMSR_UWRMSR => { | |
prefixes &= !(1 << prefix_hash(PRE_OSIZE)); | |
} | |
_ => {} | |
} | |
pad = pad_followed(op); | |
// ModR/M present | |
if has_modrm(flags) { | |
flags = modrm_to_imm(flags); | |
let modrm = fetch8(); | |
st.put8(ST_MODRM, modrm); | |
let (mode, base) = parse_modrm(modrm); | |
let sib; | |
if modrm_has_sib((mode, base)) { | |
sib = fetch8(); | |
st.put8(ST_SIB, sib); | |
} else { | |
sib = 0; | |
} | |
match mode { | |
0 if base == 5 => { | |
let addr = fetch32(); | |
if is64 { | |
// [eip+disp32] or [rip+disp32] | |
// Note that we haven't fully decoded operands yet, hence a delta. | |
let delta = [0, 1, 2, 4][flags as usize]; | |
st.put64(ST_ADDR64, rel_to_abs(addr, delta)); | |
} else { | |
st.put32(ST_ADDR32, addr); // [disp32] | |
} | |
} | |
0 if sib & 7 == 5 => copy(32 / 8, &mut st, ST_DISP32), // [reg*scale+disp32] | |
// [reg+disp8] or [reg*scale+disp8] | |
1 => copy(8 / 8, &mut st, ST_DISP8_R0 + base as usize), | |
2 => copy(32 / 8, &mut st, ST_DISP32), // [reg+disp32] | |
_ => {} | |
} | |
} | |
match flags { | |
J4 => { | |
let target = rel_to_abs(fetch32(), 0); | |
if op == OP_CALLN { | |
let idx = call_idx(target); | |
if is64 { | |
st.call64(idx, target); | |
} else { | |
st.call32(idx, target as u32); | |
} | |
} else { | |
if is64 { | |
st.put64(ST_JUMP64, target); | |
} else { | |
st.put32(ST_JUMP32, target as u32); | |
} | |
} | |
} | |
A_ => { | |
// EA: 32-bit only, 16-bit if 66 ("Ap" = w:z) | |
// Ax: 32-bit or 64-bit, fixed per operating mode ("Ov") | |
let lgn = if is64 { | |
3 | |
} else if op == OP_JMPF && has_osize_prefix(prefixes) { | |
1 | |
} else { | |
2 | |
}; | |
copy(1 << lgn, &mut st, (ST_ADDR16 - 1) + lgn); | |
} | |
JA => copy(32 / 8, &mut st, ST_AJUMP32), | |
J1 => copy(8 / 8, &mut st, ST_JUMP8), | |
N_ => {} | |
_ => { | |
assert!(matches!(flags, N1 | N2 | N4 | NZ)); | |
if flags == NZ && !rex_has_w(rex) { | |
flags = N4; | |
} | |
if flags == N4 && has_osize_prefix(prefixes) { | |
flags = N2; | |
} | |
let lgn = (flags - N1) as usize; | |
copy(1 << lgn, &mut st, ST_IMM8 + lgn); | |
} | |
} | |
let i = i.get(); | |
code = &code[i..]; | |
origin += i as u64; | |
} | |
st.sentinel_size = (SENTINEL - code.len()) as u8; | |
Some(st) | |
} | |
#[cfg(feature = "decode")]
#[derive(Debug)]
/// A borrowed view of the per-kind byte streams consumed by [`decode`]; the
/// decode-side counterpart of `EncodedStreams` (see the `From` impl below).
pub struct DecodedStreams<'a> {
    /// Base address the code is assumed to start at; used to turn the
    /// absolute jump/call targets stored in the streams back into
    /// instruction-relative displacements.
    origin: u64,
    /// Number of trailing sentinel bytes to strip from the decoded output.
    sentinel_size: u8,
    /// One input slice per stream kind (`NUM_STREAMS` total).
    streams: [&'a [u8]; NUM_STREAMS],
}
#[cfg(all(feature = "encode", feature = "decode"))]
impl<'a> From<&'a EncodedStreams> for DecodedStreams<'a> {
    /// Borrows an encoder's output as decoder input without copying any
    /// stream data; only the owned buffers are re-viewed as slices.
    fn from(encoded: &'a EncodedStreams) -> Self {
        let streams = encoded.streams.each_ref().map(|s| &s[..]);
        DecodedStreams {
            origin: encoded.origin,
            sentinel_size: encoded.sentinel_size,
            streams,
        }
    }
}
#[cfg(feature = "decode")]
#[export_name = "disfilter_decode"]
/// Reverses `encode`: consumes the per-kind byte streams in `streams` and
/// rebuilds the original machine code.
///
/// `is64` selects x86-64 vs. x86-32 decoding rules and must match the value
/// used at encode time. Returns `None` when any stream runs out of data
/// prematurely; panics (via `fatal!`) on inputs that could not have been
/// produced by a correct encoder.
pub fn decode(streams: &DecodedStreams<'_>, is64: bool) -> Option<Vec<u8>> {
    let origin = streams.origin;
    let sentinel_size = streams.sentinel_size;
    // Wrap each stream cursor in a `Cell` so the reader closures below can
    // advance them without needing `&mut` captures.
    let streams = streams
        .streams
        .each_ref()
        .map(|stream| Cell::new(&stream[..]));
    let pc = Cell::new(origin);
    let mut code = Vec::new();
    // Pop a single byte from stream `st`; `None` on underflow.
    let read8 = |st: usize| {
        if_trace_decode! {
            print!("({}:", &STREAM_NAMES[st][3..]);
        }
        let (&[head], tail) = streams[st].get().split_first_chunk::<1>()?;
        if_trace_decode! {
            print!("{head:02X})");
        }
        streams[st].set(tail);
        Some(head)
    };
    // Pop a little-endian u32 from stream `st`; `None` on underflow.
    let read32 = |st: usize| {
        if_trace_decode! {
            print!("({}:", &STREAM_NAMES[st][3..]);
        }
        let (&head, tail) = streams[st].get().split_first_chunk::<4>()?;
        let head = u32::from_le_bytes(head);
        if_trace_decode! {
            print!("{head:08X})");
        }
        streams[st].set(tail);
        Some(head)
    };
    // Pop a little-endian u64 from stream `st`; `None` on underflow.
    let read64 = |st: usize| {
        if_trace_decode! {
            print!("({}:", &STREAM_NAMES[st][3..]);
        }
        let (&head, tail) = streams[st].get().split_first_chunk::<8>()?;
        let head = u64::from_le_bytes(head);
        if_trace_decode! {
            print!("{head:016X})");
        }
        streams[st].set(tail);
        Some(head)
    };
    let mut call_cache = CallCache::new();
    // Read a call target: one cache-index byte, then — when the index is not
    // resolved by the cache — the full absolute target from the 64/32-bit
    // call stream. The (idx, target) pair is recorded either way, mirroring
    // the encoder's cache updates.
    let read_call = |cache: &mut CallCache| {
        let idx = read8(ST_CALL_IDX)?;
        let target = if let Some(target) = cache.find_target(idx) {
            target
        } else if is64 {
            read64(ST_CALL64)?
        } else {
            read32(ST_CALL32)? as u64
        };
        cache.record(idx, target);
        Some(target)
    };
    // Move `n` raw bytes from stream `st` into the output buffer.
    // NOTE(review): `split_at` panics (instead of returning `None`) when the
    // stream holds fewer than `n` bytes.
    let copy = |n: usize, code: &mut Vec<u8>, st: usize| {
        if_trace_decode! {
            print!("({}:", &STREAM_NAMES[st][3..]);
        }
        let (head, tail) = streams[st].get().split_at(n);
        if_trace_decode! {
            for &b in head {
                print!("{:02X}", b);
            }
            print!(")");
        }
        code.extend_from_slice(head);
        streams[st].set(tail);
        Some(())
    };
    // Abort decoding with context: the current pc plus (up to) the last 15
    // bytes already emitted.
    macro_rules! fatal {
        ($fmt:tt) => {
            panic!(
                concat!($fmt, " @ {:06X} {:02X?}"),
                pc.get(),
                &code[code.len().max(15) - 15..]
            )
        };
    }
    let mut prefixes = 0;
    let mut pad = false;
    while !streams[ST_OP].get().is_empty() {
        pc.set(origin + code.len() as u64);
        if_trace_decode! {
            println!();
            print!("{:06X}: ", pc.get());
        }
        if pad {
            // Inter-function padding: the pad stream is selected by the
            // low 4 bits of the current address; lengths of 0xff chain.
            let stream = ST_PAD0 + ((origin as usize + code.len()) & 0xf);
            loop {
                let pad_size = read8(stream)?;
                copy(pad_size as usize, &mut code, stream)?;
                if pad_size < 0xff {
                    break;
                }
            }
            if false {
                // Disabled (kept in sync with the encoder):
                // Speculatively record the next instruction as a call target.
                let target = origin + code.len() as u64;
                let idx = call_cache.find_index(target);
                call_cache.record(idx, target);
            }
            pad = false;
            continue;
        }
        // The loop guard guarantees ST_OP is non-empty here.
        let mut op = read8(ST_OP).unwrap();
        if op == ESC {
            // Escaped byte: emit the next opcode-stream byte verbatim.
            code.push(read8(ST_OP)?);
            continue;
        }
        if op == JUMPTAB {
            // Jump table: count (stored minus one) followed by absolute
            // pointers, 8 bytes each in 64-bit mode, call-cached 4-byte
            // targets otherwise.
            let count = read8(ST_JUMPTAB_COUNT)? as usize + 1;
            if is64 {
                for _ in 0..count {
                    code.extend_from_slice(&read64(ST_JUMPTAB64)?.to_le_bytes());
                }
            } else {
                for _ in 0..count {
                    code.extend_from_slice(&(read_call(&mut call_cache)? as u32).to_le_bytes());
                }
            }
            continue;
        }
        // Recover the (prefix marker, EVEX, VEX, REX) payload bytes that the
        // encoder split across the dedicated streams.
        let (pre, evex, vex, rex) = match op {
            PRE_VEX3 => (op, 0, read8(ST_VEX)?, read8(ST_REX)?),
            PRE_VEX2 => (op, 0, read8(ST_VEX)?, read8(ST_REX)?),
            PRE_EVEX => (op, read8(ST_EVEX)?, read8(ST_VEX)?, read8(ST_REX)?),
            REX_MARKER | PRE_REX2 if !is64 => fatal!("unsupported REX prefixes in x86-32"),
            REX_MARKER => (op, 0, 0, read8(ST_REX)?),
            PRE_REX2 => (op, 0, 0, read8(ST_REX)?),
            _ => (0, 0, 0, 0),
        };
        if pre != 0 {
            op = read8(ST_OP)?;
        }
        // A 0x4x byte in the opcode stream is the encoder's map-escape: its
        // low nibble selects the opcode map, and the real opcode follows.
        let map;
        if op & 0xf0 == 0x40 {
            map = op & 0x0f;
            op = read8(ST_OP)?;
        } else {
            map = 0;
        }
        let mut flags = lookup_opcode(op, map, is64);
        if flags == XX {
            fatal!("invalid opcode");
        } else if flags == BP {
            // 1-byte prefix: accumulate it and restart with the next byte.
            assert_eq!(map, 0);
            prefixes |= 1 << prefix_hash(op);
            code.push(op);
            continue;
        }
        // Re-emit prefix bytes and the (possibly multi-byte) opcode.
        'prefix: {
            match pre {
                PRE_VEX3 => {
                    let Some([x, y]) = unshuffle_vex3([vex, rex], map) else {
                        fatal!("bad VEX3 prefix");
                    };
                    code.extend_from_slice(&[PRE_VEX3, x, y, op]);
                    break 'prefix;
                }
                PRE_VEX2 => {
                    let Some([x]) = unshuffle_vex2([vex, rex], map) else {
                        fatal!("bad VEX2 prefix");
                    };
                    code.extend_from_slice(&[PRE_VEX2, x, op]);
                    break 'prefix;
                }
                PRE_EVEX => {
                    let Some([x, y, z]) = unshuffle_evex([evex, vex, rex], map) else {
                        fatal!("bad EVEX prefix");
                    };
                    code.extend_from_slice(&[PRE_EVEX, x, y, z, op]);
                    break 'prefix;
                }
                REX_MARKER => {
                    if rex & 0xf0 != 0 {
                        fatal!("bad REX prefix");
                    }
                    code.push(0x40 | rex);
                }
                PRE_REX2 => code.extend_from_slice(&[PRE_REX2, rex]),
                0 => {}
                _ => unreachable!(),
            }
            // Only applicable with non-*VEX prefixes.
            match map {
                0 => code.push(op),
                1 => code.extend_from_slice(&[0x0f, op]),
                2 => code.extend_from_slice(&[0x0f, 0x38, op]),
                3 => code.extend_from_slice(&[0x0f, 0x3a, op]),
                _ => fatal!("bad opcode map"),
            }
        }
        // Take (and reset) the prefix bits accumulated by earlier 1-byte
        // prefixes; they only apply to this instruction.
        let mut prefixes = mem::replace(&mut prefixes, 0);
        let op = (map as u16) << 8 | op as u16;
        match op {
            // Parse an additional 16-bit immediate for these:
            //
            // 9A/EA: CALL/JMP Ap (16-bit segment + 32-bit address)
            // C8: ENTER Iw,Ib (16-bit immediate + 8-bit immediate)
            OP_CALLF | OP_JMPF | OP_ENTER => {
                copy(16 / 8, &mut code, ST_IMM16)?;
            }
            // F6/F7: TEST E,I (/0-1) vs. NOT/NEG/[I]MUL/[I]DIV E (/2-7)
            OP_GRP3_1 | OP_GRP3_2 | OP_MAP4_GRP3_1 | OP_MAP4_GRP3_2
                if modrm_reg(streams[ST_MODRM].get()[0]) >= 2 =>
            {
                flags = R_;
            }
            // 0F B8: JMPE Jz (IA-64 only) vs. POPCNT Gv,Ev (F3)
            OP_JMPE_POPCNT if has_rep_prefix(prefixes) => {
                flags = R_;
            }
            // MAP7 F8: URDMSR Rq,Id; UWRMSR Id,Rq (immediate size doesn't depend on 66)
            OP_URDMSR_UWRMSR => {
                prefixes &= !(1 << prefix_hash(PRE_OSIZE));
            }
            _ => {}
        }
        pad = pad_followed(op);
        // Convert an absolute target back to a displacement relative to the
        // end of the field being emitted (`delta` bytes past the current end
        // of `code`).
        let abs_to_rel = |addr: u64, code: &[u8], delta: usize| {
            addr.wrapping_sub(origin + (code.len() + delta) as u64)
        };
        // ModR/M present
        if has_modrm(flags) {
            flags = modrm_to_imm(flags);
            let modrm = read8(ST_MODRM)?;
            code.push(modrm);
            let (mode, base) = parse_modrm(modrm);
            let sib;
            if modrm_has_sib((mode, base)) {
                sib = read8(ST_SIB)?;
                code.push(sib);
            } else {
                sib = 0;
            }
            match mode {
                0 if base == 5 => {
                    let addr = if is64 {
                        // [eip+disp32] or [rip+disp32]
                        // Note that we haven't fully decoded operands yet, hence a delta.
                        let delta = [0, 1, 2, 4][flags as usize];
                        abs_to_rel(read64(ST_ADDR64)?, &code, delta + 4) as u32
                    } else {
                        read32(ST_ADDR32)? // [disp32]
                    };
                    code.extend_from_slice(&addr.to_le_bytes());
                }
                // [reg*scale+disp32]
                0 if sib & 7 == 5 => copy(32 / 8, &mut code, ST_DISP32)?,
                // [reg+disp8] or [reg*scale+disp8]
                1 => copy(8 / 8, &mut code, ST_DISP8_R0 + base as usize)?,
                2 => copy(32 / 8, &mut code, ST_DISP32)?, // [reg+disp32]
                _ => {}
            }
        }
        // Immediate / jump-target operand, selected by the remaining flags.
        match flags {
            J4 => {
                // 32-bit relative jump/call: targets were stored absolute
                // (calls additionally via the call cache).
                let target = if op == OP_CALLN {
                    read_call(&mut call_cache)?
                } else if is64 {
                    read64(ST_JUMP64)?
                } else {
                    read32(ST_JUMP32)? as u64
                };
                let target = abs_to_rel(target as u64, &code, 4) as u32;
                code.extend_from_slice(&target.to_le_bytes());
            }
            A_ => {
                // EA: 32-bit only, 16-bit if 66 ("Ap" = w:z)
                // Ax: 32-bit or 64-bit, fixed per operating mode ("Ov")
                let lgn = if is64 {
                    3
                } else if op == OP_JMPF && has_osize_prefix(prefixes) {
                    1
                } else {
                    2
                };
                copy(1 << lgn, &mut code, (ST_ADDR16 - 1) + lgn)?;
            }
            JA => copy(32 / 8, &mut code, ST_AJUMP32)?,
            J1 => copy(8 / 8, &mut code, ST_JUMP8)?,
            N_ => {}
            _ => {
                // Plain immediates: NZ narrows to N4 without REX.W, and N4
                // narrows to N2 under an operand-size prefix.
                assert!(matches!(flags, N1 | N2 | N4 | NZ));
                if flags == NZ && !rex_has_w(rex) {
                    flags = N4;
                }
                if flags == N4 && has_osize_prefix(prefixes) {
                    flags = N2;
                }
                let lgn = (flags - N1) as usize;
                copy(1 << lgn, &mut code, ST_IMM8 + lgn)?;
            }
        }
    }
    // Strip the sentinel bytes appended by the encoder; `None` if the output
    // is somehow shorter than the recorded sentinel size.
    code.truncate(code.len().checked_sub(sentinel_size as usize)?);
    Some(code)
}
#[cfg(feature = "encode")]
/// Scans a PE executable and returns `(rva, file_offset, stored_size)` for
/// every section whose characteristics include the 0x20 flag.
///
/// Only PE32+ (64-bit) images are currently handled; a PE32 optional header
/// hits a `todo!()`. Any structural problem is reported as an `io::Error`.
pub fn locate_code(f: &mut std::fs::File) -> std::io::Result<Vec<(u64, u64, usize)>> {
    use std::io::{self, BufReader, Read, Seek, SeekFrom};
    f.seek(SeekFrom::Start(0))?;
    let mut r = BufReader::new(f);
    // MZ header: "MZ" signature at 0, offset to the PE header at 0x3c.
    let mut buf = [0u8; 0x40];
    r.read_exact(&mut buf)?;
    let sig = to_u16(&buf[0..2]);
    if sig != 0x5a4d {
        return Err(io::Error::other(format!("bad MZ signature {sig:#x}")));
    }
    let pe_offset = to_u32(&buf[0x3c..0x40]);
    if pe_offset < 0x40 {
        // An offset inside the MZ header cannot be seeked to with the
        // relative seek below.
        return Err(io::Error::other(format!(
            "too low offset to PE header {pe_offset:#x}"
        )));
    }
    r.seek_relative(pe_offset as i64 - 0x40)?;
    // PE (COFF) header: "PE\0\0" signature, section count at 6, optional
    // header size at 0x14.
    let mut buf = [0u8; 0x18];
    r.read_exact(&mut buf)?;
    let sig = to_u32(&buf[0..4]);
    if sig != 0x4550 {
        return Err(io::Error::other(format!("bad PE signature {sig:#x}")));
    }
    let num_sections = to_u16(&buf[6..8]);
    let opt_header_size = to_u16(&buf[0x14..0x16]) as usize;
    // PE optional header
    let mut opt_header = vec![0u8; opt_header_size];
    r.read_exact(&mut opt_header)?;
    let magic = to_u16(&opt_header[0..2]);
    match magic {
        0x10b => {
            // IMAGE_OPTIONAL_HEADER32
            todo!();
        }
        0x20b => {
            // IMAGE_OPTIONAL_HEADER64: only validated for size here; the
            // data directories themselves are not yet consumed.
            if opt_header_size < 0x60 {
                return Err(io::Error::other(format!(
                    "PE64 optional header too small ({opt_header_size:#x} < 0x60)"
                )));
            }
            let num_data_dirs = to_u32(&opt_header[0x5c..0x60]) as usize;
            let min_size = 0x60 + num_data_dirs * 0x10;
            if opt_header_size < min_size {
                return Err(io::Error::other(format!(
                    "PE64 optional header too small ({opt_header_size:#x} < {min_size:#x})"
                )));
            }
            // data directories:
            // EXPORT, IMPORT, RESOURCE, EXCEPTION, SECURITY, BASERELOC, DEBUG, COPYRIGHT,
            // GLOBALPTR, TLS, LOAD_CONFIG, BOUND_IMPORT, IAT, DELAY_IMPORT, COM_DESCRIPTOR, -
        }
        _ => {
            return Err(io::Error::other(format!(
                "bad PE optional header magic {magic:#x}"
            )))
        }
    }
    // Section headers, 40 bytes each: name, VirtualAddress at 12,
    // SizeOfRawData at 16, PointerToRawData at 20, Characteristics at 36.
    let mut exec_sections = Vec::new();
    for _ in 0..num_sections {
        let mut buf = [0u8; 40];
        r.read_exact(&mut buf)?;
        let name = &buf[..8];
        // Kept only for the commented-out debug print below.
        let _name = String::from_utf8_lossy(name);
        let rva = to_u32(&buf[12..16]);
        let stored_size = to_u32(&buf[16..20]);
        let stored_offset = to_u32(&buf[20..24]);
        let flags = to_u32(&buf[36..40]);
        //println!("section {_name:?}: rva {rva:#x}, stored {stored_offset:#x} + {stored_size:#x}, flags {flags:#x}");
        // 0x20 = IMAGE_SCN_CNT_CODE ("contains code").
        // NOTE(review): executable sections are conventionally identified by
        // IMAGE_SCN_MEM_EXECUTE (0x20000000) — confirm CNT_CODE is intended.
        if flags & 0x20 != 0 {
            exec_sections.push((rva as u64, stored_offset as u64, stored_size as usize));
        }
    }
    Ok(exec_sections)
}
#[cfg(all( | |
test, | |
feature = "encode", | |
feature = "decode", | |
not(target_family = "wasm") | |
))] | |
mod tests { | |
use super::*; | |
#[test]
/// End-to-end round trip against a real PE file on disk, also reporting
/// encode/decode throughput.
/// NOTE(review): hard-codes machine-specific paths (an ffmpeg.exe under
/// `c:\Program Files\...`, scratch files on `x:\`), so this effectively only
/// runs on the author's setup.
fn test_disfilter() -> std::io::Result<()> {
    use std::env;
    use std::fs::File;
    use std::io::{Read, Seek, SeekFrom, Write};
    use std::iter;
    use std::time::Instant;
    env::set_var("RUST_LOG", "trace");
    env_logger::init();
    let mut f = File::open(r"c:\Program Files\ImageMagick-7.1.1-Q16-HDRI\ffmpeg.exe")?;
    for (origin, offset, size) in locate_code(&mut f)? {
        f.seek(SeekFrom::Start(offset))?;
        let mut input = vec![0u8; size];
        f.read_exact(&mut input)?;
        File::create(r"x:\unfiltered.bin")?.write_all(&input)?;
        let start = Instant::now();
        let streams = encode(input.clone(), origin, true).expect("failed to encode");
        let enc_rate = size as f64 / start.elapsed().as_secs_f64() / 1e6;
        streams.write_to(&mut File::create(r"x:\filtered.bin")?)?;
        let start = Instant::now();
        let recons = decode(&(&streams).into(), true).expect("round trip failed");
        let dec_rate = size as f64 / start.elapsed().as_secs_f64() / 1e6;
        if input != recons {
            // Show a byte window around the first mismatching offset.
            let mismatch = iter::zip(&input, &recons)
                .position(|(a, b)| a != b)
                .unwrap();
            let lo = mismatch.max(15) - 15;
            let hi = mismatch + 15;
            panic!(
                "input != recons\n \
                 Input: {mismatch}/{} {:02X?}\n \
                 Recons: {mismatch}/{} {:02X?}",
                input.len(),
                &input[lo..hi.min(input.len())],
                recons.len(),
                &recons[lo..hi.min(recons.len())],
            );
        }
        eprintln!("Disfilter: encoding {enc_rate:.2} MB/s, decoding {dec_rate:.2} MB/s");
        break;
    }
    Ok(())
}
#[test]
/// Drives `CallCache` through a fixed (target, index) trace: 0xff marks a
/// miss. The 256 targets 9000..9256 evict the two initial entries, which is
/// why 1234 and 5678 miss again at the end of the trace.
fn test_call_cache() {
    let mut expected = vec![
        (1234, 0xff),
        (1234, 0),
        (5678, 0xff),
        (1234, 1),
        (1234, 0),
        (5678, 1),
    ];
    expected.extend((9000..9256).map(|target| (target, 0xff)));
    expected.extend([(1234, 0xff), (5678, 0xff)]);
    // Encoder view: find_index must report exactly the expected slot.
    let mut cache = CallCache::new();
    for (target, idx) in expected.iter().copied() {
        assert_eq!(cache.find_index(target), idx);
        cache.record(idx, target);
    }
    // Decoder view: find_target must agree, with misses yielding None.
    let mut cache = CallCache::new();
    for (target, idx) in expected.iter().copied() {
        let hit = cache.find_target(idx);
        if idx != 0xff {
            assert_eq!(hit, Some(target));
        } else {
            assert_eq!(hit, None);
        }
        cache.record(idx, target);
    }
}
/// Exhaustively checks a `shuffle`/`unshuffle` pair for being a partial
/// bijection: every input accepted by `shuffle` must round-trip through
/// `unshuffle`, no two inputs may map to the same `(output, map)` pair, and
/// every `(output, map)` that was never produced must be rejected.
///
/// `IN`/`OUT` are the byte widths of the two encodings (at most 3, keeping
/// the exhaustive scans cheap); `map_range` enumerates the map values probed
/// on the `unshuffle` side.
///
/// (The former `#[cfg(test)]` attribute was dropped: the enclosing
/// `mod tests` is already gated on `cfg(test)`, so it was redundant.)
fn test_shuffle<const IN: usize, const OUT: usize>(
    shuffle: impl Fn([u8; IN]) -> Option<([u8; OUT], u8)>,
    unshuffle: impl Fn([u8; OUT], u8) -> Option<[u8; IN]>,
    map_range: impl Iterator<Item = u8> + Clone,
) {
    // Enumerates all N-byte arrays in little-endian counting order.
    #[inline(always)]
    fn generate<const N: usize>() -> impl Iterator<Item = [u8; N]> {
        assert!(N <= 3);
        (0..1u32 << (N * 8)).map(|n| {
            let mut b = [0u8; N];
            for i in 0..N {
                b[i] = (n >> (i * 8) & 0xff) as u8;
            }
            b
        })
    }
    let mut seen = std::collections::HashSet::new();
    // Forward direction: round-trip and injectivity.
    for i in generate::<IN>() {
        if let Some((o, map)) = shuffle(i) {
            assert_eq!(
                unshuffle(o, map),
                Some(i),
                "{i:02X?} -> {o:02X?} + {map} -> (roundtrip failed)"
            );
            assert!(seen.insert((o, map)), "{i:02X?} -> {o:02X?} + {map} (dupe)");
        }
    }
    // Reverse direction: anything never produced must be rejected.
    for o in generate::<OUT>() {
        for map in map_range.clone() {
            if !seen.contains(&(o, map)) {
                assert_eq!(
                    unshuffle(o, map),
                    None,
                    "? <- {o:02X?} + {map} (didn't fail)"
                );
            }
        }
    }
}
#[test]
/// Exhaustive partial-bijection check for the VEX3 byte shuffle, probing
/// map values 0..=16 on the unshuffle side.
fn test_shuffle_vex3() {
    test_shuffle(shuffle_vex3, unshuffle_vex3, 0..=16);
}
#[test]
/// Same for VEX2; `shuffle_vex2` is total, hence the `Some` wrapper.
fn test_shuffle_vex2() {
    test_shuffle(|i| Some(shuffle_vex2(i)), unshuffle_vex2, 0..=16);
}
#[test]
/// Same for EVEX, probing only map values 5 and 8 on the unshuffle side.
fn test_shuffle_evex() {
    test_shuffle(
        |i| Some(shuffle_evex(i)),
        unshuffle_evex,
        [5, 8].into_iter(),
    );
}
// Helper function: test encode/decode roundtrip for byte array | |
fn roundtrip_test(code: &[u8], origin: u64, is64: bool) -> Result<(), String> { | |
let input = code.to_vec(); | |
let encoded = encode(input.clone(), origin, is64).ok_or("encoding failed")?; | |
let decoded = decode(&(&encoded).into(), is64).ok_or("decoding failed")?; | |
if input != decoded { | |
let mismatch = input | |
.iter() | |
.zip(&decoded) | |
.position(|(a, b)| a != b) | |
.unwrap_or(input.len().min(decoded.len())); | |
let input_end = (mismatch + 8).min(input.len()); | |
let decoded_end = (mismatch + 8).min(decoded.len()); | |
return Err(format!( | |
"Length mismatch or content mismatch at offset {}: input_len={}, decoded_len={}, input[{}..{}]={:02X?}, decoded[{}..{}]={:02X?}", | |
mismatch, | |
input.len(), | |
decoded.len(), | |
mismatch, input_end, &input[mismatch..input_end], | |
mismatch, decoded_end, &decoded[mismatch..decoded_end] | |
)); | |
} | |
Ok(()) | |
} | |
#[test]
/// Round-trips RIP-relative MOV/LEA/CMP encodings with zero, positive, and
/// negative displacements (64-bit mode only).
fn test_rip_relative_addressing() {
    // Test cases for RIP-relative addressing
    let test_cases = vec![
        // MOV EAX, [RIP+disp32] - various displacements
        (
            vec![0x8b, 0x05, 0x00, 0x00, 0x00, 0x00],
            0x1000,
            "MOV EAX, [RIP+0]",
        ),
        (
            vec![0x8b, 0x05, 0x10, 0x00, 0x00, 0x00],
            0x1000,
            "MOV EAX, [RIP+16]",
        ),
        (
            vec![0x8b, 0x05, 0xff, 0xff, 0xff, 0xff],
            0x1000,
            "MOV EAX, [RIP-1]",
        ),
        // LEA with RIP-relative
        (
            vec![0x48, 0x8d, 0x05, 0x00, 0x00, 0x00, 0x00],
            0x2000,
            "LEA RAX, [RIP+0]",
        ),
        (
            vec![0x48, 0x8d, 0x05, 0x20, 0x00, 0x00, 0x00],
            0x2000,
            "LEA RAX, [RIP+32]",
        ),
        // CMP with RIP-relative and immediate
        (
            vec![0x48, 0x83, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x42],
            0x3000,
            "CMP QWORD PTR [RIP+0], 0x42",
        ),
    ];
    for (code, origin, desc) in test_cases {
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
    }
}
#[test]
/// Round-trips ModR/M and SIB special cases (base=5, disp-only, scaled
/// index) in 64-bit mode, and in 32-bit mode where the encoding is not
/// RIP-relative.
fn test_modrm_edge_cases() {
    let test_cases = vec![
        // SIB with base=5 (EBP/RBP special case)
        (
            vec![0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00],
            0x1000,
            "MOV EAX, [0]",
        ),
        (vec![0x8b, 0x44, 0x25, 0x10], 0x1000, "MOV EAX, [EBP+10h]"),
        // ModR/M mode=0, base=5 (special case)
        (
            vec![0x8b, 0x05, 0x00, 0x00, 0x00, 0x00],
            0x1000,
            "MOV EAX, [RIP+0] (32-bit: [disp32])",
        ),
        // Complex SIB combinations
        (
            vec![0x8b, 0x04, 0x85, 0x00, 0x00, 0x00, 0x00],
            0x1000,
            "MOV EAX, [EAX*4+0]",
        ),
        (
            vec![0x8b, 0x44, 0x85, 0x10],
            0x1000,
            "MOV EAX, [EBP+EAX*4+10h]",
        ),
    ];
    for (code, origin, desc) in test_cases {
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
        // Test in both 64-bit and 32-bit modes (excluding RIP-relative cases)
        if !code.starts_with(&[0x8b, 0x05]) {
            roundtrip_test(&code, origin, false)
                .unwrap_or_else(|e| panic!("Failed {} (32-bit): {}", desc, e));
        }
    }
}
#[test]
/// Round-trips each REX bit (W, R, X, B) individually and all of them
/// combined (64-bit mode only).
fn test_rex_prefix_combinations() {
    let test_cases = vec![
        // REX.W
        (vec![0x48, 0x89, 0xc8], 0x1000, "REX.W MOV RAX, RCX"),
        // REX.R (extends ModR/M reg field)
        (vec![0x44, 0x89, 0xc0], 0x1000, "REX.R MOV EAX, R8D"),
        // REX.X (extends SIB index field)
        (
            vec![0x42, 0x8b, 0x04, 0x00],
            0x1000,
            "REX.X MOV EAX, [RAX+R8*1]",
        ),
        // REX.B (extends ModR/M r/m field)
        (vec![0x41, 0x89, 0xc0], 0x1000, "REX.B MOV R8D, EAX"),
        // All REX bit combinations
        (
            vec![0x4f, 0x89, 0x04, 0x08],
            0x1000,
            "REX.WRXB MOV [R8+R9*1], RAX",
        ),
    ];
    for (code, origin, desc) in test_cases {
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
    }
}
#[test]
/// Round-trips 8/16/32/64-bit immediates, including the 66-prefix and
/// REX.W size overrides.
fn test_immediate_size_variations() {
    let test_cases = vec![
        // 8-bit immediate
        (vec![0x83, 0xc0, 0x10], 0x1000, "ADD EAX, 10h"),
        // 16-bit immediate with 66 prefix
        (vec![0x66, 0x81, 0xc0, 0x00, 0x10], 0x1000, "ADD AX, 1000h"),
        // 32-bit immediate
        (
            vec![0x81, 0xc0, 0x00, 0x00, 0x00, 0x10],
            0x1000,
            "ADD EAX, 10000000h",
        ),
        // 64-bit immediate with REX.W
        (
            vec![0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10],
            0x1000,
            "MOV RAX, 1000000000000000h",
        ),
        // Operand size prefix effect test
        (vec![0x66, 0xb8, 0x00, 0x10], 0x1000, "MOV AX, 1000h"),
    ];
    for (code, origin, desc) in test_cases {
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
    }
}
#[test]
/// Round-trips direct near calls (positive, zero, and negative relative
/// targets) plus register-indirect and RIP-indirect calls.
fn test_call_instruction_variations() {
    let test_cases = vec![
        // Near call with 32-bit displacement
        (vec![0xe8, 0x00, 0x00, 0x00, 0x00], 0x1000, "CALL +0"),
        (vec![0xe8, 0x10, 0x00, 0x00, 0x00], 0x1000, "CALL +10h"),
        (vec![0xe8, 0xf0, 0xff, 0xff, 0xff], 0x1000, "CALL -10h"),
        // Indirect call
        (vec![0xff, 0xd0], 0x1000, "CALL EAX"),
        (
            vec![0xff, 0x15, 0x00, 0x00, 0x00, 0x00],
            0x1000,
            "CALL [RIP+0]",
        ),
    ];
    for (code, origin, desc) in test_cases {
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
    }
}
#[test]
/// Round-trips short (8-bit) and near (32-bit) jumps, both unconditional
/// and conditional.
fn test_jump_instruction_variations() {
    let test_cases = vec![
        // Short jump (8-bit)
        (vec![0xeb, 0x10], 0x1000, "JMP SHORT +10h"),
        (vec![0xeb, 0xf0], 0x1000, "JMP SHORT -10h"),
        // Near jump (32-bit)
        (vec![0xe9, 0x00, 0x00, 0x00, 0x00], 0x1000, "JMP +0"),
        (vec![0xe9, 0x00, 0x10, 0x00, 0x00], 0x1000, "JMP +1000h"),
        // Conditional jumps
        (vec![0x0f, 0x84, 0x00, 0x00, 0x00, 0x00], 0x1000, "JZ +0"),
        (vec![0x74, 0x10], 0x1000, "JZ SHORT +10h"),
    ];
    for (code, origin, desc) in test_cases {
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
    }
}
#[test]
/// Round-trips VEX2 and VEX3 (maps 1 and 2) encoded instructions.
fn test_vex_prefix_combinations() {
    let test_cases = vec![
        // VEX2 prefix
        (
            vec![0xc5, 0xf8, 0x58, 0xc1],
            0x1000,
            "VEX2 VADDPS XMM0, XMM1",
        ),
        // VEX3 prefix - map 1
        (
            vec![0xc4, 0xe1, 0xf8, 0x58, 0xc1],
            0x1000,
            "VEX3 VADDPS XMM0, XMM1",
        ),
        // VEX3 prefix - map 2
        (
            vec![0xc4, 0xe2, 0x79, 0x00, 0xc1],
            0x1000,
            "VEX3 VPSHUFB XMM0, XMM1",
        ),
    ];
    for (code, origin, desc) in test_cases {
        // Exercised in 64-bit mode only: in 32-bit mode C4/C5 alias the
        // LDS/LES opcodes, so these byte sequences would be ambiguous there.
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
    }
}
#[test]
/// Round-trips RIP-relative encodings at boundary origins (low, high, and
/// page-boundary addresses) to stress the absolute/relative conversion.
fn test_edge_case_addresses() {
    // Test RIP-relative calculation at boundary addresses
    let test_cases = vec![
        // Near zero address
        (
            vec![0x8b, 0x05, 0x00, 0x00, 0x00, 0x00],
            0x10,
            "Low address",
        ),
        // High address
        (
            vec![0x8b, 0x05, 0x00, 0x00, 0x00, 0x00],
            0xfffff000,
            "High address",
        ),
        // Page boundary
        (
            vec![0x8b, 0x05, 0x00, 0x00, 0x00, 0x00],
            0x1000,
            "Page boundary",
        ),
        (
            vec![0x8b, 0x05, 0x00, 0x00, 0x00, 0x00],
            0x0ffe,
            "Near page boundary",
        ),
    ];
    for (code, origin, desc) in test_cases {
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
    }
}
#[test]
/// Round-trips a mixed sequence (call + NOP padding + imm64 MOV +
/// RIP-relative LEA) to check instruction boundaries are tracked correctly.
fn test_instruction_boundary_detection() {
    // Test correct boundary detection in complex instruction sequences
    let complex_sequence = vec![
        // CALL + padding pattern
        0xe8, 0x10, 0x00, 0x00, 0x00, // CALL +10h
        0x90, 0x90, 0x90, // NOP padding
        // MOV with immediate
        0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, // MOV RAX, imm64
        // RIP-relative LEA
        0x48, 0x8d, 0x05, 0x00, 0x00, 0x00, 0x00, // LEA RAX, [RIP+0]
    ];
    roundtrip_test(&complex_sequence, 0x1000, true)
        .unwrap_or_else(|e| panic!("Failed complex sequence: {}", e));
}
#[test]
/// Round-trips encodings in both operating modes to cover mode-dependent
/// interpretation differences.
fn test_32bit_vs_64bit_differences() {
    // Instructions that are interpreted differently in 32-bit vs 64-bit modes
    let test_cases = vec![
        // PUSH/POP immediate - some only valid in 32-bit
        (vec![0x68, 0x00, 0x00, 0x00, 0x00], "PUSH imm32"),
        // General MOV
        (vec![0x89, 0xc0], "MOV EAX, EAX"),
        // Address size prefix effect
        (vec![0x67, 0x8b, 0x00], "MOV EAX, [EAX] with addr32 prefix"),
    ];
    for (code, desc) in test_cases {
        // Test in 64-bit mode
        roundtrip_test(&code, 0x1000, true)
            .unwrap_or_else(|e| panic!("Failed {} (64-bit): {}", desc, e));
        // Test in 32-bit mode
        roundtrip_test(&code, 0x1000, false)
            .unwrap_or_else(|e| panic!("Failed {} (32-bit): {}", desc, e));
    }
}
#[test]
/// Round-trips repeated calls to the same and different targets, which
/// exercises the call-target cache during encode/decode.
fn test_call_cache_behavior() {
    // Pattern with repeated calls to the same target
    let repeated_calls = vec![
        0xe8, 0x10, 0x00, 0x00, 0x00, // CALL +10h
        0x90, 0x90, // padding
        0xe8, 0x10, 0x00, 0x00, 0x00, // CALL +10h (same target)
        0x90, 0x90, // padding
        0xe8, 0x20, 0x00, 0x00, 0x00, // CALL +20h (different target)
        0xe8, 0x10, 0x00, 0x00, 0x00, // CALL +10h (first target again)
    ];
    roundtrip_test(&repeated_calls, 0x1000, true)
        .unwrap_or_else(|e| panic!("Failed call cache test: {}", e));
}
// Test accuracy of individual functions
#[test]
/// Spot-checks shuffle/unshuffle boundary values beyond the exhaustive
/// round-trip tests above.
fn test_shuffle_functions_edge_cases() {
    // VEX3 shuffle with boundary values
    assert_eq!(shuffle_vex3([0x00, 0x00]), Some(([0x80, 0x07], 0)));
    assert_eq!(shuffle_vex3([0x10, 0x00]), None); // bit 4 set should fail
    // VEX2 boundary test
    let (vex, map) = shuffle_vex2([0xff]);
    assert_eq!(unshuffle_vex2(vex, map), Some([0xff]));
    // EVEX with various combinations
    let test_evex = [0x07, 0xfb, 0xd7]; // max valid values
    let (evex_out, map) = shuffle_evex(test_evex);
    assert_eq!(unshuffle_evex(evex_out, map), Some(test_evex));
}
#[test]
/// Round-trips multiple legacy prefixes in varying orders, including
/// repeated segment overrides.
fn test_prefix_combinations() {
    // Multiple prefixes in different orders
    let test_cases = vec![
        // REP + OSIZE
        (vec![0xf3, 0x66, 0xa5], 0x1000, "REP MOVSW"),
        (vec![0x66, 0xf3, 0xa5], 0x1000, "REP MOVSW (reversed order)"),
        // Segment override + address size
        (
            vec![0x2e, 0x67, 0x8b, 0x00],
            0x1000,
            "MOV EAX, CS:[EAX] (32-bit addr)",
        ),
        (
            vec![0x67, 0x2e, 0x8b, 0x00],
            0x1000,
            "MOV EAX, CS:[EAX] (reversed order)",
        ),
        // Multiple segment overrides (last one wins)
        (
            vec![0x26, 0x2e, 0x8b, 0x00],
            0x1000,
            "MOV EAX, CS:[EAX] (ES then CS)",
        ),
    ];
    for (code, origin, desc) in test_cases {
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
    }
}
#[test]
/// Round-trips F6/F7 group-3 forms: TEST takes an immediate, while
/// NOT/NEG (reg field >= 2) do not.
fn test_grp3_immediate_variants() {
    // Test F6/F7 group 3 instructions with and without immediates
    let test_cases = vec![
        // TEST with immediate (reg field 0-1)
        (vec![0xf6, 0xc0, 0x55], 0x1000, "TEST AL, 55h"),
        (
            vec![0xf7, 0xc0, 0x55, 0x55, 0x55, 0x55],
            0x1000,
            "TEST EAX, 55555555h",
        ),
        // NOT without immediate (reg field 2)
        (vec![0xf6, 0xd0], 0x1000, "NOT AL"),
        (vec![0xf7, 0xd0], 0x1000, "NOT EAX"),
        // NEG without immediate (reg field 3)
        (vec![0xf6, 0xd8], 0x1000, "NEG AL"),
        (vec![0xf7, 0xd8], 0x1000, "NEG EAX"),
    ];
    for (code, origin, desc) in test_cases {
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
    }
}
#[test]
/// Round-trips direct (moffs) addressing, large-displacement RIP-relative
/// forms, and full base+index*scale+disp32 SIB encodings.
fn test_special_addressing_modes() {
    // Test various special addressing modes
    let test_cases = vec![
        // Direct addressing (32-bit only)
        (
            vec![0xa1, 0x00, 0x00, 0x40, 0x00],
            0x1000,
            "MOV EAX, [400000h]",
        ),
        // RIP-relative with large positive displacement
        (
            vec![0x8b, 0x05, 0x00, 0x00, 0x10, 0x00],
            0x1000,
            "MOV EAX, [RIP+100000h]",
        ),
        // RIP-relative with large negative displacement
        (
            vec![0x8b, 0x05, 0x00, 0x00, 0xf0, 0xff],
            0x1000,
            "MOV EAX, [RIP-100000h]",
        ),
        // Complex SIB: [base + index*scale + disp32]
        (
            vec![0x8b, 0x84, 0x88, 0x00, 0x10, 0x00, 0x00],
            0x1000,
            "MOV EAX, [EAX+ECX*4+1000h]",
        ),
    ];
    for (code, origin, desc) in test_cases {
        let is_64bit = !desc.contains("[400000h]"); // Direct addressing is 32-bit only
        roundtrip_test(&code, origin, is_64bit)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
        // Also test in 32-bit mode where applicable
        if !desc.contains("RIP") {
            roundtrip_test(&code, origin, false)
                .unwrap_or_else(|e| panic!("Failed {} (32-bit): {}", desc, e));
        }
    }
}
#[test]
/// Round-trips bytes the encoder cannot treat as instructions and must
/// escape verbatim.
fn test_escape_sequences() {
    // Test instructions that get escaped (invalid opcodes)
    let test_cases = vec![
        // Omit VEX/EVEX tests as they have complex validation rules
        // Raw bytes that should be escaped
        (vec![0xf4], 0x1000, "HLT (should be escaped)"),
        // Note: F1 (INT1) might be used as JUMPTAB marker, so skip it
    ];
    for (code, origin, desc) in test_cases {
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
    }
}
#[test]
/// Round-trips sequences at section edges: a call whose target wraps before
/// the section, pointer-like data, and data mixed between instructions.
fn test_boundary_instructions() {
    // Instructions at the boundary of code sections
    let test_cases = vec![
        // CALL near end of section
        (
            vec![0xe8, 0xfb, 0xff, 0xff, 0xff],
            0x1000,
            "CALL -5 (near boundary)",
        ),
        // Jump table detection edge case
        (
            vec![0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00],
            0x1000,
            "Potential jump table data",
        ),
        // Mixed code and data
        (
            vec![0x90, 0x00, 0x00, 0x00, 0x00, 0x90],
            0x1000,
            "NOP, data, NOP",
        ),
    ];
    for (code, origin, desc) in test_cases {
        roundtrip_test(&code, origin, true)
            .unwrap_or_else(|e| panic!("Failed {}: {}", desc, e));
    }
}
#[test]
fn test_rex2_prefix() {
    // Exercise the D5 (REX2) prefix path, if the transformer supports it.
    // All cases share the same origin, so it is inlined into the call.
    let cases = vec![
        (vec![0xd5, 0x00, 0x90], "REX2 NOP"),
        (vec![0xd5, 0x48, 0x89, 0xc0], "REX2.W MOV RAX, RAX"),
    ];
    for (bytes, label) in cases {
        roundtrip_test(&bytes, 0x1000, true)
            .unwrap_or_else(|err| panic!("Failed {}: {}", label, err));
    }
}
#[test]
fn test_stress_patterns() {
    // Realistic code shapes: a typical x86-64 function prologue/epilogue,
    // and an indirect jump through a scaled-index memory operand.
    let prologue = vec![
        0x55, // PUSH RBP
        0x48, 0x89, 0xe5, // MOV RBP, RSP
        0x48, 0x83, 0xec, 0x20, // SUB RSP, 20h
        0x48, 0x89, 0x4d, 0x10, // MOV [RBP+10h], RCX
        0x48, 0x8b, 0x45, 0x10, // MOV RAX, [RBP+10h]
        0x48, 0x83, 0xc4, 0x20, // ADD RSP, 20h
        0x5d, // POP RBP
        0xc3, // RET
    ];
    // A plain indirect jump (no full jump table here); NOP padding follows.
    let indirect_jump = vec![
        0xff, 0x24, 0x85, 0x00, 0x20, 0x40, 0x00, // JMP [RAX*4+402000h]
        0x90, 0x90, 0x90, // padding
    ];
    roundtrip_test(&prologue, 0x401000, true)
        .unwrap_or_else(|e| panic!("Failed function prologue: {}", e));
    roundtrip_test(&indirect_jump, 0x401000, true)
        .unwrap_or_else(|e| panic!("Failed indirect jump pattern: {}", e));
}
#[test]
fn test_jump_table_detection() {
    // Local roundtrip helper: on mismatch, reports the first diverging
    // offset along with up to 8 bytes of context from both buffers.
    let verify = |bytes: &[u8], base: u64, wide: bool| -> Result<(), String> {
        let original = bytes.to_vec();
        let packed = encode(original.clone(), base, wide).ok_or("encoding failed")?;
        let unpacked = decode(&(&packed).into(), wide).ok_or("decoding failed")?;
        if original == unpacked {
            return Ok(());
        }
        // First differing byte; if one buffer is a prefix of the other,
        // point at the end of the shorter one.
        let diff_at = original
            .iter()
            .zip(&unpacked)
            .position(|(a, b)| a != b)
            .unwrap_or(original.len().min(unpacked.len()));
        let orig_end = (diff_at + 8).min(original.len());
        let dec_end = (diff_at + 8).min(unpacked.len());
        Err(format!(
            "Length mismatch or content mismatch at offset {}: \
             input_len={}, decoded_len={}, input[{}..{}]={:02X?}, decoded[{}..{}]={:02X?}",
            diff_at,
            original.len(),
            unpacked.len(),
            diff_at,
            orig_end,
            &original[diff_at..orig_end],
            diff_at,
            dec_end,
            &unpacked[diff_at..dec_end]
        ))
    };
    // Scenario 1: clear-cut 32-bit jump table — five in-range targets.
    let obvious_jump_table_32 = vec![
        0x00, 0x10, 0x00, 0x00, // 0x1000
        0x20, 0x10, 0x00, 0x00, // 0x1020
        0x40, 0x10, 0x00, 0x00, // 0x1040
        0x60, 0x10, 0x00, 0x00, // 0x1060
        0x80, 0x10, 0x00, 0x00, // 0x1080
    ];
    // Scenario 2: mixed in-range and out-of-range words — must NOT be
    // classified as a jump table.
    let not_jump_table = vec![
        0x00, 0x10, 0x00, 0x00, // 0x1000 (valid)
        0x00, 0x00, 0x80, 0x00, // 0x800000 (out of range)
        0x40, 0x10, 0x00, 0x00, // 0x1040 (valid)
        0xFF, 0xFF, 0xFF, 0xFF, // -1 (invalid)
    ];
    // Scenario 3: exactly MIN_JUMPTAB entries — the detection threshold.
    let minimal_jump_table_32 = vec![
        0x00, 0x10, 0x00, 0x00, // 0x1000
        0x20, 0x10, 0x00, 0x00, // 0x1020
        0x40, 0x10, 0x00, 0x00, // 0x1040
    ];
    // Scenario 4: 64-bit table of signed RIP-relative offsets.
    let jump_table_64 = vec![
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0 offset
        0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // +32 offset
        0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // +64 offset
        0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // -32 offset
    ];
    // Scenario 5: address-looking words knocked out of alignment by a NOP.
    let misaligned_data = vec![
        0x90, // NOP to break alignment
        0x00, 0x10, 0x00, 0x00, // looks like an address but misaligned
        0x20, 0x10, 0x00, 0x00, 0x40, 0x10, 0x00, 0x00,
    ];
    // Scenario 6: indirect jump followed by table-like words; too few
    // entries / wrong context, so no jump table should be detected.
    let instruction_then_data = vec![
        0xff, 0x24, 0x85, 0x00, 0x20, 0x40, 0x00, // JMP [RAX*4+402000h]
        0x00, 0x10, 0x40, 0x00, // 0x401000
        0x10, 0x10, 0x40, 0x00, // 0x401010
        0x20, 0x10, 0x40, 0x00, // 0x401020
    ];
    // Scenario 7: targets at the very edges of the code range.
    let boundary_addresses = vec![
        0x00, 0x10, 0x00, 0x00, // start of range
        0xFF, 0x1F, 0x00, 0x00, // near end of range
        0x00, 0x10, 0x00, 0x00, // back to start
    ];
    // (announce label, failure label, data, origin, 64-bit?) — the two
    // labels differ for some scenarios, so both are carried explicitly.
    let scenarios = vec![
        (
            "32-bit obvious jump table",
            "32-bit obvious jump table",
            &obvious_jump_table_32,
            0x1000u64,
            false,
        ),
        (
            "mixed data (should not be jump table)",
            "mixed data test",
            &not_jump_table,
            0x1000,
            false,
        ),
        (
            "minimal jump table (exactly MIN_JUMPTAB entries)",
            "minimal jump table",
            &minimal_jump_table_32,
            0x1000,
            false,
        ),
        // The 64-bit table is only tested at an 8-byte aligned origin.
        ("64-bit jump table", "64-bit jump table", &jump_table_64, 0x1000, true),
        ("misaligned data", "misaligned data", &misaligned_data, 0x1000, false),
        (
            "instruction followed by data",
            "instruction+data pattern",
            &instruction_then_data,
            0x401000,
            true,
        ),
        ("boundary addresses", "boundary addresses", &boundary_addresses, 0x1000, false),
    ];
    for (announce, label, bytes, base, wide) in scenarios {
        println!("Testing {}...", announce);
        verify(bytes, base, wide).unwrap_or_else(|e| panic!("Failed {}: {}", label, e));
    }
    println!("All jump table tests passed!");
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment