Created
June 18, 2024 01:25
-
-
Save lifthrasiir/df47509caac2f065032ef72e70f2ec05 to your computer and use it in GitHub Desktop.
Disfilter: Fabian Giesen's x86-32 transformer, reworked for x86-64 in Rust (WIP)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// TODO: | |
// - Better error checking | |
// - Better code structure | |
// - Detect function boundary (skip CC or multi-byte nops) | |
use std::array; | |
use std::cell::Cell; | |
use std::fmt; | |
use std::fs::File; | |
use std::io::{self, BufReader, Read, Seek, SeekFrom, Write}; | |
use std::mem; | |
use bytemuck::must_cast_slice; | |
// Compile-time tracing switch for the encoder: expands to `cfg!(trace_encode)`,
// so tracing is on only when built with `--cfg trace_encode` and the guarded
// `print!` calls compile away entirely otherwise.
macro_rules! log_encode {
    () => {
        cfg!(trace_encode)
    };
}
// Compile-time tracing switch for the decoder: expands to `cfg!(trace_decode)`,
// enabled only when built with `--cfg trace_decode`.
macro_rules! log_decode {
    () => {
        cfg!(trace_decode)
    };
}
/// Decodes a little-endian `u16` from a slice that must be exactly 2 bytes.
fn to_u16(s: &[u8]) -> u16 {
    let bytes: [u8; 2] = s.try_into().unwrap();
    u16::from_le_bytes(bytes)
}
/// Decodes a little-endian `u32` from a slice that must be exactly 4 bytes.
fn to_u32(s: &[u8]) -> u32 {
    let bytes: [u8; 4] = s.try_into().unwrap();
    u32::from_le_bytes(bytes)
}
/// Decodes a little-endian `u64` from a slice that must be exactly 8 bytes.
fn to_u64(s: &[u8]) -> u64 {
    let bytes: [u8; 8] = s.try_into().unwrap();
    u64::from_le_bytes(bytes)
}
// Per-opcode operand-layout flag nibble stored in the OPCODES* tables below.
// N*/J*/A_ describe the trailing immediate or address; R* additionally signal
// a ModR/M byte (its low two bits name the immediate, see modrm_to_imm);
// the relative ordering of these values is asserted in the const block below.
const N_: u8 = 0b0000; // no immediate
const N1: u8 = 0b0001; // 8-bit immediate
const N2: u8 = 0b0010; // 16-bit immediate
const N4: u8 = 0b0011; // 32/16-bit immediate, depending on operand size prefix
const NZ: u8 = 0b0100; // 32/64-bit immediate, depending on REX.W
const J1: u8 = 0b0101; // 8-bit relative jump target
const J4: u8 = 0b0110; // 32-bit relative jump target
const JA: u8 = 0b0111; // 32-bit absolute jump target
const A_: u8 = 0b1000; // 16/32/64-bit absolute address that is not a jump target,
                       // exact bit size depends on the instruction and context
const R1: u8 = 0b1001; // modR/M + 8-bit immediate
const XX: u8 = 0b1010; // invalid, has to be escaped
const R4: u8 = 0b1011; // modR/M + 32/16-bit immediate, depending on operand size prefix
const R_: u8 = 0b1100; // modR/M + no immediate
const BP: u8 = 0b1101; // 1-byte prefix that will be marked
const M2: u8 = 0b1110; // opcode byte + modR/M + no immediate (map 2)
const M3: u8 = 0b1111; // opcode byte + modR/M + 8-bit immediate (map 3)
#[inline(always)] | |
const fn op3_followed(flags: u8) -> bool { | |
flags >= M2 | |
} | |
#[inline(always)] | |
const fn has_modrm(flags: u8) -> bool { | |
flags >= R1 | |
} | |
/// Maps a ModR/M-carrying flag (R_/R1/R4) to its immediate-only counterpart
/// (N_/N1/N4); relies on the low two bits of the encodings lining up,
/// which is asserted in the const block below.
#[inline(always)]
const fn modrm_to_imm(flags: u8) -> u8 {
    flags % 4
}
// Compile-time sanity checks for the flag encodings relied upon elsewhere:
// N_..NZ are consecutive (used for size arithmetic), the low two bits map
// R* onto the matching N* (see modrm_to_imm), and M2/M3 are adjacent.
const _: () = {
    assert!(N_ + 1 == N1 && N1 + 1 == N2 && N2 + 1 == N4 && N4 + 1 == NZ);
    assert!(modrm_to_imm(R_) == N_ && modrm_to_imm(R1) == N1 && modrm_to_imm(R4) == N4);
    assert!(M2 + 1 == M3);
};
// https://sandpile.org/ | |
// | |
// https://github.com/torvalds/linux/blob/master/arch/x86/lib/x86-opcode-map.txt | |
// - Placeholders are an argument made of one uppercase letter and one lowercase letter. | |
// - Immediate or address is present if some placeholder starts with AIJLO. | |
// - For -b/-w/-d/-z placeholders, its size is fixed to 8/16/32/32 bits. | |
// - For -v placeholders, its size is normally 32 bits but becomes 16 bits with 66 prefixed. | |
// - -p placeholder introduces an additional 16-bit segment selector before address. | |
// - L- placeholder introduces an additional 8-bit register selector. | |
// - ModR/M byte is present if some placeholder starts with CDEGMNPQRSTUVW. | |
// 1-byte opcodes (legacy map 0)
// One flag nibble per opcode byte; rows are the high nibble (0x00..0xF0),
// columns the low nibble. See the flag constants above for meanings.
const OPCODES0: [u8; 256] = [
    R_, R_, R_, R_, N1, N4, N_, N_, R_, R_, R_, R_, N1, N4, N_, XX, // 0
    R_, R_, R_, R_, N1, N4, N_, N_, R_, R_, R_, R_, N1, N4, N_, N_, // 1
    R_, R_, R_, R_, N1, N4, BP, N_, R_, R_, R_, R_, N1, N4, BP, N_, // 2
    R_, R_, R_, R_, N1, N4, BP, N_, R_, R_, R_, R_, N1, N4, BP, N_, // 3
    N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, // 4
    N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, // 5
    N_, N_, R_, R_, BP, BP, BP, BP, N4, R4, N1, R1, N_, N_, N_, N_, // 6
    J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, // 7
    R1, R4, R1, R1, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 8
    N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, JA, N_, N_, N_, N_, N_, // 9
    A_, A_, A_, A_, N_, N_, N_, N_, N1, N4, N_, N_, N_, N_, N_, N_, // A
    N1, N1, N1, N1, N1, N1, N1, N1, NZ, NZ, NZ, NZ, NZ, NZ, NZ, NZ, // B
    R1, R1, N2, N_, R_, R_, R1, R4, N2, N_, N2, N_, N_, N1, N_, N_, // C
    R_, R_, R_, R_, N1, N1, N_, N_, R_, R_, R_, R_, R_, R_, R_, R_, // D
    J1, J1, J1, J1, N1, N1, N1, N1, J4, J4, A_, J1, N_, N_, N_, N_, // E
    BP, N_, BP, BP, N_, N_, R1, R4, N_, N_, N_, N_, N_, N_, R_, R_, // F
];
// Some 1-byte opcodes are outright invalid in x86-64.
// Bitset indexed by opcode: entry [op >> 5] bit (op & 31) is 1 when the
// opcode is invalid in long mode (e.g. 0x06 PUSH ES, 0x40-4F reused as REX).
const OPCODES0_I64: [u32; 8] = [
    //FEDCBA9876543210 FEDCBA9876543210
    0b0100000011000000_1100000011000000, // 1x, 0x
    0b1000000010000000_1000000010000000, // 3x, 2x
    0b0000000000000000_1111111111111111, // 5x, 4x
    0b0000000000000000_0000000000000111, // 7x, 6x
    0b0000010000000000_0000000000000100, // 9x, 8x
    0b0000000000000000_0000000000000000, // Bx, Ax
    0b0000000001110000_0100000000110000, // Dx, Cx
    0b0000000000000000_0000010000000000, // Fx, Ex
];
// Well-known prefix bytes. The *VEX/REX2 bytes double as opcodes in x86-32;
// whether they act as prefixes is disambiguated at the use site.
const PRE_VEX3: u8 = 0xc4;
const PRE_VEX2: u8 = 0xc5;
const PRE_EVEX: u8 = 0x62;
const PRE_REX2: u8 = 0xd5;
const PRE_2BYTE: u8 = 0x0f;
const PRE_OSIZE: u8 = 0x66;
const PRE_REPNE: u8 = 0xf2;
const PRE_REP: u8 = 0xf3;
// Opcode identifiers below pack the map index in the high byte and the
// opcode byte in the low byte (`map_opcode` as written, e.g. 0x0_e8).
const OP_CALLN: u16 = 0x0_e8; // CALL Jz
// Opcodes in the map 0 that need an additional 16-bit immediate.
const OP_CALLF: u16 = 0x0_9a; // CALL Ap (32-bit only)
const OP_JMPF: u16 = 0x0_ea; // JMP Ap (32-bit only)
const OP_ENTER: u16 = 0x0_c8; // ENTER Iw,Ib
// Opcodes in the map 0 that have immediates only with /0 or /1.
const OP_GRP3_1: u16 = 0x0_f6; // TEST Eb,Ib; NOT/NEG/[I]MUL/[I]DIV Eb
const OP_GRP3_2: u16 = 0x0_f7; // TEST Ev,Iv; NOT/NEG/[I]MUL/[I]DIV Ev
// 2-byte opcodes, starting with 0F (legacy map 1)
const OPCODES1: [u8; 256] = [
    R_, R_, N_, N_, XX, N_, N_, N_, N_, N_, XX, XX, XX, R_, N_, R1, // 0F 0
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, XX, R_, R_, // 0F 1
    R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 2
    N_, N_, N_, N_, N_, N_, XX, N_, M2, XX, M3, XX, XX, XX, XX, XX, // 0F 3
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 4
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 5
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 6
    R1, R1, R1, R1, R_, R_, R_, N_, N_, R_, R_, R_, R_, R_, R_, R_, // 0F 7
    J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, // 0F 8
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 9
    N_, N_, N_, R_, R1, R_, R_, R_, N_, N_, N_, R_, R1, R_, R_, R_, // 0F A
    R_, R_, R_, R_, R_, R_, R_, R_, N_, R_, R1, R_, R_, R_, R_, R_, // 0F B
    R_, R_, R1, R_, R1, R1, R1, R_, N_, N_, N_, N_, N_, N_, N_, N_, // 0F C
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F D
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F E
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, XX, // 0F F
];
// Opcode in the map 1 that has a ModR/M byte only with some prefixes.
const OP_JMPE_POPCNT: u16 = 0x1_b8; // JMPE Jz (IA-64 only); POPCNT Gv,Ev (with F3 prefix)
// EVEX opcode map 4 (largely a subset of the opcode map 0)
const OPCODES4: [u8; 256] = [
    R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, XX, XX, XX, XX, // 0
    R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, XX, XX, XX, XX, // 1
    R_, R_, R_, R_, R1, XX, XX, XX, R_, R_, R_, R_, R1, XX, XX, XX, // 2
    R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, XX, XX, XX, XX, // 3
    R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 4
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 5
    R_, R_, XX, XX, XX, R_, R_, XX, XX, R4, XX, R1, XX, XX, XX, XX, // 6
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 7
    R1, R4, XX, R1, N_, N_, XX, XX, R_, XX, XX, XX, XX, XX, XX, R_, // 8
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 9
    XX, XX, XX, XX, XX, R_, XX, XX, XX, XX, XX, XX, XX, R_, XX, R_, // A
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // B
    R1, R1, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // C
    R_, R_, R_, R_, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // D
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // E
    R_, R_, R_, XX, R_, R_, R1, R4, R_, R_, XX, XX, XX, XX, R_, R_, // F
];
// Map-4 counterparts of the group-3 opcodes (immediate only with /0-1).
const OP_MAP4_GRP3_1: u16 = 0x4_f6;
const OP_MAP4_GRP3_2: u16 = 0x4_f7;
// EVEX opcode map 7 (only F8 = URDMSR/UWRMSR is defined so far)
const OPCODES7: [u8; 256] = [
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 1
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 2
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 3
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 4
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 5
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 6
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 7
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 8
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 9
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // A
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // B
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // C
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // D
    XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // E
    XX, XX, XX, XX, XX, XX, XX, XX, R4, XX, XX, XX, XX, XX, XX, XX, // F
];
// Opcode in the map 7 which immediate isn't affected by the operand size prefix.
const OP_URDMSR_UWRMSR: u16 = 0x7_f8; // URDMSR Rq,Id; UWRMSR Id,Rq
// Special bytes in the ST_OP stream. They should be available in any operating modes
// and decode without any more operand, but yet have to be extremely unlikely to occur.
//
// - Since we regularize any *VEX & REX prefixes, a single byte REX prefix is
//   also rewritten as a uniform marker followed by a normalized REX data byte.
//   REX bytes themselves (40-4F) are used as an opcode map prefix instead,
//   so the original 2-byte prefix (0F) is repurposed for the REX marker instead.
//
// - Jump tables are indicated by INT1 (F1), which is meant to be used for hardware debugging
//   and therefore extremely unlikely to occur in the regular x86 opcode stream.
//
// - Verbatim bytes are indicated by HLT (F4), which is privileged and can only occur rarely
//   due to its semantics, making it a good opcode to steal.
//
// The original disfilter used to use INTO (CE) instead of HLT, but it is now invalid in
// the long mode and has a chance to be repurposed in the future.
const REX_MARKER: u8 = 0x0f;
const JUMPTAB: u8 = 0xf1;
const ESC: u8 = 0xf4;
// Looks up the operand-layout flag nibble for opcode `op` in opcode map `map`
// (`is64` additionally rejects map-0 opcodes invalid in long mode).
//
// The four 256-entry tables are packed into a single u16-per-opcode table,
// 4 bits per map; MAP_FLAGS either gives the nibble's shift for table-backed
// maps (0x20 | shift) or a fixed flag for maps without their own table
// (maps 2/3 are the legacy 0F38/0F3A layouts; maps 5/6 presumably the EVEX
// maps with plain ModR/M and no immediate — TODO confirm).
#[inline(always)]
fn lookup_opcode(op: u8, map: u8, is64: bool) -> u8 {
    const OPCODES: [u16; 256] = {
        let mut tab = [0u16; 256];
        let mut i = 0;
        while i < 256 {
            // Every flag must fit in 4 bits so four maps can share one u16.
            assert!(OPCODES0[i] | OPCODES1[i] | OPCODES4[i] | OPCODES7[i] < 16);
            tab[i] = OPCODES0[i] as u16
                | (OPCODES1[i] as u16) << 4
                | (OPCODES4[i] as u16) << 8
                | (OPCODES7[i] as u16) << 12;
            i += 1;
        }
        // Override escape codes.
        // ESC and JUMPTAB are stolen from map 0, so they must decode as XX there.
        assert!(OPCODES0[ESC as usize] == N_);
        assert!(OPCODES0[JUMPTAB as usize] == N_);
        tab[ESC as usize] = tab[ESC as usize] & !0xf | XX as u16;
        tab[JUMPTAB as usize] = tab[JUMPTAB as usize] & !0xf | XX as u16;
        tab
    };
    const MAP_FLAGS: [u8; 16] = [
        0x20, 0x24, R_, R1, 0x28, R_, R_, 0x2c, XX, XX, XX, XX, XX, XX, XX, XX,
    ];
    if map == 0 && is64 && OPCODES0_I64[op as usize >> 5] >> (op as u32 & 31) & 1 == 1 {
        XX
    } else {
        let map_flags = MAP_FLAGS[map as usize];
        if map_flags < 0x20 {
            // Fixed flag for maps without a table of their own.
            map_flags
        } else {
            // Extract this map's nibble from the packed table.
            (OPCODES[op as usize] >> (map_flags & 0x1f)) as u8 & 15
        }
    }
}
/// Perfect-hashes a legacy prefix byte into a bit position 0..32
/// (collision-freedom over the 11 prefixes is asserted below).
#[inline(always)]
const fn prefix_hash(b: u8) -> u32 {
    let v = b as u32;
    let mixed = (v << 2) ^ (v >> 2);
    mixed % 32
}
#[inline(always)] | |
const fn has_osize_prefix(prefixes: u32) -> bool { | |
(prefixes >> prefix_hash(PRE_OSIZE)) & 1 != 0 | |
} | |
#[inline(always)] | |
const fn has_rep_prefix(prefixes: u32) -> bool { | |
prefixes & ((1 << prefix_hash(PRE_REPNE)) | (1 << prefix_hash(PRE_REP))) != 0 | |
} | |
// Compile-time check that prefix_hash() is collision-free over all 11
// one-byte legacy prefixes, and that each of them is flagged BP in map 0.
const _: () = {
    let prefixes = [
        0x26, 0x2e, 0x36, 0x3e, 0x64, 0x65, 0x66, 0x67, 0xf0, 0xf2, 0xf3,
    ];
    let mut bitset = 0;
    let mut i = 0;
    while i < prefixes.len() {
        let b = prefixes[i];
        bitset |= 1u32 << prefix_hash(b);
        assert!(OPCODES0[b as usize] == BP);
        i += 1;
    }
    // 11 distinct bits set <=> no two prefixes hash to the same position.
    assert!(bitset.count_ones() == 11);
};
// Declares the output streams: one `ST_*` index constant per name (assigned
// by counting recursively with the `@` rules), plus NUM_STREAMS, STREAM_NAMES
// (for tracing) and STREAM_SIZES (fixed element size converted from bits to
// bytes; a 0-bit literal means variable-length/unchecked).
macro_rules! define_streams {
    ($($i:ident $s:literal),* $(,)?) => (
        define_streams! { @0 $($i)* }
        const STREAM_NAMES: [&str; NUM_STREAMS] = [$(stringify!($i)),*];
        const STREAM_SIZES: [usize; NUM_STREAMS] = [$($s / 8),*];
    );
    // Recursive case: emit one index constant and keep counting.
    (@$c:tt $i:ident $($t:tt)*) => (
        #[allow(dead_code)] const $i: usize = $c;
        define_streams! { @(1 + $c) $($t)* }
    );
    // Base case: the accumulated count is the total number of streams.
    (@$c:tt) => (
        const NUM_STREAMS: usize = $c;
    );
}
// Stream layout; the number after each name is the element size in bits
// (0 = variable length). The 16 ST_PAD* streams stripe padding by the low
// four bits of its starting address.
define_streams! {
    ST_OP 8,
    ST_EVEX 8, ST_VEX 8, ST_REX 8, ST_SIB 8,
    ST_CALL_IDX 8,
    ST_DISP8_R0 8, ST_DISP8_R1 8, ST_DISP8_R2 8, ST_DISP8_R3 8,
    ST_DISP8_R4 8, ST_DISP8_R5 8, ST_DISP8_R6 8, ST_DISP8_R7 8,
    ST_DISP32 32,
    ST_JUMP8 8, ST_JUMP32 32, ST_JUMP64 64,
    ST_IMM8 8, ST_IMM16 16, ST_IMM32 32, ST_IMM64 64,
    ST_ADDR16 16, ST_ADDR32 32, ST_ADDR64 64,
    ST_CALL32 32, ST_CALL64 64,
    ST_JUMPTAB64 64,
    ST_PAD0 0, ST_PAD1 0, ST_PAD2 0, ST_PAD3 0, ST_PAD4 0, ST_PAD5 0, ST_PAD6 0, ST_PAD7 0,
    ST_PAD8 0, ST_PAD9 0, ST_PAD10 0, ST_PAD11 0, ST_PAD12 0, ST_PAD13 0, ST_PAD14 0, ST_PAD15 0,
}
// Logical aliases: these kinds of data share a physical stream.
const ST_MODRM: usize = ST_OP;
const ST_AJUMP32: usize = ST_JUMP32;
const ST_JUMPTAB_COUNT: usize = ST_OP;
/// Encoder output: the load origin plus one byte stream per operand kind.
#[derive(Debug)]
pub struct Streams {
    // Virtual address the encoded code was loaded at (first byte's address).
    origin: u64,
    // Per-kind output buffers, indexed by the ST_* constants.
    streams: [Vec<u8>; NUM_STREAMS],
}
impl Streams {
    /// Creates an empty set of streams for code loaded at `origin`.
    fn new(origin: u64) -> Self {
        Self {
            origin,
            streams: array::from_fn(|_| Vec::new()),
        }
    }
    /// Debug-checks that a `size`-byte write matches the stream's fixed
    /// element size (streams declared with size 0 are unchecked).
    fn check(&self, st: usize, size: usize) {
        let expected = STREAM_SIZES[st];
        if expected > 0 {
            debug_assert_eq!(size, expected);
        }
    }
    /// Appends one byte to stream `st` (with optional tracing).
    fn put8(&mut self, st: usize, v: u8) {
        if log_encode!() {
            // `[3..]` strips the "ST_" prefix from the stream name.
            print!("({}:{v:02X})", &STREAM_NAMES[st][3..]);
        }
        self.check(st, 1);
        self.streams[st].push(v);
    }
    /// Appends a little-endian u32 to stream `st`.
    fn put32(&mut self, st: usize, v: u32) {
        if log_encode!() {
            print!("({}:{v:08X})", &STREAM_NAMES[st][3..]);
        }
        self.check(st, 4);
        self.streams[st].extend_from_slice(&v.to_le_bytes());
    }
    /// Appends a little-endian u64 to stream `st`.
    fn put64(&mut self, st: usize, v: u64) {
        if log_encode!() {
            print!("({}:{v:016X})", &STREAM_NAMES[st][3..]);
        }
        self.check(st, 8);
        self.streams[st].extend_from_slice(&v.to_le_bytes());
    }
    /// Appends raw bytes to stream `st` verbatim.
    fn copy(&mut self, st: usize, v: &[u8]) {
        if log_encode!() {
            print!("({}:{})", &STREAM_NAMES[st][3..], {
                // Local adapter to hex-format the slice without allocating.
                struct Hex<'a>(&'a [u8]);
                impl fmt::Display for Hex<'_> {
                    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                        for b in self.0 {
                            write!(f, "{b:02X}")?;
                        }
                        Ok(())
                    }
                }
                Hex(v)
            });
        }
        self.check(st, v.len());
        self.streams[st].extend_from_slice(v);
    }
    /// Emits an opcode byte, preceded by a map marker when the opcode is not
    /// in map 0 or would collide with the 40-4F map-marker range.
    fn op(&mut self, map: u8, op: u8) {
        if map > 0 || op & 0xf0 == 0x40 {
            self.op_map(map);
        }
        self.put8(ST_OP, op);
    }
    /// Emits a verbatim byte escaped with the ESC marker.
    fn op_esc(&mut self, b: u8) {
        if log_encode!() {
            //print!("(OP:{ESC:02X}=ESC)(OP:{b:02X})");
            print!("(OP:{ESC:02X})(OP:{b:02X})");
        }
        self.streams[ST_OP].extend_from_slice(&[ESC, b]);
    }
    /// Emits an opcode-map marker byte (0x40 + map index, reusing the REX range).
    fn op_map(&mut self, m: u8) {
        let b = 0x40 + m;
        if log_encode!() {
            //print!("(OP:{b:02X}=MAP{m:X})");
            print!("(OP:{b:02X})");
        }
        assert!(m < 16);
        self.streams[ST_OP].push(b);
    }
    /// Records a 32-bit call target: a cache hit stores only the index,
    /// a miss (0xFF) stores the index plus the full target.
    fn call32(&mut self, idx: u8, target: u32) {
        if idx == 0xff {
            if log_encode!() {
                print!("(CALL_IDX:{idx:02X})(CALL32:{target:08X})");
            }
            self.streams[ST_CALL_IDX].push(idx);
            self.streams[ST_CALL32].extend_from_slice(&target.to_le_bytes());
        } else {
            if log_encode!() {
                print!("(CALL_IDX:{idx:02X})");
            }
            self.streams[ST_CALL_IDX].push(idx);
        }
    }
    /// 64-bit counterpart of `call32`.
    fn call64(&mut self, idx: u8, target: u64) {
        if idx == 0xff {
            if log_encode!() {
                print!("(CALL_IDX:{idx:02X})(CALL64:{target:016X})");
            }
            self.streams[ST_CALL_IDX].push(idx);
            self.streams[ST_CALL64].extend_from_slice(&target.to_le_bytes());
        } else {
            if log_encode!() {
                print!("(CALL_IDX:{idx:02X})");
            }
            self.streams[ST_CALL_IDX].push(idx);
        }
    }
    /// Emits a jump-table marker followed by its entry count (biased by -1).
    fn jumptab(&mut self, count: u8) {
        if log_encode!() {
            //print!("(OP:{JUMPTAB:02X}=JUMPTAB)(JUMPTAB_COUNT:{count:02X})");
            print!("(OP:{JUMPTAB:02X})(OP:{count:02X})");
        }
        self.streams[ST_OP].push(JUMPTAB);
        self.streams[ST_JUMPTAB_COUNT].push(count);
    }
    /// Serializes all streams: origin, a bitmask of non-empty streams,
    /// one u32 length per non-empty stream, then the stream bodies in order.
    pub fn write_to(&self, w: &mut impl Write) -> io::Result<()> {
        let mut mask = 0u64;
        for (i, stream) in self.streams.iter().enumerate() {
            if !stream.is_empty() {
                mask |= 1 << i as u64;
            }
        }
        assert!(NUM_STREAMS <= 60);
        let mut header = vec![];
        header.extend_from_slice(&self.origin.to_le_bytes());
        header.extend_from_slice(&mask.to_le_bytes());
        for stream in &self.streams {
            if !stream.is_empty() {
                header.extend_from_slice(&(stream.len() as u32).to_le_bytes());
            }
        }
        w.write_all(must_cast_slice(&header))?;
        for stream in &self.streams {
            w.write_all(stream)?;
        }
        Ok(())
    }
}
/// True when the normalized REX byte has its W (64-bit operand size) bit set.
#[inline(always)]
const fn rex_has_w(rex: u8) -> bool {
    rex & 0x08 == 0x08
}
//      ___ ____              ____
// VEX3 RXB0mmmm WvvvvLpp -> 1vvvvLpp 0000WRXB + map mmmm
//         ^
// Bit 4 of the first payload byte is technically the fifth map bit; it is
// ignored by this encoding scheme because it is currently completely unused.
#[inline(always)]
fn shuffle_vex3([x, y]: [u8; 2]) -> Option<([u8; 2], u8)> {
    if x & 0x10 == 0 {
        let map = x & 0x0f;
        let w = (y >> 4) & 0x08; // W into the REX.W position
        let rxb = !x >> 5; // R/X/B are stored inverted in VEX
        let vex = 0x80 | (y & 0x7f);
        Some(([vex, w | rxb], map))
    } else {
        None
    }
}
/// Inverse of `shuffle_vex3`: rebuilds the two raw VEX3 payload bytes from
/// the normalized (vex, rex) pair and map index, or `None` if the inputs
/// cannot have come from a valid VEX3 prefix.
#[inline(always)]
fn unshuffle_vex3([vex, rex]: [u8; 2], map: u8) -> Option<[u8; 2]> {
    let valid = vex & 0x80 != 0 && rex & 0xf0 == 0 && map < 16;
    if !valid {
        return None;
    }
    let first = (!rex & 0x07) << 5 | map;
    let second = (rex & 0x08) << 4 | (vex & 0x7f);
    Some([first, second])
}
//      _____                 ____
// VEX2 RvvvvLpp -> 1vvvvLpp 00000R00 + map 1 (implied)
#[inline(always)]
fn shuffle_vex2([x]: [u8; 1]) -> ([u8; 2], u8) {
    let vex = 0x80 | (x & 0x7f);
    let rex = (!x >> 5) & 0x04; // inverted R bit moved to the REX.R position
    ([vex, rex], 1) // the 2-byte VEX prefix always implies map 1
}
/// Inverse of `shuffle_vex2`: rebuilds the raw VEX2 payload byte, or `None`
/// if the normalized form could not have come from a 2-byte VEX prefix
/// (map must be 1 and only REX.R may be set).
#[inline(always)]
fn unshuffle_vex2([vex, rex]: [u8; 2], map: u8) -> Option<[u8; 1]> {
    if vex & 0x80 != 0 && rex & 0xfb == 0 && map == 1 {
        Some([(!rex & 0x04) << 5 | (vex & 0x7f)])
    } else {
        None
    }
}
// _____ _____ _ _____ | |
// EVEX RXBrbmmm Wvvvvxpp **L*V*** -> **0*0*** VvvvvLpp 0rxbWRXB + map mmm | |
#[inline(always)] | |
fn shuffle_evex([x, y, z]: [u8; 3]) -> ([u8; 3], u8) { | |
let map = x & 0x07; | |
let rex = (!x & 0x10) << 2 | (!x & 0x08) << 1 | (!x >> 5) | (!y & 0x04) << 3 | (y >> 4) & 0x08; | |
let vex = (y & 0x7b) | (z >> 3) & 0x04 | (z & 0x08) << 4; | |
let evex = z & 0xd7; | |
([evex, vex, rex], map) | |
} | |
#[inline(always)] | |
fn unshuffle_evex([evex, vex, rex]: [u8; 3], map: u8) -> Option<[u8; 3]> { | |
if evex & 0x28 != 0 || rex & 0x80 != 0 || map >= 8 { | |
return None; | |
} | |
let x = (!rex & 0x07) << 5 | (!rex >> 2) & 0x10 | (!rex >> 1) & 0x08 | map; | |
let y = (rex & 0x08) << 4 | (vex & 0x7b) | (!rex >> 3) & 0x04; | |
let z = evex | (vex & 0x04) << 3 | (vex >> 4) & 0x08; | |
Some([x, y, z]) | |
} | |
/// Splits a ModR/M byte into its (mode, base/rm) fields;
/// the middle reg field is extracted separately by `modrm_reg`.
#[inline(always)]
const fn parse_modrm(modrm: u8) -> (u8 /*mode*/, u8 /*base*/) {
    let mode = modrm >> 6;
    let base = modrm & 0b111;
    (mode, base)
}
/// True when the ModR/M mode selects a register operand (no memory access).
#[inline(always)]
const fn modrm_is_reg_only((mode, _base): (u8, u8)) -> bool {
    matches!(mode, 0b11)
}
/// Extracts the 3-bit reg/opcode-extension field from a ModR/M byte.
#[inline(always)]
const fn modrm_reg(modrm: u8) -> u8 {
    (modrm & 0b00_111_000) >> 3
}
/// True when the ModR/M fields imply a trailing SIB byte
/// (memory operand with base field 100).
#[inline(always)]
const fn modrm_has_sib((mode, base): (u8, u8)) -> bool {
    base == 0b100 && mode < 0b11
}
/// Yields `0..count` split into consecutive ranges of `chunk_size`,
/// with one final shorter range when `chunk_size` does not divide `count`.
fn range_chunks(count: usize, chunk_size: usize) -> impl Iterator<Item = std::ops::Range<usize>> {
    let full_end = count - count % chunk_size;
    // Trailing partial chunk, if any.
    let tail = (full_end < count).then(|| full_end..count);
    (0..full_end)
        .step_by(chunk_size)
        .map(move |lo| lo..lo + chunk_size)
        .chain(tail)
}
// Try to recognize common function boundary padding starting from `code[0]`:
// - `00` (ADD Eb,Gb), but only as a leading run
// - `90` (NOP), optionally 66-prefixed
// - `0F 1F /0` multi-byte NOP forms (with 8/32-bit displacements and SIB),
//   optionally prefixed by up to three 66 bytes
// - `CC` (INT3)
// Returns how many bytes of padding were recognized.
fn scan_pad(code: &[u8]) -> usize {
    let total = code.len();
    // 00 bytes only count as padding when they lead the region.
    let first_nonzero = code.iter().position(|&b| b != 0x00).unwrap_or(total);
    let mut rest = &code[first_nonzero..];
    // Consume recognized padding idioms until nothing more matches.
    loop {
        let next = match rest {
            [0xcc, r @ ..]
            | [0x90, r @ ..]
            | [0x66, 0x90, r @ ..]
            | [0x0f, 0x1f, 0x00, r @ ..]
            | [0x0f, 0x1f, 0x40, _, r @ ..]
            | [0x0f, 0x1f, 0x44, 0x00, _, r @ ..]
            | [0x66, 0x0f, 0x1f, 0x44, 0x00, _, r @ ..]
            | [0x0f, 0x1f, 0x80, _, _, _, _, r @ ..]
            | [0x0f, 0x1f, 0x84, 0x00, _, _, _, _, r @ ..]
            | [0x66, 0x0f, 0x1f, 0x84, 0x00, _, _, _, _, r @ ..]
            | [0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, _, _, _, _, r @ ..]
            | [0x66, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, _, _, _, _, r @ ..] => r,
            _ => return total - rest.len(),
        };
        rest = next;
    }
}
/* | |
https://stackoverflow.com/questions/25545470/long-multi-byte-nops-commonly-understood-macros-or-other-notation | |
https://gist.github.com/stevemk14ebr/d117e8d0fd1432fb2a92354a034ce5b9 | |
if(code == OP_RETNI || code == OP_RETN || code == OP_INT3) // return/padding | |
nextIsFunc = sTRUE; // next opcode is likely to be first of a new function | |
CALL 9A E8 FF/2-3 | |
JMP E9 EA EB FF/4-5 | |
JMPE 0FB8(w/o F2/F3) 0F00/5 | |
RET C2 C3 CA CB CF | |
JMPABS REX2+A1 | |
REX2 invalid: 7x Ax Ex 13x | |
JMPABS transfers program control to the 64-bit absolute address target64 given as a quadword | |
immediate. JMPABS is in legacy map 0 and requires a REX2 prefix with REX2.M0 = 0 and REX2.W = 0. All | |
other REX2 payload bits are ignored, and code-generators should set these bits to 0. JMPABS does not | |
have a ModRM byte and target64 is placed immediately after the opcode byte, so the entire instruction is | |
11 bytes long. Prefixing JMPABS with 0x66, 0x67, 0xF0, 0xF2, or 0xF3 triggers #UD. Segment overrides are | |
allowed but ignored by JMPABS. | |
padding + function target | |
*/ | |
/// Whether the (map << 8 | opcode) value `op` is a control-transfer or return
/// instruction after which inter-function padding is likely to follow.
const fn pad_followed(op: u16) -> bool {
    // TODO: cover more terminators (e.g. FF /4-5 indirect JMP, JMPABS).
    match op {
        // CALL far/near, JMP near/far/short
        0x9a | 0xe8 | 0xe9 | 0xea | 0xeb => true,
        // RET forms, INT3, IRET
        0xc2 | 0xc3 | 0xca | 0xcb | 0xcc | 0xcf => true,
        _ => false,
    }
}
/// A small move-to-front cache of recent call targets.
///
/// Slot 0 holds the most recently recorded target. Index 0xFF is reserved as
/// the "not cached" sentinel, which `record` treats as "shift everything".
struct CallCache {
    cache: [u64; 0x100],
}
impl CallCache {
    fn new() -> Self {
        CallCache { cache: [0; 0x100] }
    }
    /// Returns the position of `target` in the cache, or 0xFF when absent.
    #[inline(always)]
    fn find_index(&self, target: u64) -> u8 {
        for (pos, &cached) in self.cache.iter().enumerate() {
            if cached == target {
                return pos as u8;
            }
        }
        0xff
    }
    /// Looks up the target stored at `idx`; `None` for the 0xFF sentinel.
    #[inline(always)]
    fn find_target(&self, idx: u8) -> Option<u64> {
        match idx {
            0xff => None,
            _ => Some(self.cache[idx as usize]),
        }
    }
    /// Moves `target` to slot 0, shifting the slots before `idx` down by one.
    #[inline(always)]
    fn record(&mut self, idx: u8, target: u64) {
        self.cache.copy_within(0..idx as usize, 1);
        self.cache[0] = target;
    }
}
/// Encodes raw x86 machine code loaded at virtual address `origin` into the
/// per-kind `Streams`; `is64` selects long-mode (x86-64) vs 32-bit rules.
///
/// The byte stream is scanned linearly: prefixes are normalized, opcode /
/// ModR/M / SIB / displacement / immediate bytes are split into separate
/// streams, relative branch targets are rewritten as absolute addresses
/// (improving compressibility), and likely padding and jump tables are
/// recognized and encoded specially.
pub fn encode(mut code: Vec<u8>, mut origin: u64, is64: bool) -> io::Result<Streams> {
    let mut st = Streams::new(origin);
    let mut call_cache = CallCache::new();
    // Move-to-front lookup: returns the previous index (0xFF when absent)
    // and promotes the target to the front.
    let mut call_idx = |target: u64| {
        let idx = call_cache.find_index(target);
        call_cache.record(idx, target);
        idx
    };
    let code_len = code.len();
    let code_start = origin;
    let code_end = origin + code_len as u64;
    // should be enough for this encoding scheme
    // (longest instruction parse never reads more than 15 bytes ahead)
    const SENTINEL: usize = 15;
    code.extend_from_slice(&[0u8; SENTINEL]);
    let mut prefixes = 0;
    let mut pad = false;
    let mut code = &code[..];
    while code.len() > SENTINEL {
        if log_encode!() {
            println!();
            print!("{origin:06X}: ");
            //print!("{:02X?} ", &code[..SENTINEL]);
        }
        // Try to skip any padding.
        if pad {
            // Padding is striped into 16 streams by the low 4 bits of its address.
            let stream = ST_PAD0 + (origin & 0xf) as usize;
            let pad_size = scan_pad(&code[..code.len() - SENTINEL]);
            let mut padding;
            (padding, code) = code.split_at(pad_size);
            origin += pad_size as u64;
            // Length-prefixed chunks; a 0xFF count means more chunks follow.
            while padding.len() >= 0xff {
                let chunk;
                (chunk, padding) = padding.split_at(0xff);
                st.put8(stream, 0xff);
                st.copy(stream, chunk);
            }
            st.put8(stream, padding.len() as u8);
            st.copy(stream, padding);
            if false {
                // Speculatively record the next instruction as a call target.
                call_idx(origin);
            }
            pad = false;
            continue;
        }
        // Detect a possible jump table of at least 3 entries.
        const MIN_JUMPTAB: usize = 3;
        if !is64 {
            // TBW
            // 32-bit: entries are absolute addresses inside [code_start, code_end).
            let mut i = 0;
            let min_addr = code_start as u32;
            let max_addr = (code_end - 1) as u32;
            // NOTE(review): `code.len() - SENTINEL - 4` can underflow (debug
            // panic) when fewer than 4 payload bytes remain — TODO confirm/guard.
            while i < code.len() - SENTINEL - 4 {
                let addr = to_u32(&code[i..i + 4]);
                if addr < min_addr || max_addr < addr {
                    break;
                }
                i += 4;
            }
            if i >= MIN_JUMPTAB * 4 {
                // Emit in chunks of at most 0x100 entries (count is biased by -1).
                for range in range_chunks(i / 4, 0x100) {
                    st.jumptab((range.len() - 1) as u8);
                    for j in range {
                        let addr = to_u32(&code[j * 4..j * 4 + 4]);
                        st.call32(call_idx(addr as u64), addr);
                    }
                }
                code = &code[i..];
                origin += i as u64;
                continue;
            }
        } else if origin % 8 == 0 {
            // In x86-64, jump tables are typically 64-bit aligned offsets.
            // Since we don't know where they will be used,
            // we assume that they may be used anywhere within this `code`.
            let min_offset = -(code_len as i64 - 1);
            let max_offset = code_len as i64 - 1;
            let mut i = 0;
            // NOTE(review): same potential underflow as the 32-bit branch above.
            while i < code.len() - SENTINEL - 8 {
                let offset = to_u64(&code[i..i + 8]) as i64;
                if offset < min_offset || max_offset < offset {
                    break;
                }
                i += 8;
            }
            if i >= MIN_JUMPTAB * 8 {
                for range in range_chunks(i / 8, 0x100) {
                    st.jumptab((range.len() - 1) as u8);
                    for j in range {
                        st.put64(ST_JUMPTAB64, to_u64(&code[j * 8..j * 8 + 8]));
                    }
                }
                code = &code[i..];
                origin += i as u64;
                continue;
            }
        }
        // Per-instruction decode state: i is the parse cursor within `code`.
        let mut i = 0;
        let mut pre = 0;
        let mut evex = 0;
        let mut vex = 0;
        let mut rex = 0;
        let mut map = 0;
        let mut op = 0;
        // Handle prefixes that cannot be combined first.
        // They are all followed by ModR/M, where mode = 0b11 is required in x86-32.
        if is64 || modrm_is_reg_only(parse_modrm(code[1])) {
            match code[0] {
                PRE_VEX3 => {
                    if let Some(ret) = shuffle_vex3([code[1], code[2]]) {
                        pre = PRE_VEX3;
                        ([vex, rex], map) = ret;
                        op = code[3];
                        i = 4;
                    }
                }
                PRE_VEX2 => {
                    pre = PRE_VEX2;
                    ([vex, rex], map) = shuffle_vex2([code[1]]);
                    op = code[2];
                    i = 3;
                }
                PRE_EVEX => {
                    pre = PRE_EVEX;
                    ([evex, vex, rex], map) = shuffle_evex([code[1], code[2], code[3]]);
                    op = code[4];
                    i = 5;
                }
                _ => {}
            }
        }
        // *VEX cannot coexist with 0F or REX.
        let has_vex = i > 0;
        if !has_vex {
            if is64 {
                let c = code[0];
                if c & 0xf0 == 0x40 {
                    // REX (40..4F)
                    pre = REX_MARKER;
                    rex = c & 0x0f;
                    i = 1;
                } else if c == PRE_REX2 {
                    pre = PRE_REX2;
                    rex = code[1];
                    i = 2;
                }
            }
            if code[i] == PRE_2BYTE {
                map = 1;
                op = code[i + 1];
                i += 2;
            } else {
                op = code[i];
                i += 1;
            }
        }
        // Interior mutability so the helper closures below can share the cursor.
        let i = Cell::new(i);
        let fetch8 = || {
            let ret = code[i.get()];
            i.set(i.get() + 1);
            ret
        };
        let fetch32 = || {
            let ret = to_u32(&code[i.get()..i.get() + 4]);
            i.set(i.get() + 4);
            ret
        };
        let copy = |n: usize, streams: &mut Streams, st: usize| {
            debug_assert_eq!(STREAM_SIZES[st], n);
            streams.copy(st, &code[i.get()..i.get() + n]);
            i.set(i.get() + n);
        };
        // Converts a 32-bit relative operand to an absolute address; `delta`
        // accounts for operand bytes not yet consumed at call time.
        let rel_to_abs = |addr: u32, delta: usize| {
            (origin + (i.get() + delta) as u64).wrapping_add(addr as i32 as u64)
        };
        let mut flags = lookup_opcode(op, map, is64);
        if flags == BP {
            // 1-byte prefixes can't have any *VEX and REX prefix.
            if i.get() != 1 {
                flags = XX;
            } else {
                assert_eq!(map, 0);
                // Remember the prefix for the *next* instruction decode.
                prefixes |= 1 << prefix_hash(op);
                st.op(map, op);
                code = &code[1..];
                origin += 1;
                continue;
            }
        } else if op3_followed(flags) {
            // 3-byte opcode prefixes can't have any *VEX prefix which has its own map index.
            if has_vex {
                flags = XX;
            } else {
                assert_eq!(map, 1);
                // M2/M3 escape to maps 2/3; the real opcode byte follows.
                map = (flags - M2) + 2;
                flags = (flags - M2) + R_;
                op = fetch8();
            }
        }
        // Consume the accumulated prefix bitset for this instruction and
        // reset it for the next one (shadowing the outer `prefixes`).
        let mut prefixes = mem::replace(&mut prefixes, 0);
        if flags == XX {
            // Do NOT commit the current position if this instruction is invalid.
            st.op_esc(code[0]);
            code = &code[1..];
            origin += 1;
            continue;
        }
        // Now we can commit any prefixes and opcode.
        if pre != 0 {
            st.put8(ST_OP, pre);
            if pre == PRE_EVEX {
                st.put8(ST_EVEX, evex);
                st.put8(ST_VEX, vex);
            } else if pre == PRE_VEX2 || pre == PRE_VEX3 {
                st.put8(ST_VEX, vex);
            }
            st.put8(ST_REX, rex);
        }
        st.op(map, op);
        // From here `op` carries the map index in its high byte.
        let op = (map as u16) << 8 | op as u16;
        match op {
            // Parse an additional 16-bit immediate for these:
            //
            // 9A/EA: CALL/JMP Ap (16-bit segment + 32-bit address)
            // C8: ENTER Iw,Ib (16-bit immediate + 8-bit immediate)
            OP_CALLF | OP_JMPF | OP_ENTER => {
                copy(16 / 8, &mut st, ST_IMM16);
            }
            // F6/F7: TEST E,I (/0-1) vs. NOT/NEG/[I]MUL/[I]DIV E (/2-7)
            OP_GRP3_1 | OP_GRP3_2 | OP_MAP4_GRP3_1 | OP_MAP4_GRP3_2
                if modrm_reg(code[i.get()]) >= 2 =>
            {
                flags = R_;
            }
            // 0F B8: JMPE Jz (IA-64 only) vs. POPCNT Gv,Ev (F3)
            OP_JMPE_POPCNT if has_rep_prefix(prefixes) => {
                flags = R_;
            }
            // MAP7 F8: URDMSR Rq,Id; UWRMSR Id,Rq (immediate size doesn't depend on 66)
            OP_URDMSR_UWRMSR => {
                prefixes &= !(1 << prefix_hash(PRE_OSIZE));
            }
            _ => {}
        }
        pad = pad_followed(op);
        // ModR/M present
        if has_modrm(flags) {
            // Reduce to the matching immediate-only flag for the operand phase.
            flags = modrm_to_imm(flags);
            let modrm = fetch8();
            st.put8(ST_MODRM, modrm);
            let (mode, base) = parse_modrm(modrm);
            let sib;
            if modrm_has_sib((mode, base)) {
                sib = fetch8();
                st.put8(ST_SIB, sib);
            } else {
                sib = 0;
            }
            match mode {
                0 if base == 5 => {
                    let addr = fetch32();
                    if is64 {
                        // [eip+disp32] or [rip+disp32]
                        // Note that we haven't fully decoded operands yet, hence a delta.
                        let delta = [0, 1, 2, 4][flags as usize];
                        st.put64(ST_ADDR64, rel_to_abs(addr, delta));
                    } else {
                        st.put32(ST_ADDR32, addr); // [disp32]
                    }
                }
                0 if sib & 7 == 5 => copy(32 / 8, &mut st, ST_DISP32), // [reg*scale+disp32]
                // [reg+disp8] or [reg*scale+disp8]
                1 => copy(8 / 8, &mut st, ST_DISP8_R0 + base as usize),
                2 => copy(32 / 8, &mut st, ST_DISP32), // [reg+disp32]
                _ => {}
            }
        }
        // Immediate / branch-target operand.
        match flags {
            J4 => {
                let target = rel_to_abs(fetch32(), 0);
                if op == OP_CALLN {
                    let idx = call_idx(target);
                    if is64 {
                        st.call64(idx, target);
                    } else {
                        st.call32(idx, target as u32);
                    }
                } else {
                    if is64 {
                        st.put64(ST_JUMP64, target);
                    } else {
                        st.put32(ST_JUMP32, target as u32);
                    }
                }
            }
            A_ => {
                // EA: 32-bit only, 16-bit if 66 ("Ap" = w:z)
                // Ax: 32-bit or 64-bit, fixed per operating mode ("Ov")
                let lgn = if is64 {
                    3
                } else if op == OP_JMPF && has_osize_prefix(prefixes) {
                    1
                } else {
                    2
                };
                // lgn is log2(size in bytes); ST_ADDR16/32/64 are consecutive.
                copy(1 << lgn, &mut st, (ST_ADDR16 - 1) + lgn);
            }
            JA => copy(32 / 8, &mut st, ST_AJUMP32),
            J1 => copy(8 / 8, &mut st, ST_JUMP8),
            N_ => {}
            _ => {
                assert!(matches!(flags, N1 | N2 | N4 | NZ));
                // Resolve size-variable immediates against the 66 prefix / REX.W.
                if flags == N4 && has_osize_prefix(prefixes) {
                    flags = N2;
                }
                if flags == NZ && !rex_has_w(rex) {
                    flags = N4;
                }
                // N1..N4 are consecutive, so this is log2(size in bytes).
                let lgn = (flags - N1) as usize;
                copy(1 << lgn, &mut st, ST_IMM8 + lgn);
            }
        }
        // Commit the fully decoded instruction.
        let i = i.get();
        code = &code[i..];
        origin += i as u64;
    }
    Ok(st)
}
/// Reverses `encode`: reassembles the original x86 machine code from the
/// per-field `streams`.
///
/// `is64` selects 64-bit vs. 32-bit x86 decoding and must match the flag used
/// when encoding. Returns `None` when one of the fixed-size stream reads runs
/// out of bytes; malformed stream *contents* (bad prefixes, invalid opcodes)
/// abort via the `fatal!` macro instead. NOTE(review): `copy` below uses
/// `split_at`, which panics rather than returning `None` on underflow.
pub fn decode(streams: &Streams, is64: bool) -> Option<Vec<u8>> {
    let origin = streams.origin;
    // Wrap each stream slice in a `Cell` so the reader closures below can
    // advance the slices without needing `&mut` captures.
    let streams = streams
        .streams
        .each_ref()
        .map(|stream| Cell::new(&stream[..]));
    let pc = Cell::new(origin);
    let mut code = Vec::new();
    // Pops one byte from stream `st`, tracing it when decode logging is on.
    let read8 = |st: usize| {
        if log_decode!() {
            print!("({}:", &STREAM_NAMES[st][3..]);
        }
        let (&[head], tail) = streams[st].get().split_first_chunk::<1>()?;
        if log_decode!() {
            print!("{head:02X})");
        }
        streams[st].set(tail);
        Some(head)
    };
    // Pops a little-endian u32 from stream `st`.
    let read32 = |st: usize| {
        if log_decode!() {
            print!("({}:", &STREAM_NAMES[st][3..]);
        }
        let (&head, tail) = streams[st].get().split_first_chunk::<4>()?;
        let head = u32::from_le_bytes(head);
        if log_decode!() {
            print!("{head:08X})");
        }
        streams[st].set(tail);
        Some(head)
    };
    // Pops a little-endian u64 from stream `st`.
    let read64 = |st: usize| {
        if log_decode!() {
            print!("({}:", &STREAM_NAMES[st][3..]);
        }
        let (&head, tail) = streams[st].get().split_first_chunk::<8>()?;
        let head = u64::from_le_bytes(head);
        if log_decode!() {
            print!("{head:016X})");
        }
        streams[st].set(tail);
        Some(head)
    };
    let mut call_cache = CallCache::new();
    // Reads one call target: an index byte from ST_CALL_IDX, and on a cache
    // miss the full absolute target from the CALL64/CALL32 stream. Either way
    // the (index, target) pair is re-recorded, mirroring the encoder side.
    let read_call = |cache: &mut CallCache| {
        let idx = read8(ST_CALL_IDX)?;
        let target = if let Some(target) = cache.find_target(idx) {
            target
        } else if is64 {
            read64(ST_CALL64)?
        } else {
            read32(ST_CALL32)? as u64
        };
        cache.record(idx, target);
        Some(target)
    };
    // Copies `n` bytes verbatim from stream `st` into the output buffer.
    // NOTE(review): `split_at` panics if fewer than `n` bytes remain, unlike
    // the `?`-based readers above.
    let copy = |n: usize, code: &mut Vec<u8>, st: usize| {
        if log_decode!() {
            print!("({}:", &STREAM_NAMES[st][3..]);
        }
        let (head, tail) = streams[st].get().split_at(n);
        if log_decode!() {
            for &b in head {
                print!("{:02X}", b);
            }
            print!(")");
        }
        code.extend_from_slice(head);
        streams[st].set(tail);
        Some(())
    };
    // Panics with the current output position and the last (up to) 15
    // reconstructed bytes for context.
    macro_rules! fatal {
        ($fmt:tt) => {
            panic!(
                concat!($fmt, " @ {:06X} {:02X?}"),
                pc.get(),
                &code[code.len().max(15) - 15..]
            )
        };
    }
    let mut prefixes = 0;
    let mut pad = false;
    // Main loop: each iteration reconstructs one instruction, escaped byte,
    // padding run, or jump-table entry group, driven by the OP stream.
    while !streams[ST_OP].get().is_empty() {
        pc.set(origin + code.len() as u64);
        if log_decode!() {
            println!();
            print!("{:06X}: ", pc.get());
        }
        if pad {
            // Padding bytes live in one of 16 streams keyed by the current
            // 16-byte alignment; a length byte of 0xFF means another
            // length+data chunk follows.
            let stream = ST_PAD0 + ((origin as usize + code.len()) & 0xf);
            loop {
                let pad_size = read8(stream)?;
                copy(pad_size as usize, &mut code, stream)?;
                if pad_size < 0xff {
                    break;
                }
            }
            // Disabled experiment (must stay in sync with the encoder if
            // ever enabled).
            if false {
                // Speculatively record the next instruction as a call target.
                let target = origin + code.len() as u64;
                let idx = call_cache.find_index(target);
                call_cache.record(idx, target);
            }
            pad = false;
            continue;
        }
        // Safe to unwrap: the loop condition guarantees ST_OP is non-empty.
        let mut op = read8(ST_OP).unwrap();
        if op == ESC {
            // Escaped byte: emit the following OP byte verbatim.
            code.push(read8(ST_OP)?);
            continue;
        }
        if op == JUMPTAB {
            // Jump table: count+1 absolute entries follow (64-bit entries in
            // x86-64 mode, call-cached 32-bit entries otherwise).
            let count = read8(ST_JUMPTAB_COUNT)? as usize + 1;
            if is64 {
                for _ in 0..count {
                    code.extend_from_slice(&read64(ST_JUMPTAB64)?.to_le_bytes());
                }
            } else {
                for _ in 0..count {
                    code.extend_from_slice(&(read_call(&mut call_cache)? as u32).to_le_bytes());
                }
            }
            continue;
        }
        // Pull any prefix payload bytes back out of their dedicated streams.
        let (pre, evex, vex, rex) = match op {
            PRE_VEX3 => (op, 0, read8(ST_VEX)?, read8(ST_REX)?),
            PRE_VEX2 => (op, 0, read8(ST_VEX)?, read8(ST_REX)?),
            PRE_EVEX => (op, read8(ST_EVEX)?, read8(ST_VEX)?, read8(ST_REX)?),
            REX_MARKER | PRE_REX2 if !is64 => fatal!("unsupported REX prefixes in x86-32"),
            REX_MARKER => (op, 0, 0, read8(ST_REX)?),
            PRE_REX2 => (op, 0, 0, read8(ST_REX)?),
            _ => (0, 0, 0, 0),
        };
        if pre != 0 {
            op = read8(ST_OP)?;
        }
        // A 0x4X byte in the OP stream carries the opcode map index; the real
        // opcode byte follows it.
        let map;
        if op & 0xf0 == 0x40 {
            map = op & 0x0f;
            op = read8(ST_OP)?;
        } else {
            map = 0;
        }
        let mut flags = lookup_opcode(op, map, is64);
        if flags == XX {
            fatal!("invalid opcode");
        } else if flags == BP {
            // Legacy byte prefix: note it in the prefix bitset and emit it.
            assert_eq!(map, 0);
            prefixes |= 1 << prefix_hash(op);
            code.push(op);
            continue;
        }
        // Emit prefix and opcode bytes. *VEX prefixes carry the map index
        // themselves, so the plain 0F/0F38/0F3A escapes are only emitted for
        // non-*VEX instructions (hence the early `break 'prefix`).
        'prefix: {
            match pre {
                PRE_VEX3 => {
                    let Some([x, y]) = unshuffle_vex3([vex, rex], map) else {
                        fatal!("bad VEX3 prefix");
                    };
                    code.extend_from_slice(&[PRE_VEX3, x, y, op]);
                    break 'prefix;
                }
                PRE_VEX2 => {
                    let Some([x]) = unshuffle_vex2([vex, rex], map) else {
                        fatal!("bad VEX2 prefix");
                    };
                    code.extend_from_slice(&[PRE_VEX2, x, op]);
                    break 'prefix;
                }
                PRE_EVEX => {
                    let Some([x, y, z]) = unshuffle_evex([evex, vex, rex], map) else {
                        fatal!("bad EVEX prefix");
                    };
                    code.extend_from_slice(&[PRE_EVEX, x, y, z, op]);
                    break 'prefix;
                }
                REX_MARKER => {
                    if rex & 0xf0 != 0 {
                        fatal!("bad REX prefix");
                    }
                    code.push(0x40 | rex);
                }
                PRE_REX2 => code.extend_from_slice(&[PRE_REX2, rex]),
                0 => {}
                _ => unreachable!(),
            }
            // Only applicable with non-*VEX prefixes.
            match map {
                0 => code.push(op),
                1 => code.extend_from_slice(&[0x0f, op]),
                2 => code.extend_from_slice(&[0x0f, 0x38, op]),
                3 => code.extend_from_slice(&[0x0f, 0x3a, op]),
                _ => fatal!("bad opcode map"),
            }
        }
        // Take (and reset) the prefix bitset accumulated by prior BP bytes.
        let mut prefixes = mem::replace(&mut prefixes, 0);
        // Combine map and opcode into the 16-bit key used by the OP_* consts.
        let op = (map as u16) << 8 | op as u16;
        match op {
            // Parse an additional 16-bit immediate for these:
            //
            // 9A/EA: CALL/JMP Ap (16-bit segment + 32-bit address)
            // C8: ENTER Iw,Ib (16-bit immediate + 8-bit immediate)
            OP_CALLF | OP_JMPF | OP_ENTER => {
                copy(16 / 8, &mut code, ST_IMM16)?;
            }
            // F6/F7: TEST E,I (/0-1) vs. NOT/NEG/[I]MUL/[I]DIV E (/2-7)
            OP_GRP3_1 | OP_GRP3_2 | OP_MAP4_GRP3_1 | OP_MAP4_GRP3_2
                if modrm_reg(streams[ST_MODRM].get()[0]) >= 2 =>
            {
                flags = R_;
            }
            // 0F B8: JMPE Jz (IA-64 only) vs. POPCNT Gv,Ev (F3)
            OP_JMPE_POPCNT if has_rep_prefix(prefixes) => {
                flags = R_;
            }
            // MAP7 F8: URDMSR Rq,Id; UWRMSR Id,Rq (immediate size doesn't depend on 66)
            OP_URDMSR_UWRMSR => {
                prefixes &= !(1 << prefix_hash(PRE_OSIZE));
            }
            _ => {}
        }
        pad = pad_followed(op);
        // Converts an absolute target back to a displacement relative to the
        // end of the field being emitted at `code.len() + delta`.
        let abs_to_rel = |addr: u64, code: &[u8], delta: usize| {
            addr.wrapping_sub(origin + (code.len() + delta) as u64)
        };
        // ModR/M present
        if has_modrm(flags) {
            flags = modrm_to_imm(flags);
            let modrm = read8(ST_MODRM)?;
            code.push(modrm);
            let (mode, base) = parse_modrm(modrm);
            let sib;
            if modrm_has_sib((mode, base)) {
                sib = read8(ST_SIB)?;
                code.push(sib);
            } else {
                sib = 0;
            }
            match mode {
                0 if base == 5 => {
                    let addr = if is64 {
                        // [eip+disp32] or [rip+disp32]
                        // Note that we haven't fully decoded operands yet, hence a delta.
                        let delta = [0, 1, 2, 4][flags as usize];
                        abs_to_rel(read64(ST_ADDR64)?, &code, delta + 4) as u32
                    } else {
                        read32(ST_ADDR32)? // [disp32]
                    };
                    code.extend_from_slice(&addr.to_le_bytes());
                }
                // [reg*scale+disp32]
                0 if sib & 7 == 5 => copy(32 / 8, &mut code, ST_DISP32)?,
                // [reg+disp8] or [reg*scale+disp8]
                1 => copy(8 / 8, &mut code, ST_DISP8_R0 + base as usize)?,
                2 => copy(32 / 8, &mut code, ST_DISP32)?, // [reg+disp32]
                _ => {}
            }
        }
        // Immediates and jump targets, by the (possibly adjusted) flags.
        match flags {
            J4 => {
                let target = if op == OP_CALLN {
                    read_call(&mut call_cache)?
                } else if is64 {
                    read64(ST_JUMP64)?
                } else {
                    read32(ST_JUMP32)? as u64
                };
                let target = abs_to_rel(target as u64, &code, 4) as u32;
                code.extend_from_slice(&target.to_le_bytes());
            }
            A_ => {
                // EA: 32-bit only, 16-bit if 66 ("Ap" = w:z)
                // Ax: 32-bit or 64-bit, fixed per operating mode ("Ov")
                let lgn = if is64 {
                    3
                } else if op == OP_JMPF && has_osize_prefix(prefixes) {
                    1
                } else {
                    2
                };
                copy(1 << lgn, &mut code, (ST_ADDR16 - 1) + lgn)?;
            }
            JA => copy(32 / 8, &mut code, ST_AJUMP32)?,
            J1 => copy(8 / 8, &mut code, ST_JUMP8)?,
            N_ => {}
            _ => {
                // Plain immediates; size may shrink with the operand-size
                // prefix (N4 -> N2) or without REX.W (NZ -> N4).
                assert!(matches!(flags, N1 | N2 | N4 | NZ));
                if flags == N4 && has_osize_prefix(prefixes) {
                    flags = N2;
                }
                if flags == NZ && !rex_has_w(rex) {
                    flags = N4;
                }
                let lgn = (flags - N1) as usize;
                copy(1 << lgn, &mut code, ST_IMM8 + lgn)?;
            }
        }
    }
    Some(code)
}
pub fn locate_code(f: &mut File) -> io::Result<Vec<(u64, u64, usize)>> { | |
f.seek(SeekFrom::Start(0))?; | |
let mut r = BufReader::new(f); | |
// MZ header | |
let mut buf = [0u8; 0x40]; | |
r.read_exact(&mut buf)?; | |
let sig = to_u16(&buf[0..2]); | |
if sig != 0x5a4d { | |
return Err(io::Error::other(format!("bad MZ signature {sig:#x}"))); | |
} | |
let pe_offset = to_u32(&buf[0x3c..0x40]); | |
if pe_offset < 0x40 { | |
return Err(io::Error::other(format!( | |
"too low offset to PE header {pe_offset:#x}" | |
))); | |
} | |
r.seek_relative(pe_offset as i64 - 0x40)?; | |
// PE header | |
let mut buf = [0u8; 0x18]; | |
r.read_exact(&mut buf)?; | |
let sig = to_u32(&buf[0..4]); | |
if sig != 0x4550 { | |
return Err(io::Error::other(format!("bad PE signature {sig:#x}"))); | |
} | |
let num_sections = to_u16(&buf[6..8]); | |
let opt_header_size = to_u16(&buf[0x14..0x16]) as usize; | |
// PE optional header | |
let mut opt_header = vec![0u8; opt_header_size]; | |
r.read_exact(&mut opt_header)?; | |
let magic = to_u16(&opt_header[0..2]); | |
match magic { | |
0x10b => { | |
// IMAGE_OPTIONAL_HEADER32 | |
todo!(); | |
} | |
0x20b => { | |
// IMAGE_OPTIONAL_HEADER64 | |
if opt_header_size < 0x60 { | |
return Err(io::Error::other(format!( | |
"PE64 optional header too small ({opt_header_size:#x} < 0x60)" | |
))); | |
} | |
let num_data_dirs = to_u32(&opt_header[0x5c..0x60]) as usize; | |
let min_size = 0x60 + num_data_dirs * 0x10; | |
if opt_header_size < min_size { | |
return Err(io::Error::other(format!( | |
"PE64 optional header too small ({opt_header_size:#x} < {min_size:#x})" | |
))); | |
} | |
// data directories: | |
// EXPORT, IMPORT, RESOURCE, EXCEPTION, SECURITY, BASERELOC, DEBUG, COPYRIGHT, | |
// GLOBALPTR, TLS, LOAD_CONFIG, BOUND_IMPORT, IAT, DELAY_IMPORT, COM_DESCRIPTOR, - | |
} | |
_ => { | |
return Err(io::Error::other(format!( | |
"bad PE optional header magic {magic:#x}" | |
))) | |
} | |
} | |
// section headers | |
let mut exec_sections = Vec::new(); | |
for _ in 0..num_sections { | |
let mut buf = [0u8; 40]; | |
r.read_exact(&mut buf)?; | |
let name = &buf[..8]; | |
let _name = String::from_utf8_lossy(name); | |
let rva = to_u32(&buf[12..16]); | |
let stored_size = to_u32(&buf[16..20]); | |
let stored_offset = to_u32(&buf[20..24]); | |
let flags = to_u32(&buf[36..40]); | |
//println!("section {_name:?}: rva {rva:#x}, stored {stored_offset:#x} + {stored_size:#x}, flags {flags:#x}"); | |
if flags & 0x20 != 0 { | |
exec_sections.push((rva as u64, stored_offset as u64, stored_size as usize)); | |
} | |
} | |
Ok(exec_sections) | |
} | |
#[test]
fn test_disfilter() -> io::Result<()> {
    // Round-trips the first executable section of a real binary through
    // encode/decode and reports the throughput of both directions.
    std::env::set_var("RUST_LOG", "trace");
    env_logger::init();
    let mut f = File::open(r"c:\Program Files\ImageMagick-7.1.1-Q16-HDRI\ffmpeg.exe")?;
    // Only the first executable section is exercised.
    if let Some(&(origin, offset, size)) = locate_code(&mut f)?.first() {
        f.seek(SeekFrom::Start(offset))?;
        let mut input = vec![0u8; size];
        f.read_exact(&mut input)?;
        File::create(r"x:\unfiltered.bin")?.write_all(&input)?;
        // Throughput in MB/s for a run over `size` bytes.
        let mbps = |elapsed: std::time::Duration| size as f64 / elapsed.as_secs_f64() / 1e6;
        let start = std::time::Instant::now();
        let streams = encode(input.clone(), origin, true)?;
        let enc_rate = mbps(start.elapsed());
        streams.write_to(&mut File::create(r"x:\filtered.bin")?)?;
        let start = std::time::Instant::now();
        let recons = decode(&streams, true).expect("round trip failed");
        let dec_rate = mbps(start.elapsed());
        if input != recons {
            // Show a 15-byte window around the first differing byte.
            let mismatch = input
                .iter()
                .zip(&recons)
                .position(|(a, b)| a != b)
                .unwrap();
            let lo = mismatch.saturating_sub(15);
            let hi = mismatch + 15;
            panic!(
                "input != recons\n \
                 Input: {mismatch}/{} {:02X?}\n \
                 Recons: {mismatch}/{} {:02X?}",
                input.len(),
                &input[lo..hi.min(input.len())],
                recons.len(),
                &recons[lo..hi.min(recons.len())],
            );
        }
        eprintln!("Disfilter: encoding {enc_rate:.2} MB/s, decoding {dec_rate:.2} MB/s");
    }
    Ok(())
}
#[test]
fn test_call_cache() {
    // Expected (target, index) pairs, in insertion order; 0xff marks a miss.
    let mut expected = vec![
        (1234, 0xff),
        (1234, 0),
        (5678, 0xff),
        (1234, 1),
        (1234, 0),
        (5678, 1),
    ];
    // After 256 fresh targets, the earlier entries are expected to miss again.
    expected.extend((9000..9256).map(|target| (target, 0xff)));
    expected.extend([(1234, 0xff), (5678, 0xff)]);
    // Forward direction: each target maps to the expected cache index.
    let mut cache = CallCache::new();
    for &(target, idx) in &expected {
        assert_eq!(cache.find_index(target), idx);
        cache.record(idx, target);
    }
    // Reverse direction: each index maps back to its target; misses stay None.
    let mut cache = CallCache::new();
    for &(target, idx) in &expected {
        let looked_up = cache.find_target(idx);
        assert_eq!(looked_up, if idx == 0xff { None } else { Some(target) });
        cache.record(idx, target);
    }
}
#[cfg(test)]
fn test_shuffle<const IN: usize, const OUT: usize>(
    shuffle: impl Fn([u8; IN]) -> Option<([u8; OUT], u8)>,
    unshuffle: impl Fn([u8; OUT], u8) -> Option<[u8; IN]>,
    map_range: impl Iterator<Item = u8> + Clone,
) {
    // Enumerates every possible N-byte array; N <= 3 keeps this tractable.
    #[inline(always)]
    fn generate<const N: usize>() -> impl Iterator<Item = [u8; N]> {
        assert!(N <= 3);
        (0..1u32 << (N * 8)).map(|n| array::from_fn(|i| (n >> (i * 8)) as u8))
    }
    // Forward: every shuffled output must round-trip and must be unique.
    let mut seen = std::collections::HashSet::new();
    for i in generate::<IN>() {
        let Some((o, map)) = shuffle(i) else { continue };
        assert_eq!(
            unshuffle(o, map),
            Some(i),
            "{i:02X?} -> {o:02X?} + {map} -> (roundtrip failed)"
        );
        assert!(seen.insert((o, map)), "{i:02X?} -> {o:02X?} + {map} (dupe)");
    }
    // Backward: any (output, map) pair never produced by `shuffle` must be
    // rejected by `unshuffle`.
    for o in generate::<OUT>() {
        for map in map_range.clone() {
            if seen.contains(&(o, map)) {
                continue;
            }
            assert_eq!(
                unshuffle(o, map),
                None,
                "? <- {o:02X?} + {map} (didn't fail)"
            );
        }
    }
}
#[test]
fn test_shuffle_vex3() {
    // Exhaustively checks the 3-byte VEX shuffle over map values 0..=16.
    test_shuffle(shuffle_vex3, unshuffle_vex3, 0u8..=16);
}
#[test]
fn test_shuffle_vex2() {
    // shuffle_vex2 is infallible, so adapt it to the Option-returning shape.
    let infallible = |i| Some(shuffle_vex2(i));
    test_shuffle(infallible, unshuffle_vex2, 0u8..=16);
}
#[test]
fn test_shuffle_evex() {
    // shuffle_evex is infallible; only map values 5 and 8 are probed for the
    // must-fail (backward) direction.
    let infallible = |i| Some(shuffle_evex(i));
    test_shuffle(infallible, unshuffle_evex, [5u8, 8].into_iter());
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment