// Disfilter: Fabian Giesen's x86-32 transformer, reworked for x86-64 in Rust (WIP)
// A gist by @lifthrasiir (https://gist.github.com/lifthrasiir/df47509caac2f065032ef72e70f2ec05),
// created June 18, 2024.
// TODO:
// - Better error checking
// - Better code structure
// - Detect function boundary (skip CC or multi-byte nops)
use std::array;
use std::cell::Cell;
use std::fmt;
use std::fs::File;
use std::io::{self, BufReader, Read, Seek, SeekFrom, Write};
use std::mem;
use bytemuck::must_cast_slice;
macro_rules! log_encode {
() => {
cfg!(trace_encode)
};
}
macro_rules! log_decode {
() => {
cfg!(trace_decode)
};
}
fn to_u16(s: &[u8]) -> u16 {
u16::from_le_bytes(s.try_into().unwrap())
}
fn to_u32(s: &[u8]) -> u32 {
u32::from_le_bytes(s.try_into().unwrap())
}
fn to_u64(s: &[u8]) -> u64 {
u64::from_le_bytes(s.try_into().unwrap())
}
const N_: u8 = 0b0000; // no immediate
const N1: u8 = 0b0001; // 8-bit immediate
const N2: u8 = 0b0010; // 16-bit immediate
const N4: u8 = 0b0011; // 32/16-bit immediate, depending on operand size prefix
const NZ: u8 = 0b0100; // 32/64-bit immediate, depending on REX.W
const J1: u8 = 0b0101; // 8-bit relative jump target
const J4: u8 = 0b0110; // 32-bit relative jump target
const JA: u8 = 0b0111; // 32-bit absolute jump target
const A_: u8 = 0b1000; // 16/32/64-bit absolute address that is not a jump target,
// exact bit size depends on the instruction and context
const R1: u8 = 0b1001; // modR/M + 8-bit immediate
const XX: u8 = 0b1010; // invalid, has to be escaped
const R4: u8 = 0b1011; // modR/M + 32/16-bit immediate, depending on operand size prefix
const R_: u8 = 0b1100; // modR/M + no immediate
const BP: u8 = 0b1101; // 1-byte prefix that will be marked
const M2: u8 = 0b1110; // opcode byte + modR/M + no immediate (map 2)
const M3: u8 = 0b1111; // opcode byte + modR/M + 8-bit immediate (map 3)
#[inline(always)]
const fn op3_followed(flags: u8) -> bool {
flags >= M2
}
#[inline(always)]
const fn has_modrm(flags: u8) -> bool {
flags >= R1
}
#[inline(always)]
const fn modrm_to_imm(flags: u8) -> u8 {
flags & 3
}
const _: () = {
assert!(N_ + 1 == N1 && N1 + 1 == N2 && N2 + 1 == N4 && N4 + 1 == NZ);
assert!(modrm_to_imm(R_) == N_ && modrm_to_imm(R1) == N1 && modrm_to_imm(R4) == N4);
assert!(M2 + 1 == M3);
};
// https://sandpile.org/
//
// https://github.com/torvalds/linux/blob/master/arch/x86/lib/x86-opcode-map.txt
// - Each placeholder is an argument made of one uppercase letter and one lowercase letter.
// - An immediate or address is present if some placeholder starts with one of AIJLO.
// - For -b/-w/-d/-z placeholders, the size is fixed to 8/16/32/32 bits respectively.
// - For -v placeholders, the size is normally 32 bits but becomes 16 bits with 66 prefixed.
// - A -p placeholder introduces an additional 16-bit segment selector before the address.
// - An L- placeholder introduces an additional 8-bit register selector.
// - A ModR/M byte is present if some placeholder starts with one of CDEGMNPQRSTUVW.
// 1-byte opcodes (legacy map 0)
const OPCODES0: [u8; 256] = [
R_, R_, R_, R_, N1, N4, N_, N_, R_, R_, R_, R_, N1, N4, N_, XX, // 0
R_, R_, R_, R_, N1, N4, N_, N_, R_, R_, R_, R_, N1, N4, N_, N_, // 1
R_, R_, R_, R_, N1, N4, BP, N_, R_, R_, R_, R_, N1, N4, BP, N_, // 2
R_, R_, R_, R_, N1, N4, BP, N_, R_, R_, R_, R_, N1, N4, BP, N_, // 3
N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, // 4
N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, // 5
N_, N_, R_, R_, BP, BP, BP, BP, N4, R4, N1, R1, N_, N_, N_, N_, // 6
J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, J1, // 7
R1, R4, R1, R1, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 8
N_, N_, N_, N_, N_, N_, N_, N_, N_, N_, JA, N_, N_, N_, N_, N_, // 9
A_, A_, A_, A_, N_, N_, N_, N_, N1, N4, N_, N_, N_, N_, N_, N_, // A
N1, N1, N1, N1, N1, N1, N1, N1, NZ, NZ, NZ, NZ, NZ, NZ, NZ, NZ, // B
R1, R1, N2, N_, R_, R_, R1, R4, N2, N_, N2, N_, N_, N1, N_, N_, // C
R_, R_, R_, R_, N1, N1, N_, N_, R_, R_, R_, R_, R_, R_, R_, R_, // D
J1, J1, J1, J1, N1, N1, N1, N1, J4, J4, A_, J1, N_, N_, N_, N_, // E
BP, N_, BP, BP, N_, N_, R1, R4, N_, N_, N_, N_, N_, N_, R_, R_, // F
];
// Some 1-byte opcodes are outright invalid in x86-64.
const OPCODES0_I64: [u32; 8] = [
//FEDCBA9876543210 FEDCBA9876543210
0b0100000011000000_1100000011000000, // 1x, 0x
0b1000000010000000_1000000010000000, // 3x, 2x
0b0000000000000000_1111111111111111, // 5x, 4x
0b0000000000000000_0000000000000111, // 7x, 6x
0b0000010000000000_0000000000000100, // 9x, 8x
0b0000000000000000_0000000000000000, // Bx, Ax
0b0000000001110000_0100000000110000, // Dx, Cx
0b0000000000000000_0000010000000000, // Fx, Ex
];
const PRE_VEX3: u8 = 0xc4;
const PRE_VEX2: u8 = 0xc5;
const PRE_EVEX: u8 = 0x62;
const PRE_REX2: u8 = 0xd5;
const PRE_2BYTE: u8 = 0x0f;
const PRE_OSIZE: u8 = 0x66;
const PRE_REPNE: u8 = 0xf2;
const PRE_REP: u8 = 0xf3;
const OP_CALLN: u16 = 0x0_e8; // CALL Jz
// Opcodes in the map 0 that need an additional 16-bit immediate.
const OP_CALLF: u16 = 0x0_9a; // CALL Ap (32-bit only)
const OP_JMPF: u16 = 0x0_ea; // JMP Ap (32-bit only)
const OP_ENTER: u16 = 0x0_c8; // ENTER Iw,Ib
// Opcodes in the map 0 that have immediates only with /0 or /1.
const OP_GRP3_1: u16 = 0x0_f6; // TEST Eb,Ib; NOT/NEG/[I]MUL/[I]DIV Eb
const OP_GRP3_2: u16 = 0x0_f7; // TEST Ev,Iv; NOT/NEG/[I]MUL/[I]DIV Ev
// 2-byte opcodes, starting with 0F (legacy map 1)
const OPCODES1: [u8; 256] = [
R_, R_, N_, N_, XX, N_, N_, N_, N_, N_, XX, XX, XX, R_, N_, R1, // 0F 0
R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, XX, R_, R_, // 0F 1
R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 2
N_, N_, N_, N_, N_, N_, XX, N_, M2, XX, M3, XX, XX, XX, XX, XX, // 0F 3
R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 4
R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 5
R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 6
R1, R1, R1, R1, R_, R_, R_, N_, N_, R_, R_, R_, R_, R_, R_, R_, // 0F 7
J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, J4, // 0F 8
R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F 9
N_, N_, N_, R_, R1, R_, R_, R_, N_, N_, N_, R_, R1, R_, R_, R_, // 0F A
R_, R_, R_, R_, R_, R_, R_, R_, N_, R_, R1, R_, R_, R_, R_, R_, // 0F B
R_, R_, R1, R_, R1, R1, R1, R_, N_, N_, N_, N_, N_, N_, N_, N_, // 0F C
R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F D
R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 0F E
R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, XX, // 0F F
];
// Opcode in the map 1 that has a ModR/M byte only with some prefixes.
const OP_JMPE_POPCNT: u16 = 0x1_b8; // JMPE Jz (IA-64 only); POPCNT Gv,Ev (with F3 prefix)
// EVEX opcode map 4 (largely a subset of the opcode map 0)
const OPCODES4: [u8; 256] = [
R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, XX, XX, XX, XX, // 0
R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, XX, XX, XX, XX, // 1
R_, R_, R_, R_, R1, XX, XX, XX, R_, R_, R_, R_, R1, XX, XX, XX, // 2
R_, R_, R_, R_, XX, XX, XX, XX, R_, R_, R_, R_, XX, XX, XX, XX, // 3
R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, R_, // 4
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 5
R_, R_, XX, XX, XX, R_, R_, XX, XX, R4, XX, R1, XX, XX, XX, XX, // 6
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 7
R1, R4, XX, R1, N_, N_, XX, XX, R_, XX, XX, XX, XX, XX, XX, R_, // 8
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 9
XX, XX, XX, XX, XX, R_, XX, XX, XX, XX, XX, XX, XX, R_, XX, R_, // A
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // B
R1, R1, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // C
R_, R_, R_, R_, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // D
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // E
R_, R_, R_, XX, R_, R_, R1, R4, R_, R_, XX, XX, XX, XX, R_, R_, // F
];
const OP_MAP4_GRP3_1: u16 = 0x4_f6;
const OP_MAP4_GRP3_2: u16 = 0x4_f7;
// EVEX opcode map 7
const OPCODES7: [u8; 256] = [
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 0
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 1
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 2
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 3
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 4
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 5
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 6
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 7
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 8
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // 9
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // A
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // B
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // C
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // D
XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, XX, // E
XX, XX, XX, XX, XX, XX, XX, XX, R4, XX, XX, XX, XX, XX, XX, XX, // F
];
// Opcode in the map 7 whose immediate isn't affected by the operand size prefix.
const OP_URDMSR_UWRMSR: u16 = 0x7_f8; // URDMSR Rq,Id; UWRMSR Id,Rq
// Special bytes in the ST_OP stream. They should be valid in any operating mode
// and decode without any further operands, yet have to be extremely unlikely to occur.
//
// - Since we regularize all *VEX & REX prefixes, a single-byte REX prefix is
// also rewritten as a uniform marker followed by a normalized REX data byte.
// The REX bytes themselves (40-4F) are instead used as an opcode map prefix,
// so the original 2-byte escape (0F) is repurposed for the REX marker.
//
// - Jump tables are indicated by INT1 (F1), which is meant for hardware debugging
// and is therefore extremely unlikely to occur in a regular x86 opcode stream.
//
// - Verbatim bytes are indicated by HLT (F4), which is privileged and can only occur rarely
// due to its semantics, making it a good opcode to steal.
//
// The original disfilter used INTO (CE) instead of HLT, but INTO is now invalid
// in long mode and has a chance of being repurposed in the future.
const REX_MARKER: u8 = 0x0f;
const JUMPTAB: u8 = 0xf1;
const ESC: u8 = 0xf4;
#[inline(always)]
fn lookup_opcode(op: u8, map: u8, is64: bool) -> u8 {
const OPCODES: [u16; 256] = {
let mut tab = [0u16; 256];
let mut i = 0;
while i < 256 {
assert!(OPCODES0[i] | OPCODES1[i] | OPCODES4[i] | OPCODES7[i] < 16);
tab[i] = OPCODES0[i] as u16
| (OPCODES1[i] as u16) << 4
| (OPCODES4[i] as u16) << 8
| (OPCODES7[i] as u16) << 12;
i += 1;
}
// Override escape codes.
assert!(OPCODES0[ESC as usize] == N_);
assert!(OPCODES0[JUMPTAB as usize] == N_);
tab[ESC as usize] = tab[ESC as usize] & !0xf | XX as u16;
tab[JUMPTAB as usize] = tab[JUMPTAB as usize] & !0xf | XX as u16;
tab
};
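// Per-map dispatch: entries with 0x20 set encode 0x20 | shift, selecting a nibble
// of the packed per-opcode entry above; entries below 0x20 are fixed flags
// (maps 2/3 always take ModR/M with no/8-bit immediate, maps 5/6 ModR/M only).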
const MAP_FLAGS: [u8; 16] = [
0x20, 0x24, R_, R1, 0x28, R_, R_, 0x2c, XX, XX, XX, XX, XX, XX, XX, XX,
];
if map == 0 && is64 && OPCODES0_I64[op as usize >> 5] >> (op as u32 & 31) & 1 == 1 {
XX
} else {
let map_flags = MAP_FLAGS[map as usize];
if map_flags < 0x20 {
map_flags
} else {
(OPCODES[op as usize] >> (map_flags & 0x1f)) as u8 & 15
}
}
}
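// Spot checks for the packed opcode lookup above; the expected flags are read
// directly off the OPCODES* tables and the OPCODES0_I64 invalidity bitmap.
#[test]
fn test_lookup_opcode() {
assert_eq!(lookup_opcode(0xe8, 0, true), J4); // CALL rel32
assert_eq!(lookup_opcode(0x06, 0, false), N_); // PUSH ES is valid in x86-32...
assert_eq!(lookup_opcode(0x06, 0, true), XX); // ...but invalid in x86-64
assert_eq!(lookup_opcode(0xea, 0, true), XX); // JMP Ap is 32-bit only
assert_eq!(lookup_opcode(ESC, 0, false), XX); // stolen bytes must be escaped
assert_eq!(lookup_opcode(JUMPTAB, 0, false), XX);
assert_eq!(lookup_opcode(0x84, 1, true), J4); // 0F 84: JZ rel32
}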
#[inline(always)]
const fn prefix_hash(b: u8) -> u32 {
let b = b as u32;
((b << 2) ^ (b >> 2)) & 31
}
#[inline(always)]
const fn has_osize_prefix(prefixes: u32) -> bool {
(prefixes >> prefix_hash(PRE_OSIZE)) & 1 != 0
}
#[inline(always)]
const fn has_rep_prefix(prefixes: u32) -> bool {
prefixes & ((1 << prefix_hash(PRE_REPNE)) | (1 << prefix_hash(PRE_REP))) != 0
}
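// Compile-time check that prefix_hash maps all 11 legacy prefixes to distinct
// bits of a u32 and that each of them is marked BP in the map-0 table.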
const _: () = {
let prefixes = [
0x26, 0x2e, 0x36, 0x3e, 0x64, 0x65, 0x66, 0x67, 0xf0, 0xf2, 0xf3,
];
let mut bitset = 0;
let mut i = 0;
while i < prefixes.len() {
let b = prefixes[i];
bitset |= 1u32 << prefix_hash(b);
assert!(OPCODES0[b as usize] == BP);
i += 1;
}
assert!(bitset.count_ones() == 11);
};
macro_rules! define_streams {
($($i:ident $s:literal),* $(,)?) => (
define_streams! { @0 $($i)* }
const STREAM_NAMES: [&str; NUM_STREAMS] = [$(stringify!($i)),*];
const STREAM_SIZES: [usize; NUM_STREAMS] = [$($s / 8),*];
);
(@$c:tt $i:ident $($t:tt)*) => (
#[allow(dead_code)] const $i: usize = $c;
define_streams! { @(1 + $c) $($t)* }
);
(@$c:tt) => (
const NUM_STREAMS: usize = $c;
);
}
define_streams! {
ST_OP 8,
ST_EVEX 8, ST_VEX 8, ST_REX 8, ST_SIB 8,
ST_CALL_IDX 8,
ST_DISP8_R0 8, ST_DISP8_R1 8, ST_DISP8_R2 8, ST_DISP8_R3 8,
ST_DISP8_R4 8, ST_DISP8_R5 8, ST_DISP8_R6 8, ST_DISP8_R7 8,
ST_DISP32 32,
ST_JUMP8 8, ST_JUMP32 32, ST_JUMP64 64,
ST_IMM8 8, ST_IMM16 16, ST_IMM32 32, ST_IMM64 64,
ST_ADDR16 16, ST_ADDR32 32, ST_ADDR64 64,
ST_CALL32 32, ST_CALL64 64,
ST_JUMPTAB64 64,
ST_PAD0 0, ST_PAD1 0, ST_PAD2 0, ST_PAD3 0, ST_PAD4 0, ST_PAD5 0, ST_PAD6 0, ST_PAD7 0,
ST_PAD8 0, ST_PAD9 0, ST_PAD10 0, ST_PAD11 0, ST_PAD12 0, ST_PAD13 0, ST_PAD14 0, ST_PAD15 0,
}
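// Aliases: some logical streams share a physical stream. ModR/M bytes and
// jump table entry counts piggyback on ST_OP, and 32-bit absolute jump
// targets reuse ST_JUMP32.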
const ST_MODRM: usize = ST_OP;
const ST_AJUMP32: usize = ST_JUMP32;
const ST_JUMPTAB_COUNT: usize = ST_OP;
#[derive(Debug)]
pub struct Streams {
origin: u64,
streams: [Vec<u8>; NUM_STREAMS],
}
impl Streams {
fn new(origin: u64) -> Self {
Self {
origin,
streams: array::from_fn(|_| Vec::new()),
}
}
fn check(&self, st: usize, size: usize) {
let expected = STREAM_SIZES[st];
if expected > 0 {
debug_assert_eq!(size, expected);
}
}
fn put8(&mut self, st: usize, v: u8) {
if log_encode!() {
print!("({}:{v:02X})", &STREAM_NAMES[st][3..]);
}
self.check(st, 1);
self.streams[st].push(v);
}
fn put32(&mut self, st: usize, v: u32) {
if log_encode!() {
print!("({}:{v:08X})", &STREAM_NAMES[st][3..]);
}
self.check(st, 4);
self.streams[st].extend_from_slice(&v.to_le_bytes());
}
fn put64(&mut self, st: usize, v: u64) {
if log_encode!() {
print!("({}:{v:016X})", &STREAM_NAMES[st][3..]);
}
self.check(st, 8);
self.streams[st].extend_from_slice(&v.to_le_bytes());
}
fn copy(&mut self, st: usize, v: &[u8]) {
if log_encode!() {
print!("({}:{})", &STREAM_NAMES[st][3..], {
struct Hex<'a>(&'a [u8]);
impl fmt::Display for Hex<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for b in self.0 {
write!(f, "{b:02X}")?;
}
Ok(())
}
}
Hex(v)
});
}
self.check(st, v.len());
self.streams[st].extend_from_slice(v);
}
fn op(&mut self, map: u8, op: u8) {
if map > 0 || op & 0xf0 == 0x40 {
self.op_map(map);
}
self.put8(ST_OP, op);
}
fn op_esc(&mut self, b: u8) {
if log_encode!() {
//print!("(OP:{ESC:02X}=ESC)(OP:{b:02X})");
print!("(OP:{ESC:02X})(OP:{b:02X})");
}
self.streams[ST_OP].extend_from_slice(&[ESC, b]);
}
fn op_map(&mut self, m: u8) {
let b = 0x40 + m;
if log_encode!() {
//print!("(OP:{b:02X}=MAP{m:X})");
print!("(OP:{b:02X})");
}
assert!(m < 16);
self.streams[ST_OP].push(b);
}
fn call32(&mut self, idx: u8, target: u32) {
if idx == 0xff {
if log_encode!() {
print!("(CALL_IDX:{idx:02X})(CALL32:{target:08X})");
}
self.streams[ST_CALL_IDX].push(idx);
self.streams[ST_CALL32].extend_from_slice(&target.to_le_bytes());
} else {
if log_encode!() {
print!("(CALL_IDX:{idx:02X})");
}
self.streams[ST_CALL_IDX].push(idx);
}
}
fn call64(&mut self, idx: u8, target: u64) {
if idx == 0xff {
if log_encode!() {
print!("(CALL_IDX:{idx:02X})(CALL64:{target:016X})");
}
self.streams[ST_CALL_IDX].push(idx);
self.streams[ST_CALL64].extend_from_slice(&target.to_le_bytes());
} else {
if log_encode!() {
print!("(CALL_IDX:{idx:02X})");
}
self.streams[ST_CALL_IDX].push(idx);
}
}
fn jumptab(&mut self, count: u8) {
if log_encode!() {
//print!("(OP:{JUMPTAB:02X}=JUMPTAB)(JUMPTAB_COUNT:{count:02X})");
print!("(OP:{JUMPTAB:02X})(OP:{count:02X})");
}
self.streams[ST_OP].push(JUMPTAB);
self.streams[ST_JUMPTAB_COUNT].push(count);
}
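// Serialized layout: origin (u64 LE), a bitmask of non-empty streams (u64 LE),
// one u32 LE length per non-empty stream in index order, and finally the raw
// contents of all streams concatenated in index order.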
pub fn write_to(&self, w: &mut impl Write) -> io::Result<()> {
let mut mask = 0u64;
for (i, stream) in self.streams.iter().enumerate() {
if !stream.is_empty() {
mask |= 1u64 << i;
}
}
assert!(NUM_STREAMS <= 60);
let mut header = vec![];
header.extend_from_slice(&self.origin.to_le_bytes());
header.extend_from_slice(&mask.to_le_bytes());
for stream in &self.streams {
if !stream.is_empty() {
header.extend_from_slice(&(stream.len() as u32).to_le_bytes());
}
}
w.write_all(must_cast_slice(&header))?;
for stream in &self.streams {
w.write_all(stream)?;
}
Ok(())
}
}
#[inline(always)]
const fn rex_has_w(rex: u8) -> bool {
rex & 0x08 != 0
}
// ___ ____ ____
// VEX3 RXB0mmmm WvvvvLpp -> 1vvvvLpp 0000WRXB + map mmmm
//         ^
// Bit 4 of the first VEX3 payload byte (marked above) is technically the fifth
// map bit, which the current encoding scheme rejects (forcing an escape)
// because it is currently completely unused.
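// e.g. the VEX3 payload bytes E2 79 (map 2, W=0, vvvv=0000, L=0, pp=01)
// shuffle into vex=F9, rex=00 with map 2, and unshuffle back losslessly.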
#[inline(always)]
fn shuffle_vex3([x, y]: [u8; 2]) -> Option<([u8; 2], u8)> {
if x & 0x10 != 0 {
return None;
}
let map = x & 0x0f;
let rex = (y >> 4) & 0x08 | (!x >> 5);
let vex = 0x80 | (y & 0x7f);
Some(([vex, rex], map))
}
#[inline(always)]
fn unshuffle_vex3([vex, rex]: [u8; 2], map: u8) -> Option<[u8; 2]> {
if vex & 0x80 == 0 || rex & 0xf0 != 0 || map >= 16 {
return None;
}
let x = (!rex & 0x07) << 5 | map;
let y = (rex & 0x08) << 4 | (vex & 0x7f);
Some([x, y])
}
// _____ ____
// VEX2 RvvvvLpp -> 1vvvvLpp 00000R00 + map 1 (implied)
#[inline(always)]
fn shuffle_vex2([x]: [u8; 1]) -> ([u8; 2], u8) {
let rex = (!x >> 5) & 0x04;
let vex = 0x80 | (x & 0x7f);
([vex, rex], 1)
}
#[inline(always)]
fn unshuffle_vex2([vex, rex]: [u8; 2], map: u8) -> Option<[u8; 1]> {
if vex & 0x80 == 0 || rex & 0xfb != 0 || map != 1 {
return None;
}
let x = (!rex & 0x04) << 5 | (vex & 0x7f);
Some([x])
}
// _____ _____ _ _____
// EVEX RXBrbmmm Wvvvvxpp **L*V*** -> **0*0*** VvvvvLpp 0rxbWRXB + map mmm
#[inline(always)]
fn shuffle_evex([x, y, z]: [u8; 3]) -> ([u8; 3], u8) {
let map = x & 0x07;
let rex = (!x & 0x10) << 2 | (!x & 0x08) << 1 | (!x >> 5) | (!y & 0x04) << 3 | (y >> 4) & 0x08;
let vex = (y & 0x7b) | (z >> 3) & 0x04 | (z & 0x08) << 4;
let evex = z & 0xd7;
([evex, vex, rex], map)
}
#[inline(always)]
fn unshuffle_evex([evex, vex, rex]: [u8; 3], map: u8) -> Option<[u8; 3]> {
if evex & 0x28 != 0 || rex & 0x80 != 0 || map >= 8 {
return None;
}
let x = (!rex & 0x07) << 5 | (!rex >> 2) & 0x10 | (!rex >> 1) & 0x08 | map;
let y = (rex & 0x08) << 4 | (vex & 0x7b) | (!rex >> 3) & 0x04;
let z = evex | (vex & 0x04) << 3 | (vex >> 4) & 0x08;
Some([x, y, z])
}
#[inline(always)]
const fn parse_modrm(modrm: u8) -> (u8 /*mode*/, u8 /*base*/) {
(modrm >> 6, modrm & 0b111)
}
#[inline(always)]
const fn modrm_is_reg_only((mode, _base): (u8, u8)) -> bool {
mode == 0b11
}
#[inline(always)]
const fn modrm_reg(modrm: u8) -> u8 {
modrm >> 3 & 7
}
#[inline(always)]
const fn modrm_has_sib((mode, base): (u8, u8)) -> bool {
mode < 0b11 && base == 0b100
}
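// e.g. ModR/M 0x44 = 01'000'100 has mode=1 and base=100, so a SIB byte
// and then an 8-bit displacement follow.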
fn range_chunks(count: usize, chunk_size: usize) -> impl Iterator<Item = std::ops::Range<usize>> {
let remainder = count % chunk_size;
(0..count - remainder)
.step_by(chunk_size)
.map(move |start| start..start + chunk_size)
.chain(if remainder > 0 {
Some(count - remainder..count)
} else {
None
})
}
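// A quick check of the chunking helper above: full chunks first, then the remainder.
#[test]
fn test_range_chunks() {
let chunks: Vec<_> = range_chunks(10, 4).collect();
assert_eq!(chunks, vec![0..4, 4..8, 8..10]);
assert_eq!(range_chunks(8, 4).count(), 2);
}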
// Try to recognize common function boundary padding starting from `code[0]`:
// - `00` (ADD Eb,Gb)
// - `90` (NOP)
// - `0F 1F /0` (NOP E[bv])
// `0F 1F 00'000'reg`
// `0F 1F 00'000'100 zz'zzz'zzz`
// `0F 1F 00'000'100 zz'zzz'101 XX XX XX XX`
// `0F 1F 00'000'101 XX XX XX XX`
// `0F 1F 01'000'reg XX`
// `0F 1F 01'000'100 ZZ XX`
// `0F 1F 10'000'reg XX XX XX XX`
// `0F 1F 10'000'100 ZZ XX XX XX XX`
// - `CC` (INT3)
// - `89 11'reg'reg` (MOV R#,R#)
// - `8D 00'reg'reg` (LEA R#,[R#]) where reg != 101
// - `8D 01'reg'reg 00` (LEA R#,[R#+00h])
// - `8D 10'reg'reg 00 00 00 00` (LEA R#,[R#+00000000h])
// - `8D 00'reg'100 zz'100'reg` (LEA R#,[R#*1]) where reg != 101
// - `8D 01'reg'100 zz'100'reg 00` (LEA R#,[R#*1+00h]) where reg != 101
// - `8D 10'reg'100 zz'100'reg 00 00 00 00` (LEA R#,[R#*1+00000000h]) where reg != 101
// - Any 1-byte prefix besides LOCK and REX: `26 2E 36 3E 64 65 66 67 F2 F3`
// (Only a subset of these patterns is currently matched below.)
fn scan_pad(mut code: &[u8]) -> usize {
let len = code.len();
// Skip any 00 bytes only at the very beginning of possible padding.
let n = code.iter().position(|&b| b != 0x00).unwrap_or(len);
code = &code[n..];
loop {
code = match code {
[0xcc, rest @ ..]
| [0x90, rest @ ..]
| [0x66, 0x90, rest @ ..]
| [0x0f, 0x1f, 0x00, rest @ ..]
| [0x0f, 0x1f, 0x40, _, rest @ ..]
| [0x0f, 0x1f, 0x44, 0x00, _, rest @ ..]
| [0x66, 0x0f, 0x1f, 0x44, 0x00, _, rest @ ..]
| [0x0f, 0x1f, 0x80, _, _, _, _, rest @ ..]
| [0x0f, 0x1f, 0x84, 0x00, _, _, _, _, rest @ ..]
| [0x66, 0x0f, 0x1f, 0x84, 0x00, _, _, _, _, rest @ ..]
| [0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, _, _, _, _, rest @ ..]
| [0x66, 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, _, _, _, _, rest @ ..] => rest,
_ => break,
};
}
len - code.len()
}
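// Spot checks for the padding scanner above, over synthetic byte sequences.
#[test]
fn test_scan_pad() {
// Leading 00s, then INT3, NOP and 66 90; the final 0x41 stops the scan.
assert_eq!(scan_pad(&[0x00, 0x00, 0xcc, 0x90, 0x66, 0x90, 0x41]), 6);
// A 5-byte long NOP (0F 1F 44 00 xx) followed by RET.
assert_eq!(scan_pad(&[0x0f, 0x1f, 0x44, 0x00, 0x00, 0xc3]), 5);
// 00 bytes are only skipped at the very beginning.
assert_eq!(scan_pad(&[0x90, 0x00, 0x00]), 1);
}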
/*
https://stackoverflow.com/questions/25545470/long-multi-byte-nops-commonly-understood-macros-or-other-notation
https://gist.github.com/stevemk14ebr/d117e8d0fd1432fb2a92354a034ce5b9
if(code == OP_RETNI || code == OP_RETN || code == OP_INT3) // return/padding
nextIsFunc = sTRUE; // next opcode is likely to be first of a new function
CALL 9A E8 FF/2-3
JMP E9 EA EB FF/4-5
JMPE 0FB8(w/o F2/F3) 0F00/5
RET C2 C3 CA CB CF
JMPABS REX2+A1
REX2 invalid: 7x Ax Ex 13x
JMPABS transfers program control to the 64-bit absolute address target64 given as a quadword
immediate. JMPABS is in legacy map 0 and requires a REX2 prefix with REX2.M0 = 0 and REX2.W = 0. All
other REX2 payload bits are ignored, and code-generators should set these bits to 0. JMPABS does not
have a ModRM byte and target64 is placed immediately after the opcode byte, so the entire instruction is
11 bytes long. Prefixing JMPABS with 0x66, 0x67, 0xF0, 0xF2, or 0xF3 triggers #UD. Segment overrides are
allowed but ignored by JMPABS.
padding + function target
*/
const fn pad_followed(op: u16) -> bool {
// TODO
matches!(
op,
0x9a | 0xe8 | 0xe9 | 0xea | 0xeb | 0xc2 | 0xc3 | 0xca | 0xcb | 0xcc | 0xcf
)
}
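// A move-to-front cache of the 256 most recently seen call targets.
// find_index returns the current slot of target, or 0xFF on a miss; record
// then moves (or inserts) the target to slot 0, shifting the slots before it
// down by one. The encoder and decoder maintain identical caches, so a
// repeated call target can be transmitted as a single index byte.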
struct CallCache {
cache: [u64; 0x100],
}
impl CallCache {
fn new() -> Self {
Self {
cache: [0u64; 0x100],
}
}
#[inline(always)]
fn find_index(&self, target: u64) -> u8 {
self.cache
.iter()
.position(|&cached| cached == target)
.unwrap_or(0xff) as u8
}
#[inline(always)]
fn find_target(&self, idx: u8) -> Option<u64> {
if idx < 0xff {
Some(self.cache[idx as usize])
} else {
None
}
}
#[inline(always)]
fn record(&mut self, idx: u8, target: u64) {
self.cache.copy_within(0..(idx as usize), 1);
self.cache[0] = target;
}
}
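// Splits raw x86 machine code loaded at `origin` into multiple per-kind streams
// (opcodes, prefixes, ModR/M and SIB bytes, displacements, immediates, jump and
// call targets, padding), turning relative branch targets into absolute
// addresses so that repeated targets become byte-identical and compress well.
// `is64` selects between x86-32 and x86-64 decoding rules.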
pub fn encode(mut code: Vec<u8>, mut origin: u64, is64: bool) -> io::Result<Streams> {
let mut st = Streams::new(origin);
let mut call_cache = CallCache::new();
let mut call_idx = |target: u64| {
let idx = call_cache.find_index(target);
call_cache.record(idx, target);
idx
};
let code_len = code.len();
let code_start = origin;
let code_end = origin + code_len as u64;
// should be enough for this encoding scheme
const SENTINEL: usize = 15;
code.extend_from_slice(&[0u8; SENTINEL]);
let mut prefixes = 0;
let mut pad = false;
let mut code = &code[..];
while code.len() > SENTINEL {
if log_encode!() {
println!();
print!("{origin:06X}: ");
//print!("{:02X?} ", &code[..SENTINEL]);
}
// Try to skip any padding.
if pad {
let stream = ST_PAD0 + (origin & 0xf) as usize;
let pad_size = scan_pad(&code[..code.len() - SENTINEL]);
let mut padding;
(padding, code) = code.split_at(pad_size);
origin += pad_size as u64;
while padding.len() >= 0xff {
let chunk;
(chunk, padding) = padding.split_at(0xff);
st.put8(stream, 0xff);
st.copy(stream, chunk);
}
st.put8(stream, padding.len() as u8);
st.copy(stream, padding);
if false {
// Speculatively record the next instruction as a call target.
call_idx(origin);
}
pad = false;
continue;
}
// Detect a possible jump table of at least 3 entries.
const MIN_JUMPTAB: usize = 3;
if !is64 {
// TBW: in x86-32, jump table entries are absolute addresses within this code range.
let mut i = 0;
let min_addr = code_start as u32;
let max_addr = (code_end - 1) as u32;
while i < code.len() - SENTINEL - 4 {
let addr = to_u32(&code[i..i + 4]);
if addr < min_addr || max_addr < addr {
break;
}
i += 4;
}
if i >= MIN_JUMPTAB * 4 {
for range in range_chunks(i / 4, 0x100) {
st.jumptab((range.len() - 1) as u8);
for j in range {
let addr = to_u32(&code[j * 4..j * 4 + 4]);
st.call32(call_idx(addr as u64), addr);
}
}
code = &code[i..];
origin += i as u64;
continue;
}
} else if origin % 8 == 0 {
// In x86-64, jump tables are typically 64-bit aligned offsets.
// Since we don't know where they will be used,
// we assume that they may be used anywhere within this `code`.
let min_offset = -(code_len as i64 - 1);
let max_offset = code_len as i64 - 1;
let mut i = 0;
while i < code.len() - SENTINEL - 8 {
let offset = to_u64(&code[i..i + 8]) as i64;
if offset < min_offset || max_offset < offset {
break;
}
i += 8;
}
if i >= MIN_JUMPTAB * 8 {
for range in range_chunks(i / 8, 0x100) {
st.jumptab((range.len() - 1) as u8);
for j in range {
st.put64(ST_JUMPTAB64, to_u64(&code[j * 8..j * 8 + 8]));
}
}
code = &code[i..];
origin += i as u64;
continue;
}
}
let mut i = 0;
let mut pre = 0;
let mut evex = 0;
let mut vex = 0;
let mut rex = 0;
let mut map = 0;
let mut op = 0;
// Handle prefixes that cannot be combined first.
// They are all followed by ModR/M, where mode = 0b11 is required in x86-32.
if is64 || modrm_is_reg_only(parse_modrm(code[1])) {
match code[0] {
PRE_VEX3 => {
if let Some(ret) = shuffle_vex3([code[1], code[2]]) {
pre = PRE_VEX3;
([vex, rex], map) = ret;
op = code[3];
i = 4;
}
}
PRE_VEX2 => {
pre = PRE_VEX2;
([vex, rex], map) = shuffle_vex2([code[1]]);
op = code[2];
i = 3;
}
PRE_EVEX => {
pre = PRE_EVEX;
([evex, vex, rex], map) = shuffle_evex([code[1], code[2], code[3]]);
op = code[4];
i = 5;
}
_ => {}
}
}
// *VEX cannot coexist with 0F or REX.
let has_vex = i > 0;
if !has_vex {
if is64 {
let c = code[0];
if c & 0xf0 == 0x40 {
// REX (40..4F)
pre = REX_MARKER;
rex = c & 0x0f;
i = 1;
} else if c == PRE_REX2 {
pre = PRE_REX2;
rex = code[1];
i = 2;
}
}
if code[i] == PRE_2BYTE {
map = 1;
op = code[i + 1];
i += 2;
} else {
op = code[i];
i += 1;
}
}
let i = Cell::new(i);
let fetch8 = || {
let ret = code[i.get()];
i.set(i.get() + 1);
ret
};
let fetch32 = || {
let ret = to_u32(&code[i.get()..i.get() + 4]);
i.set(i.get() + 4);
ret
};
let copy = |n: usize, streams: &mut Streams, st: usize| {
debug_assert_eq!(STREAM_SIZES[st], n);
streams.copy(st, &code[i.get()..i.get() + n]);
i.set(i.get() + n);
};
let rel_to_abs = |addr: u32, delta: usize| {
(origin + (i.get() + delta) as u64).wrapping_add(addr as i32 as u64)
};
let mut flags = lookup_opcode(op, map, is64);
if flags == BP {
// 1-byte prefixes can't follow any *VEX or REX prefix.
if i.get() != 1 {
flags = XX;
} else {
assert_eq!(map, 0);
prefixes |= 1 << prefix_hash(op);
st.op(map, op);
code = &code[1..];
origin += 1;
continue;
}
} else if op3_followed(flags) {
// 3-byte opcode escapes can't follow any *VEX prefix, which carries its own map index.
if has_vex {
flags = XX;
} else {
assert_eq!(map, 1);
map = (flags - M2) + 2;
flags = (flags - M2) + R_;
op = fetch8();
}
}
let mut prefixes = mem::replace(&mut prefixes, 0);
if flags == XX {
// Do NOT commit the current position if this instruction is invalid.
st.op_esc(code[0]);
code = &code[1..];
origin += 1;
continue;
}
// Now we can commit any prefixes and opcode.
if pre != 0 {
st.put8(ST_OP, pre);
if pre == PRE_EVEX {
st.put8(ST_EVEX, evex);
st.put8(ST_VEX, vex);
} else if pre == PRE_VEX2 || pre == PRE_VEX3 {
st.put8(ST_VEX, vex);
}
st.put8(ST_REX, rex);
}
st.op(map, op);
let op = (map as u16) << 8 | op as u16;
match op {
// Parse an additional 16-bit immediate for these:
//
// 9A/EA: CALL/JMP Ap (16-bit segment + 32-bit address)
// C8: ENTER Iw,Ib (16-bit immediate + 8-bit immediate)
OP_CALLF | OP_JMPF | OP_ENTER => {
copy(16 / 8, &mut st, ST_IMM16);
}
// F6/F7: TEST E,I (/0-1) vs. NOT/NEG/[I]MUL/[I]DIV E (/2-7)
OP_GRP3_1 | OP_GRP3_2 | OP_MAP4_GRP3_1 | OP_MAP4_GRP3_2
if modrm_reg(code[i.get()]) >= 2 =>
{
flags = R_;
}
// 0F B8: JMPE Jz (IA-64 only) vs. POPCNT Gv,Ev (F3)
OP_JMPE_POPCNT if has_rep_prefix(prefixes) => {
flags = R_;
}
// MAP7 F8: URDMSR Rq,Id; UWRMSR Id,Rq (immediate size doesn't depend on 66)
OP_URDMSR_UWRMSR => {
prefixes &= !(1 << prefix_hash(PRE_OSIZE));
}
_ => {}
}
pad = pad_followed(op);
// ModR/M present
if has_modrm(flags) {
flags = modrm_to_imm(flags);
let modrm = fetch8();
st.put8(ST_MODRM, modrm);
let (mode, base) = parse_modrm(modrm);
let sib;
if modrm_has_sib((mode, base)) {
sib = fetch8();
st.put8(ST_SIB, sib);
} else {
sib = 0;
}
match mode {
0 if base == 5 => {
let addr = fetch32();
if is64 {
// [eip+disp32] or [rip+disp32]
// Note that we haven't fully decoded operands yet, hence a delta.
let delta = [0, 1, 2, 4][flags as usize];
st.put64(ST_ADDR64, rel_to_abs(addr, delta));
} else {
st.put32(ST_ADDR32, addr); // [disp32]
}
}
0 if sib & 7 == 5 => copy(32 / 8, &mut st, ST_DISP32), // [reg*scale+disp32]
// [reg+disp8] or [reg*scale+disp8]
1 => copy(8 / 8, &mut st, ST_DISP8_R0 + base as usize),
2 => copy(32 / 8, &mut st, ST_DISP32), // [reg+disp32]
_ => {}
}
}
match flags {
J4 => {
let target = rel_to_abs(fetch32(), 0);
if op == OP_CALLN {
let idx = call_idx(target);
if is64 {
st.call64(idx, target);
} else {
st.call32(idx, target as u32);
}
} else {
if is64 {
st.put64(ST_JUMP64, target);
} else {
st.put32(ST_JUMP32, target as u32);
}
}
}
A_ => {
// EA: 32-bit only, 16-bit if 66 ("Ap" = w:z)
// Ax: 32-bit or 64-bit, fixed per operating mode ("Ov")
let lgn = if is64 {
3
} else if op == OP_JMPF && has_osize_prefix(prefixes) {
1
} else {
2
};
copy(1 << lgn, &mut st, (ST_ADDR16 - 1) + lgn);
}
JA => copy(32 / 8, &mut st, ST_AJUMP32),
J1 => copy(8 / 8, &mut st, ST_JUMP8),
N_ => {}
_ => {
assert!(matches!(flags, N1 | N2 | N4 | NZ));
if flags == N4 && has_osize_prefix(prefixes) {
flags = N2;
}
if flags == NZ && !rex_has_w(rex) {
flags = N4;
}
let lgn = (flags - N1) as usize;
copy(1 << lgn, &mut st, ST_IMM8 + lgn);
}
}
let i = i.get();
code = &code[i..];
origin += i as u64;
}
Ok(st)
}
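// Reassembles the original machine code from `streams`; the exact inverse of
// `encode`. Returns None if any stream runs out of bytes prematurely, and
// panics via `fatal!` on clearly malformed stream contents.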
pub fn decode(streams: &Streams, is64: bool) -> Option<Vec<u8>> {
let origin = streams.origin;
let streams = streams
.streams
.each_ref()
.map(|stream| Cell::new(&stream[..]));
let pc = Cell::new(origin);
let mut code = Vec::new();
let read8 = |st: usize| {
if log_decode!() {
print!("({}:", &STREAM_NAMES[st][3..]);
}
let (&[head], tail) = streams[st].get().split_first_chunk::<1>()?;
if log_decode!() {
print!("{head:02X})");
}
streams[st].set(tail);
Some(head)
};
let read32 = |st: usize| {
if log_decode!() {
print!("({}:", &STREAM_NAMES[st][3..]);
}
let (&head, tail) = streams[st].get().split_first_chunk::<4>()?;
let head = u32::from_le_bytes(head);
if log_decode!() {
print!("{head:08X})");
}
streams[st].set(tail);
Some(head)
};
let read64 = |st: usize| {
if log_decode!() {
print!("({}:", &STREAM_NAMES[st][3..]);
}
let (&head, tail) = streams[st].get().split_first_chunk::<8>()?;
let head = u64::from_le_bytes(head);
if log_decode!() {
print!("{head:016X})");
}
streams[st].set(tail);
Some(head)
};
let mut call_cache = CallCache::new();
let read_call = |cache: &mut CallCache| {
let idx = read8(ST_CALL_IDX)?;
let target = if let Some(target) = cache.find_target(idx) {
target
} else if is64 {
read64(ST_CALL64)?
} else {
read32(ST_CALL32)? as u64
};
cache.record(idx, target);
Some(target)
};
let copy = |n: usize, code: &mut Vec<u8>, st: usize| {
if log_decode!() {
print!("({}:", &STREAM_NAMES[st][3..]);
}
if streams[st].get().len() < n {
return None; // truncated stream
}
let (head, tail) = streams[st].get().split_at(n);
if log_decode!() {
for &b in head {
print!("{:02X}", b);
}
print!(")");
}
code.extend_from_slice(head);
streams[st].set(tail);
Some(())
};
macro_rules! fatal {
($fmt:tt) => {
panic!(
concat!($fmt, " @ {:06X} {:02X?}"),
pc.get(),
&code[code.len().max(15) - 15..]
)
};
}
let mut prefixes = 0;
let mut pad = false;
while !streams[ST_OP].get().is_empty() {
pc.set(origin + code.len() as u64);
if log_decode!() {
println!();
print!("{:06X}: ", pc.get());
}
if pad {
let stream = ST_PAD0 + ((origin as usize + code.len()) & 0xf);
loop {
let pad_size = read8(stream)?;
copy(pad_size as usize, &mut code, stream)?;
if pad_size < 0xff {
break;
}
}
if false {
// Speculatively record the next instruction as a call target.
let target = origin + code.len() as u64;
let idx = call_cache.find_index(target);
call_cache.record(idx, target);
}
pad = false;
continue;
}
let mut op = read8(ST_OP)?;
if op == ESC {
code.push(read8(ST_OP)?);
continue;
}
if op == JUMPTAB {
let count = read8(ST_JUMPTAB_COUNT)? as usize + 1;
if is64 {
for _ in 0..count {
code.extend_from_slice(&read64(ST_JUMPTAB64)?.to_le_bytes());
}
} else {
for _ in 0..count {
code.extend_from_slice(&(read_call(&mut call_cache)? as u32).to_le_bytes());
}
}
continue;
}
let (pre, evex, vex, rex) = match op {
PRE_VEX3 | PRE_VEX2 => (op, 0, read8(ST_VEX)?, read8(ST_REX)?),
PRE_EVEX => (op, read8(ST_EVEX)?, read8(ST_VEX)?, read8(ST_REX)?),
REX_MARKER | PRE_REX2 if !is64 => fatal!("unsupported REX prefixes in x86-32"),
REX_MARKER | PRE_REX2 => (op, 0, 0, read8(ST_REX)?),
_ => (0, 0, 0, 0),
};
if pre != 0 {
op = read8(ST_OP)?;
}
let map;
if op & 0xf0 == 0x40 {
map = op & 0x0f;
op = read8(ST_OP)?;
} else {
map = 0;
}
let mut flags = lookup_opcode(op, map, is64);
if flags == XX {
fatal!("invalid opcode");
} else if flags == BP {
assert_eq!(map, 0);
prefixes |= 1 << prefix_hash(op);
code.push(op);
continue;
}
'prefix: {
match pre {
PRE_VEX3 => {
let Some([x, y]) = unshuffle_vex3([vex, rex], map) else {
fatal!("bad VEX3 prefix");
};
code.extend_from_slice(&[PRE_VEX3, x, y, op]);
break 'prefix;
}
PRE_VEX2 => {
let Some([x]) = unshuffle_vex2([vex, rex], map) else {
fatal!("bad VEX2 prefix");
};
code.extend_from_slice(&[PRE_VEX2, x, op]);
break 'prefix;
}
PRE_EVEX => {
let Some([x, y, z]) = unshuffle_evex([evex, vex, rex], map) else {
fatal!("bad EVEX prefix");
};
code.extend_from_slice(&[PRE_EVEX, x, y, z, op]);
break 'prefix;
}
REX_MARKER => {
if rex & 0xf0 != 0 {
fatal!("bad REX prefix");
}
code.push(0x40 | rex);
}
PRE_REX2 => code.extend_from_slice(&[PRE_REX2, rex]),
0 => {}
_ => unreachable!(),
}
// Only applicable with non-*VEX prefixes.
match map {
0 => code.push(op),
1 => code.extend_from_slice(&[0x0f, op]),
2 => code.extend_from_slice(&[0x0f, 0x38, op]),
3 => code.extend_from_slice(&[0x0f, 0x3a, op]),
_ => fatal!("bad opcode map"),
}
}
let mut prefixes = mem::replace(&mut prefixes, 0);
let op = (map as u16) << 8 | op as u16;
match op {
// Parse an additional 16-bit immediate for these:
//
// 9A/EA: CALL/JMP Ap (16-bit segment + 32-bit address)
// C8: ENTER Iw,Ib (16-bit immediate + 8-bit immediate)
OP_CALLF | OP_JMPF | OP_ENTER => {
copy(16 / 8, &mut code, ST_IMM16)?;
}
// F6/F7: TEST E,I (/0-1) vs. NOT/NEG/[I]MUL/[I]DIV E (/2-7)
OP_GRP3_1 | OP_GRP3_2 | OP_MAP4_GRP3_1 | OP_MAP4_GRP3_2
if modrm_reg(streams[ST_MODRM].get()[0]) >= 2 =>
{
flags = R_;
}
// 0F B8: JMPE Jz (IA-64 only) vs. POPCNT Gv,Ev (F3)
OP_JMPE_POPCNT if has_rep_prefix(prefixes) => {
flags = R_;
}
// MAP7 F8: URDMSR Rq,Id; UWRMSR Id,Rq (immediate size doesn't depend on 66)
OP_URDMSR_UWRMSR => {
prefixes &= !(1 << prefix_hash(PRE_OSIZE));
}
_ => {}
}
pad = pad_followed(op);
let abs_to_rel = |addr: u64, code: &[u8], delta: usize| {
addr.wrapping_sub(origin + (code.len() + delta) as u64)
};
// ModR/M present
if has_modrm(flags) {
flags = modrm_to_imm(flags);
let modrm = read8(ST_MODRM)?;
code.push(modrm);
let (mode, base) = parse_modrm(modrm);
let sib;
if modrm_has_sib((mode, base)) {
sib = read8(ST_SIB)?;
code.push(sib);
} else {
sib = 0;
}
match mode {
0 if base == 5 => {
let addr = if is64 {
// [eip+disp32] or [rip+disp32]
// Note that we haven't fully decoded operands yet, hence a delta.
let delta = [0, 1, 2, 4][flags as usize];
abs_to_rel(read64(ST_ADDR64)?, &code, delta + 4) as u32
} else {
read32(ST_ADDR32)? // [disp32]
};
code.extend_from_slice(&addr.to_le_bytes());
}
// [reg*scale+disp32]
0 if sib & 7 == 5 => copy(32 / 8, &mut code, ST_DISP32)?,
// [reg+disp8] or [reg*scale+disp8]
1 => copy(8 / 8, &mut code, ST_DISP8_R0 + base as usize)?,
2 => copy(32 / 8, &mut code, ST_DISP32)?, // [reg+disp32]
_ => {}
}
}
match flags {
J4 => {
let target = if op == OP_CALLN {
read_call(&mut call_cache)?
} else if is64 {
read64(ST_JUMP64)?
} else {
read32(ST_JUMP32)? as u64
};
let target = abs_to_rel(target as u64, &code, 4) as u32;
code.extend_from_slice(&target.to_le_bytes());
}
A_ => {
// EA: 32-bit only, 16-bit if 66 ("Ap" = w:z)
// Ax: 32-bit or 64-bit, fixed per operating mode ("Ov")
let lgn = if is64 {
3
} else if op == OP_JMPF && has_osize_prefix(prefixes) {
1
} else {
2
};
copy(1 << lgn, &mut code, (ST_ADDR16 - 1) + lgn)?;
}
JA => copy(32 / 8, &mut code, ST_AJUMP32)?,
J1 => copy(8 / 8, &mut code, ST_JUMP8)?,
N_ => {}
_ => {
assert!(matches!(flags, N1 | N2 | N4 | NZ));
if flags == N4 && has_osize_prefix(prefixes) {
flags = N2;
}
if flags == NZ && !rex_has_w(rex) {
flags = N4;
}
let lgn = (flags - N1) as usize;
copy(1 << lgn, &mut code, ST_IMM8 + lgn)?;
}
}
}
Some(code)
}
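// Parses the MZ/PE headers of `f` and returns an (RVA, file offset, stored size)
// triple for every section with the CODE flag (IMAGE_SCN_CNT_CODE, 0x20) set;
// the RVA doubles as the code origin. Only PE32+ images are handled so far.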
pub fn locate_code(f: &mut File) -> io::Result<Vec<(u64, u64, usize)>> {
f.seek(SeekFrom::Start(0))?;
let mut r = BufReader::new(f);
// MZ header
let mut buf = [0u8; 0x40];
r.read_exact(&mut buf)?;
let sig = to_u16(&buf[0..2]);
if sig != 0x5a4d {
return Err(io::Error::other(format!("bad MZ signature {sig:#x}")));
}
let pe_offset = to_u32(&buf[0x3c..0x40]);
if pe_offset < 0x40 {
return Err(io::Error::other(format!(
"too low offset to PE header {pe_offset:#x}"
)));
}
r.seek_relative(pe_offset as i64 - 0x40)?;
// PE header
let mut buf = [0u8; 0x18];
r.read_exact(&mut buf)?;
let sig = to_u32(&buf[0..4]);
if sig != 0x4550 {
return Err(io::Error::other(format!("bad PE signature {sig:#x}")));
}
let num_sections = to_u16(&buf[6..8]);
let opt_header_size = to_u16(&buf[0x14..0x16]) as usize;
// PE optional header
let mut opt_header = vec![0u8; opt_header_size];
r.read_exact(&mut opt_header)?;
let magic = to_u16(&opt_header[0..2]);
match magic {
0x10b => {
// IMAGE_OPTIONAL_HEADER32
todo!();
}
0x20b => {
// IMAGE_OPTIONAL_HEADER64
if opt_header_size < 0x60 {
return Err(io::Error::other(format!(
"PE64 optional header too small ({opt_header_size:#x} < 0x60)"
)));
}
let num_data_dirs = to_u32(&opt_header[0x5c..0x60]) as usize;
let min_size = 0x60 + num_data_dirs * 0x10;
if opt_header_size < min_size {
return Err(io::Error::other(format!(
"PE64 optional header too small ({opt_header_size:#x} < {min_size:#x})"
)));
}
// data directories:
// EXPORT, IMPORT, RESOURCE, EXCEPTION, SECURITY, BASERELOC, DEBUG, COPYRIGHT,
// GLOBALPTR, TLS, LOAD_CONFIG, BOUND_IMPORT, IAT, DELAY_IMPORT, COM_DESCRIPTOR, -
}
_ => {
return Err(io::Error::other(format!(
"bad PE optional header magic {magic:#x}"
)))
}
}
// section headers
let mut exec_sections = Vec::new();
for _ in 0..num_sections {
let mut buf = [0u8; 40];
r.read_exact(&mut buf)?;
let name = &buf[..8];
let _name = String::from_utf8_lossy(name);
let rva = to_u32(&buf[12..16]);
let stored_size = to_u32(&buf[16..20]);
let stored_offset = to_u32(&buf[20..24]);
let flags = to_u32(&buf[36..40]);
//println!("section {_name:?}: rva {rva:#x}, stored {stored_offset:#x} + {stored_size:#x}, flags {flags:#x}");
if flags & 0x20 != 0 {
exec_sections.push((rva as u64, stored_offset as u64, stored_size as usize));
}
}
Ok(exec_sections)
}
#[test]
fn test_disfilter() -> io::Result<()> {
std::env::set_var("RUST_LOG", "trace");
env_logger::init();
let mut f = File::open(r"c:\Program Files\ImageMagick-7.1.1-Q16-HDRI\ffmpeg.exe")?;
for (origin, offset, size) in locate_code(&mut f)? {
f.seek(SeekFrom::Start(offset))?;
let mut input = vec![0u8; size];
f.read_exact(&mut input)?;
File::create(r"x:\unfiltered.bin")?.write_all(&input)?;
let start = std::time::Instant::now();
let streams = encode(input.clone(), origin, true)?;
let enc_rate = size as f64 / start.elapsed().as_secs_f64() / 1e6;
streams.write_to(&mut File::create(r"x:\filtered.bin")?)?;
let start = std::time::Instant::now();
let recons = decode(&streams, true).expect("round trip failed");
let dec_rate = size as f64 / start.elapsed().as_secs_f64() / 1e6;
if input != recons {
let mismatch = std::iter::zip(&input, &recons)
.position(|(a, b)| a != b)
.unwrap();
let lo = mismatch.max(15) - 15;
let hi = mismatch + 15;
panic!(
"input != recons\n \
Input: {mismatch}/{} {:02X?}\n \
Recons: {mismatch}/{} {:02X?}",
input.len(),
&input[lo..hi.min(input.len())],
recons.len(),
&recons[lo..hi.min(recons.len())],
);
}
eprintln!("Disfilter: encoding {enc_rate:.2} MB/s, decoding {dec_rate:.2} MB/s");
break;
}
Ok(())
}
#[test]
fn test_call_cache() {
let mut expected = vec![
(1234, 0xff),
(1234, 0),
(5678, 0xff),
(1234, 1),
(1234, 0),
(5678, 1),
];
for i in 9000..9256 {
expected.push((i, 0xff));
}
expected.push((1234, 0xff));
expected.push((5678, 0xff));
let mut cache = CallCache::new();
for &(target, idx) in &expected {
assert_eq!(cache.find_index(target), idx);
cache.record(idx, target);
}
let mut cache = CallCache::new();
for &(target, idx) in &expected {
if idx == 0xff {
assert_eq!(cache.find_target(idx), None);
} else {
assert_eq!(cache.find_target(idx), Some(target));
}
cache.record(idx, target);
}
}
#[cfg(test)]
fn test_shuffle<const IN: usize, const OUT: usize>(
shuffle: impl Fn([u8; IN]) -> Option<([u8; OUT], u8)>,
unshuffle: impl Fn([u8; OUT], u8) -> Option<[u8; IN]>,
map_range: impl Iterator<Item = u8> + Clone,
) {
#[inline(always)]
fn generate<const N: usize>() -> impl Iterator<Item = [u8; N]> {
assert!(N <= 3);
(0..1u32 << (N * 8)).map(|n| {
let mut b = [0u8; N];
for i in 0..N {
b[i] = (n >> (i * 8) & 0xff) as u8;
}
b
})
}
let mut seen = std::collections::HashSet::new();
for i in generate::<IN>() {
if let Some((o, map)) = shuffle(i) {
assert_eq!(
unshuffle(o, map),
Some(i),
"{i:02X?} -> {o:02X?} + {map} -> (roundtrip failed)"
);
assert!(seen.insert((o, map)), "{i:02X?} -> {o:02X?} + {map} (dupe)");
}
}
for o in generate::<OUT>() {
for map in map_range.clone() {
if !seen.contains(&(o, map)) {
assert_eq!(
unshuffle(o, map),
None,
"? <- {o:02X?} + {map} (didn't fail)"
);
}
}
}
}
#[test]
fn test_shuffle_vex3() {
test_shuffle(shuffle_vex3, unshuffle_vex3, 0..=16);
}
#[test]
fn test_shuffle_vex2() {
test_shuffle(|i| Some(shuffle_vex2(i)), unshuffle_vex2, 0..=16);
}
#[test]
fn test_shuffle_evex() {
test_shuffle(
|i| Some(shuffle_evex(i)),
unshuffle_evex,
[5, 8].into_iter(),
);
}