Skip to content

Instantly share code, notes, and snippets.

@rrbutani
Last active March 30, 2023 05:09
Show Gist options
  • Save rrbutani/2ab89707d682a43a58d3e26229be4ce6 to your computer and use it in GitHub Desktop.
Save rrbutani/2ab89707d682a43a58d3e26229be4ce6 to your computer and use it in GitHub Desktop.
// We want to slice and dice our opcode map on:
// - opcode byte
// - opcode map (aka leading opcode bytes; if present)
// - prefixes
//
// Unfortunately, C doesn't have pattern matching. The macros that follow are
// our (unfortunate) facsimile.
//
// Essentially we are — in the style of `disas_insn` — appropriating the upper
// bytes of the `u32` we're switching on here to represent the opcode map and
// instruction prefixes:
// - 0b00000000 (prefix) (opcode map) (opcode)
//
// The macros make it ergonomic to carve out a chunk of the map, as you desire.
//
// The implementation is indubitably criminal but the end result is surprisingly
// user friendly: `clang` is able to warn about any overlap between patterns and
// will complain about empty patterns.
//
// Codegen doesn't look too bad either: https://clang.godbolt.org/z/cPdPGPWT1
// This makes it so that we get an error if we accidentally pass a map in place
// of a prefix (and vice versa).
#include <stdint.h>
#include <stdbool.h>
typedef uint32_t u32;
typedef uint8_t u8;
// `prefix` and `op_map` are distinct single-field wrapper structs. Because
// they are different types, accidentally passing a map where a prefix is
// expected (or vice versa) is a compile error — this is the type-safety
// mechanism the header comment alludes to.
typedef struct { u32 prefix; } prefix;
typedef struct { u32 map; } op_map;
// Pack a prefix into bits 16+ of the composite switch key
// (layout: 0b00000000 (prefix) (opcode map) (opcode)).
#define APPLY_PREFIX(op, prfx) ((op) | (((prfx).prefix) << 16))
// Pack an opcode map into bits 8..15 of the composite switch key.
#define APPLY_OP_MAP(op, map_) ((op) | (((map_).map) << 8))
// Prefixes:
// - _00 (PREFIX_NONE)
// - _66 (PREFIX_DATA)
// - _F2 (PREFIX_REPNZ)
// - _F3 (PREFIX_REPZ)
//
// - ALL_PREFIXES
//
// Internal encodings for the prefix "slot" of the composite key; the trailing
// comment on each line is the actual x86 prefix byte being represented.
// NOTE(review): the values (0x01/0x02/0x08, skipping 0x04) appear to mirror an
// existing decoder's prefix bitflags ("in the style of `disas_insn`", per the
// header) — confirm against that decoder's table.
#define PREFIX_NONE ((prefix){ .prefix = 0x00 }) /* 0x00 */
#define PREFIX_REPZ ((prefix){ .prefix = 0x01 }) /* 0xf3 */
#define PREFIX_REPNZ ((prefix){ .prefix = 0x02 }) /* 0xf2 */
#define PREFIX_DATA ((prefix){ .prefix = 0x08 }) /* 0x66 */
// Short aliases, named after the prefix byte itself for readability at use
// sites (e.g. `MATCH_OP(0x58, (_66), ...)`).
#define _00 PREFIX_NONE
#define _F3 PREFIX_REPZ
#define _F2 PREFIX_REPNZ
#define _66 PREFIX_DATA
// Parenthesized list of every prefix, suitable as the `prefixes` argument of
// MATCH_OP/MATCH_OP_RANGE.
#define ALL_PREFIXES (_00, _F3, _F2, _66)
// Opcode Maps:
// - MAP_NONE
// - MAP_0F
// - MAP_0F3A
// - MAP_0F38
// - MAP_5
//
// - ALL_MAPS
//
// Internal encodings for the opcode-map "slot" of the composite key (the
// leading escape byte(s) of an instruction, when present). Note the gap at
// 0x04; MAP_5 = 0x05 presumably corresponds to EVEX map 5 — TODO confirm.
#define MAP_NONE ((op_map){ .map = 0x00 })
#define MAP_0F ((op_map){ .map = 0x01 })
#define MAP_0F3A ((op_map){ .map = 0x02 })
#define MAP_0F38 ((op_map){ .map = 0x03 })
#define MAP_5 ((op_map){ .map = 0x05 })
// Parenthesized list of every map, suitable as the `opcode_maps` argument of
// MATCH_OP/MATCH_OP_RANGE.
#define ALL_MAPS (MAP_NONE, MAP_0F, MAP_0F3A, MAP_0F38, MAP_5)
////////////////////////////////////////////////////////////////////////////////
// Recursive-macro machinery.
//
// The preprocessor refuses to rescan a macro's own name during its expansion,
// so "recursion" is simulated with deferred expansion (EMPTY/DEFERRED below)
// plus EVAL, which feeds its argument back through the preprocessor many
// times (3^5 = 243 rescans) until all deferred expansions have resolved.
//
// Feels like there should be a better way than this...
#define EVAL(...) EVAL1(EVAL1(EVAL1(__VA_ARGS__)))
#define EVAL1(...) EVAL2(EVAL2(EVAL2(__VA_ARGS__)))
#define EVAL2(...) EVAL3(EVAL3(EVAL3(__VA_ARGS__)))
#define EVAL3(...) EVAL4(EVAL4(EVAL4(__VA_ARGS__)))
#define EVAL4(...) EVAL5(EVAL5(EVAL5(__VA_ARGS__)))
#define EVAL5(...) __VA_ARGS__
// Strips the parentheses off a parenthesized list: UNWRAP_LIST (a, b) -> a, b
#define UNWRAP_LIST(...) __VA_ARGS__
// See: https://stackoverflow.com/a/12540675
// DISPATCH_DEFERRED(M)() produces `M ()` but delays expanding M until the
// *next* EVAL rescan — this is what lets DISPATCH_N reference itself.
#define EMPTY(...)
#define DISPATCH_DEFERRED(M, ...) M EMPTY()
#define DISPATCH_N_INDIR() DISPATCH_N
#define DISPATCH_INDIR() DISPATCH_
// Sloppy workaround for the disabling context seeing `INDIR`...
// https://stackoverflow.com/a/12540675 has better approaches
// (The _B/_C copies are intentional: each nesting level needs its own
// identically-defined indirection macro so the "painted blue" rule doesn't
// block expansion.)
#define _INDIR2(M, ...) M (__VA_ARGS__)
#define _INDIR(M, ...) _INDIR2(M, __VA_ARGS__)
#define _INDIR_B2(M, ...) M (__VA_ARGS__)
#define _INDIR_B(M, ...) _INDIR_B2(M, __VA_ARGS__)
#define _INDIR_C2(M, ...) M (__VA_ARGS__)
#define _INDIR_C(M, ...) _INDIR_C2(M, __VA_ARGS__)
// Takes a list instead of var args:
#define DISPATCH_N_WRAPPER_INNER(N, NARGS, list) \
DISPATCH_DEFERRED(DISPATCH_N_INDIR)() (N, NARGS, list)
#define DISPATCH_N_WRAPPER(N, NARGS, list) \
DISPATCH_N_WRAPPER_INNER(N, NARGS, UNWRAP_LIST list)
#define DISPATCH2_N_WRAPPER_INNER(N, NARGS, list) \
DISPATCH_DEFERRED(DISPATCH_N_INDIR)() (N, NARGS, list)
#define DISPATCH2_N_WRAPPER(N, NARGS, list) \
DISPATCH2_N_WRAPPER_INNER(N, NARGS, UNWRAP_LIST list)
// Base case:
#define DISPATCH_(NEXT_MACRO, NEXT_ARGS_LIST, nothing)
// Recursive step: apply NEXT_MACRO to (NEXT_ARGS..., first), then recurse on
// the tail. `__VA_OPT__(N_)` selects DISPATCH_N while the tail is non-empty
// and falls through to the empty DISPATCH_ base case when it runs out.
// NOTE(review): __VA_OPT__ requires C23 (or the widely-available gcc/clang
// extension) — confirm the project's minimum compiler.
#define DISPATCH_N(NEXT_MACRO, NEXT_ARGS_LIST, first, ...) \
_INDIR(NEXT_MACRO, UNWRAP_LIST NEXT_ARGS_LIST, first) \
DISPATCH_DEFERRED(DISPATCH_ ## __VA_OPT__(N_) ## INDIR)() \
(NEXT_MACRO, NEXT_ARGS_LIST, __VA_ARGS__)
////////////////////////////////////////////////////////////////////////////////
// for each prefix: prepends the adjusted `opc_first` and `opc_last` to
// `NEXT_ARGS_POST` and invokes `NEXT_MACRO`
#define FOR_EACH_PREFIX(NEXT_MACRO, NEXT_ARGS_PRE, NEXT_ARGS_POST, \
opc_first, opc_last, prefixes) \
DISPATCH2_N_WRAPPER( \
EXPAND_PREFIX, \
(NEXT_MACRO, NEXT_ARGS_PRE, NEXT_ARGS_POST, opc_first, opc_last), \
prefixes \
)
#define EXPAND_PREFIX(NEXT_MACRO, PRE, POST, opc_first, opc_last, prefix) \
_INDIR_B(NEXT_MACRO, \
UNWRAP_LIST PRE, \
APPLY_PREFIX(opc_first, prefix), \
APPLY_PREFIX(opc_last, prefix), \
UNWRAP_LIST POST \
)
////////////////////////////////////////////////////////////////////////////////
// for each map: prepends the adjusted `opc_first` and `opc_last` to
// `NEXT_ARGS_POST` and invokes `NEXT_MACRO`
#define FOR_EACH_MAP(NEXT_MACRO, NEXT_ARGS_PRE, NEXT_ARGS_POST, \
opc_first, opc_last, maps) \
DISPATCH_N_WRAPPER( \
EXPAND_MAP, \
(NEXT_MACRO, NEXT_ARGS_PRE, NEXT_ARGS_POST, opc_first, opc_last), \
maps \
)
#define EXPAND_MAP(NEXT_MACRO, PRE, POST, opc_first, opc_last, map) \
_INDIR_C(NEXT_MACRO, \
UNWRAP_LIST PRE, \
APPLY_OP_MAP(opc_first, map), \
APPLY_OP_MAP(opc_last, map), \
UNWRAP_LIST POST \
)
////////////////////////////////////////////////////////////////////////////////
// Final expansion: one case label covering an inclusive range of composite
// keys. `case A ... B:` is a GNU/Clang extension, as is clang's diagnosis of
// overlapping or empty ranges that the header comment relies on.
#define EXPAND_CASE(_, opc_first, opc_last, __) case opc_first ... opc_last:
////////////////////////////////////////////////////////////////////////////////
// Emit `case` labels matching one opcode byte under every listed
// (prefix, map) combination. `prefixes` and `opcode_maps` are parenthesized
// lists, e.g. MATCH_OP(0x58, (_66, _00), (MAP_0F)).
#define MATCH_OP(opcode_last_byte, prefixes, opcode_maps) \
MATCH_OP_RANGE(opcode_last_byte, opcode_last_byte, prefixes, opcode_maps)
// Same, for an inclusive range of opcode bytes. Expansion order: for each
// prefix -> for each map -> EXPAND_CASE.
#define MATCH_OP_RANGE(opc_first_byte, opc_last_byte, prefixes, opcode_maps) \
EVAL(FOR_EACH_PREFIX( \
FOR_EACH_MAP, (EXPAND_CASE, (_), (_)), (opcode_maps), \
opc_first_byte, opc_last_byte, \
prefixes \
))
////////////////////////////////////////////////////////////////////////////////
const char* test(u8, u8, u8, bool w);

// Demo driver: map a (prefix, opcode map, opcode) triple to a mnemonic.
// `w` stands in for the VEX/EVEX W bit and is consulted only where it
// actually disambiguates two encodings (opcode 0x03 / valign{d,q}).
const char* test(u8 prefix_idx, u8 opcode_map_idx, u8 op, bool w) {
    const prefix pfx = { .prefix = prefix_idx };
    const op_map omap = { .map = opcode_map_idx };
    // Assemble the composite key described in the header:
    // 0b00000000 (prefix) (opcode map) (opcode).
    u32 key = APPLY_PREFIX(op, pfx);
    key = APPLY_OP_MAP(key, omap);
    switch (key) {
        MATCH_OP(0x01, (_66), (MAP_0F3A)) return "vpermpd";
        MATCH_OP(0x03, (_66), (MAP_0F3A)) return w ? "valignd" : "valignq";
        MATCH_OP(0x08, (_66), (MAP_0F3A)) return "vrndscaleps";
        MATCH_OP_RANGE(0x10, 0x11, (_F2), (MAP_0F)) return op == 0x10 ? "vmovsd" : "vmovsd mem";
        MATCH_OP_RANGE(0x10, 0x11, (_F3), (MAP_0F)) return op == 0x10 ? "vmovss" : "vmovss mem";
        MATCH_OP_RANGE(0x10, 0x11, (_00), (MAP_0F)) return op == 0x10 ? "vmovups" : "vmovups mem";
        MATCH_OP_RANGE(0x10, 0x11, (_66), (MAP_0F)) return op == 0x10 ? "vmovupd" : "vmovupd mem";
        MATCH_OP(0x12, (_F2), (MAP_0F)) return "vmovddup";
        MATCH_OP(0x55, (_66), (MAP_0F)) return "vandnpd";
        MATCH_OP(0x55, (_00), (MAP_0F)) return "vandnps";
        MATCH_OP(0x54, (_66), (MAP_0F)) return "vandpd";
        MATCH_OP(0x54, (_00), (MAP_0F)) return "vandps";
        MATCH_OP(0x58, (_66), (MAP_0F)) return "vaddpd";
        MATCH_OP(0x58, (_00), (MAP_0F)) return "vaddps";
        MATCH_OP(0x58, (_F3), (MAP_0F)) return "vaddss";
        MATCH_OP(0x58, (_F2), (MAP_0F)) return "vaddsd";
        MATCH_OP(0x9A, (_F2), (MAP_0F38)) return "v4fmaddps";
        MATCH_OP(0x9B, (_F2), (MAP_0F38)) return "v4fmaddss";
        MATCH_OP(0xAA, (_F2), (MAP_0F38)) return "v4nfmaddps";
        MATCH_OP(0xAB, (_F2), (MAP_0F38)) return "v4nfmaddss";
        MATCH_OP(0xDC, (_66), (MAP_0F38)) return "vaesenc";
        MATCH_OP(0xDD, (_66), (MAP_0F38)) return "vaesenclast";
        MATCH_OP(0xDE, (_66), (MAP_0F38)) return "vaesdec";
        MATCH_OP(0xDF, (_66), (MAP_0F38)) return "vaesdeclast";
        /* `cvt`s: */
        MATCH_OP(0x13, (_66), (MAP_0F38)) return "vcvtph2ps";
        MATCH_OP(0x1D, (_66), (MAP_0F3A)) return "vcvtps2ph";
        MATCH_OP(0x2A, (_F3), (MAP_0F)) return "vcvtsi2ss";
        MATCH_OP(0x2A, (_F2), (MAP_0F)) return "vcvtsi2sd";
        MATCH_OP(0x2C, (_F3), (MAP_0F)) return "vcvttss2si";
        MATCH_OP(0x2C, (_F2), (MAP_0F)) return "vcvttsd2si";
        MATCH_OP(0x2D, (_F3), (MAP_0F)) return "vcvtss2si";
        MATCH_OP(0x2D, (_F2), (MAP_0F)) return "vcvtsd2si";
        MATCH_OP(0x5A, (_00), (MAP_0F)) return "vcvtps2pd";
        MATCH_OP(0x5A, (_66), (MAP_0F)) return "vcvtpd2ps";
        MATCH_OP(0x5A, (_F3), (MAP_0F)) return "vcvtss2sd";
        MATCH_OP(0x5A, (_F2), (MAP_0F)) return "vcvtsd2ss";
        MATCH_OP(0x5B, (_00), (MAP_0F)) return "vcvtdq2ps";
        MATCH_OP(0x79, (_00), (MAP_0F)) return "vcvtpd2udq";
        MATCH_OP(0x79, (_66), (MAP_0F)) return "vcvtpd2uqq";
        MATCH_OP(0x7B, (_66), (MAP_0F)) return "vcvtpd2qq";
        MATCH_OP(0xE6, (_F3), (MAP_0F)) return "vcvtdq2pd";
        MATCH_OP(0xE6, (_F2), (MAP_0F)) return "vcvtpd2dq";
        // Unhandled encodings (reference), left for whoever continues this:
        // "VCVTDQ2PS xmm1, {k}{z}, xmm2/m128/m32bcst", [5B] "EVEX.128.0F.W0 5B /r","V","V"
        // "VCVTPS2DQ xmm1, {k}{z}, xmm2/m128/m32bcst", [5B] "EVEX.128.66.0F.W0 5B /r","V","V"
        // "VCVTQQ2PS ymm1{er}, {k}{z}, zmm2", [5B] "EVEX.512.0F.W1 5B /r","V","V"
        // "VCVTTPS2DQ xmm1, {k}{z}, xmm2/m128/m32bcst", [5B] "EVEX.128.F3.0F.W0 5B /r","V","V"
        // "VCVTTPD2UDQ ymm1{sae}, {k}{z}, zmm2", [78] "EVEX.512.0F.W1 78 /r","V","V"
        // "VCVTTPD2UQQ xmm1, {k}{z}, xmm2/m128/m64bcst", [78] "EVEX.128.66.0F.W1 78 /r","V","V"
        // "VCVTTPS2UDQ xmm1, {k}{z}, xmm2/m128/m32bcst", [78] "EVEX.128.0F.W0 78 /r","V","V"
        // "VCVTTPS2UQQ xmm1, {k}{z}, xmm2/m128/m32bcst", [78] "EVEX.128.66.0F.W0 78 /r","V","V"
        // "VCVTTSD2USI r32{sae}, xmm2", [78] "EVEX.128.F2.0F.W0 78 /r","V","V"
        // "VCVTTSS2USI r32{sae}, xmm2", [78] "EVEX.128.F3.0F.W0 78 /r","V","V"
        // "VCVTSD2USI r32{er}, xmm2", [79] "EVEX.128.F2.0F.W0 79 /r","V","V"
        // "VCVTSS2USI r32{er}, xmm2", [79] "EVEX.128.F3.0F.W0 79 /r","V","V"
        // "VCVTPS2UDQ xmm1, {k}{z}, xmm2/m128/m32bcst", [79] "EVEX.128.0F.W0 79 /r","V","V"
        // "VCVTPS2UQQ xmm1, {k}{z}, xmm2/m128/m32bcst", [79] "EVEX.128.66.0F.W0 79 /r","V","V"
        // "VCVTPD2UDQ ymm1{er}, {k}{z}, zmm2", [79] "EVEX.512.0F.W1 79 /r","V","V"
        // "VCVTPD2UQQ xmm1, {k}{z}, xmm2/m128/m64bcst", [79] "EVEX.128.66.0F.W1 79 /r","V","V"
        // "VCVTTPD2QQ xmm1, {k}{z}, xmm2/m128/m64bcst", [7A] "EVEX.128.66.0F.W1 7A /r","V","V"
        // "VCVTTPS2QQ xmm1, {k}{z}, xmm2/m128/m32bcst", [7A] "EVEX.128.66.0F.W0 7A /r","V","V"
        // "VCVTUDQ2PD xmm1, {k}{z}, xmm2/m128/m32bcst", [7A] "EVEX.128.F3.0F.W0 7A /r","V","V"
        // "VCVTUDQ2PS xmm1, {k}{z}, xmm2/m128/m32bcst", [7A] "EVEX.128.F2.0F.W0 7A /r","V","V"
        // "VCVTUQQ2PD xmm1, {k}{z}, xmm2/m128/m64bcst", [7A] "EVEX.128.F3.0F.W1 7A /r","V","V"
        // "VCVTUQQ2PS ymm1{er}, {k}{z}, zmm2", [7A] "EVEX.512.F2.0F.W1 7A /r","V","V"
        // "VCVTUSI2SD xmm1, xmmV, r/m32", [7B] "EVEX.NDS.LIG.F2.0F.W0 7B /r","V","V"
        // "VCVTUSI2SS xmm1, xmmV, r/m32", [7B] "EVEX.NDS.LIG.F3.0F.W0 7B /r","V","V"
        // "VCVTPD2QQ xmm1, {k}{z}, xmm2/m128/m64bcst", [7B] "EVEX.128.66.0F.W1 7B /r","V","V"
        // "VCVTPS2QQ xmm1, {k}{z}, xmm2/m128/m32bcst", [7B] "EVEX.128.66.0F.W0 7B /r","V","V"
        // "VCVTDQ2PD xmm1, {k}{z}, xmm2/m128/m32bcst", [E6] "EVEX.128.F3.0F.W0 E6 /r","V","V"
        // "VCVTPD2DQ ymm1{er}, {k}{z}, zmm2", [E6] "EVEX.512.F2.0F.W1 E6 /r","V","V"
        // "VCVTQQ2PD xmm1, {k}{z}, xmm2/m128/m64bcst", [E6] "EVEX.128.F3.0F.W1 E6 /r","V","V"
        // "VCVTTPD2DQ ymm1{sae}, {k}{z}, zmm2", [E6] "EVEX.512.66.0F.W1 E6 /r","V","V"
        default:
            return "unknown";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment