Skip to content

Instantly share code, notes, and snippets.

@michaeljclark
Last active December 9, 2024 21:47
Show Gist options
  • Save michaeljclark/d94b72fa3d580ea2037e0a4dc5e2fc5b to your computer and use it in GitHub Desktop.
Save michaeljclark/d94b72fa3d580ea2037e0a4dc5e2fc5b to your computer and use it in GitHub Desktop.
five ways to invoke the LLVM disassembler

LLVM programmatic disassembly

five ways to invoke the LLVM disassembler.

  • invoking the disassembler interactively using the llvm-mc command [^1]
  • invoking the disassembler in tests using llvm-lit, the LLVM testing tool.
  • invoking the disassembler with a potential --binary option to llvm-objdump [^2]
  • invoking the disassembler using the LLVM C API [^3] (plus code in this gist).
  • invoking the disassembler using the LLVM C++ API (with code in this gist).

llvm-mc

firstly, one can invoke the disassembler using the llvm-mc command.

$ echo '0x49, 0x0f, 0xc7, 0x0f' | llvm-mc -disassemble -triple x86_64

llvm-lit

secondly, one can invoke the disassembler using llvm-lit to run disassembly tests.

$ ./build/bin/llvm-lit llvm/test/MC/X86

llvm-objdump

thirdly, one can invoke the disassembler in a shell script using llvm-objdump.

#!/bin/sh
# disasm.sh: disassemble the hex bytes given as command-line arguments.
# usage: ./disasm.sh 49 0f c7 0f

# temporary file that will hold the raw bytes.
T=$(mktemp fooXXXXXX)
# turn the plain hex text from the arguments back into binary.
echo $* | xxd -r -p - > ${T}
# equivalent GNU binutils invocation, kept for reference:
# objdump -D -bbinary -mi386:x86-64 -Mintel ${T} | sed -n '/<.data>:/{n;s/0://g p}'
# sed prints only the line following the "<.data>:" header, with "0:" removed.
# NOTE(review): --triple is given no explicit value here -- confirm the intended triple.
llvm-objdump -d --binary --triple -Mintel ${T} | sed -n '/<.data>:/{n;s/0://g p}'
# remove the temporary file.
rm -f ${T}

which we can run like this:

$ ./disasm.sh 49 0f c7 0f
        49 0f c7 0f                  	cmpxchg16b	xmmword ptr [r15]

note: this depends on a pull request to add a --binary option to llvm-objdump[^2].

LLVM C/C++ APIs

and finally, this gist contains example code in C and C++ to show how to invoke the disassembler using the LLVM C API and the LLVM C++ API:

  • llvmdisc.c - example showing how to invoke the disassembler using the LLVM C API.
  • llvmdiscpp.cpp - example showing how to invoke the disassembler using the LLVM C++ API.

conclusion

there is also llvm-mc-disassemble-fuzzer.cpp in the LLVM source repo, which I found by grepping the sources, as well as blog posts from 2010 [^1] and 2017 [^3]. I started my journey into LLVM disassembly with man llvm-objdump, as I was expecting something like the GNU binutils objdump -bbinary option, hence the pull request.

references

addendum

here is an article on debugging in-memory JIT code by hooking up in-memory DWARF objects to the debugger by intercepting calls to __jit_debug_register_code.

# Builds the two LLVM disassembler examples: llvmdisc (C API) and
# llvmdiscpp (C++ API).
cmake_minimum_required (VERSION 3.12)
project(llvmdis)

# Locate an installed LLVM through its CMake package configuration.
find_package(LLVM REQUIRED CONFIG)
include_directories(${LLVM_INCLUDE_DIRS})

# LLVM exports LLVM_DEFINITIONS as one space-separated string. It has to be
# split into a list first; without this step LLVM_DEFINITIONS_LIST is unset
# and no LLVM definitions reach the compiler.
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
add_definitions(${LLVM_DEFINITIONS_LIST})

# -Wall is a compiler option rather than a preprocessor definition.
add_compile_options(-Wall)

# Example using the LLVM C API.
add_executable(llvmdisc llvmdisc.c)
target_link_libraries(llvmdisc LLVM)

# Example using the LLVM C++ API.
add_executable(llvmdiscpp llvmdiscpp.cpp)
target_link_libraries(llvmdiscpp LLVM)
#include <stdio.h>
#include "llvm-c/Disassembler.h"
#include "llvm-c/Support.h"
#include "llvm-c/Target.h"
/* maximum number of instruction bytes printed on one line of hex output */
static const int hexcols = 10;
/*
 * Create a disassembler context for the given target triple, CPU name and
 * feature string by forwarding them to LLVMCreateDisasmCPUFeatures.
 * The remaining arguments of that call are passed as NULL / 0.
 * Returns whatever LLVMCreateDisasmCPUFeatures returns.
 */
LLVMDisasmContextRef disasm_create(const char *triple, const char *mcpu,
const char *features)
{
return LLVMCreateDisasmCPUFeatures(triple, mcpu, features,
NULL, 0, NULL, NULL);
}
void disasm_format_hex(const uint8_t *data, size_t offset, size_t sz)
{
const char* hexdigits = "0123456789abcdef";
size_t nbytes = sz < hexcols ? sz : hexcols;
printf("%08zx:", offset);
for (size_t i = 0; i < nbytes; i++) {
printf(" %c%c", hexdigits[(data[offset+i] >> 4) & 15],
hexdigits[(data[offset+i] >> 0) & 15]);
}
size_t indent = (hexcols - nbytes) * 3 + 8 - (hexcols * 3) % 8;
for (size_t i = 0; i < indent; i++) {
printf(" ");
}
}
/*
 * Disassemble every instruction in data[0 .. data_len) and print one line per
 * instruction to stdout: offset, hex bytes, then the text produced by
 * LLVMDisasmInstruction. Instructions longer than hexcols bytes continue
 * their hex dump on additional lines.
 *
 * The running offset is passed as the instruction's PC, matching the C++
 * example, so the address handed to the disassembler tracks the position in
 * the buffer rather than always being 0.
 *
 * Returns 0 when the whole buffer was decoded. Returns 1, after reporting the
 * offset on stderr, when LLVMDisasmInstruction returns 0 for an instruction
 * (previously this case stopped silently and still returned 0).
 */
int disasm_dump(LLVMDisasmContextRef ctx, const uint8_t *data, size_t data_len)
{
    char text[128];
    size_t offset = 0;

    while (offset < data_len) {
        /* the C API takes a non-const byte pointer, hence the cast */
        size_t sz = LLVMDisasmInstruction(ctx, (uint8_t *)data + offset,
                                          data_len - offset, (uint64_t)offset,
                                          text, sizeof(text));
        if (sz == 0) {
            fprintf(stderr, "%08zx: unable to disassemble\n", offset);
            return 1;
        }

        disasm_format_hex(data, offset, sz);
        puts(text);

        /* spill the remaining bytes of a long instruction onto extra lines */
        while (sz > (size_t)hexcols) {
            offset += hexcols;
            sz -= hexcols;
            disasm_format_hex(data, offset, sz);
            puts("");
        }
        offset += sz;
    }
    return 0;
}
/* Release a disassembler context by passing it to LLVMDisasmDispose. */
void disasm_destroy(LLVMDisasmContextRef ctx)
{
LLVMDisasmDispose(ctx);
}
/*
 * Disassemble a fixed x86-64 byte sequence with the LLVM C API and print it
 * in Intel syntax.
 *
 * Returns the status of disasm_dump, or 1 if no disassembler context could be
 * created for the requested triple.
 */
int main(int argc, char **argv)
{
    (void)argc;

    /* select Intel syntax as if the option had been given on the command line */
    const char *args[] = { argv[0], "--x86-asm-syntax=intel" };
    LLVMParseCommandLineOptions(2, args, "");

    LLVMInitializeAllTargetInfos();
    LLVMInitializeAllTargetMCs();
    LLVMInitializeAllDisassemblers();

    const uint8_t insn[] = {
        0x48, 0xb8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f,
        0x49, 0x0f, 0xc7, 0x0f,
        0x90
    };

    LLVMDisasmContextRef ctx = disasm_create("x86_64", "", "");
    if (ctx == NULL) {
        /* guard against using a context that was never created */
        fprintf(stderr, "%s: unable to create disassembler for x86_64\n", argv[0]);
        return 1;
    }

    int r = disasm_dump(ctx, insn, sizeof(insn));
    disasm_destroy(ctx);
    return r;
}
#include <cstdio>
#include <string>
#include "llvm/MC/TargetRegistry.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
using namespace llvm;
/// Bundles the LLVM MC objects needed to decode machine code and print it.
///
/// Construction looks up the target for a triple and creates the register
/// info, asm info, subtarget info, instruction info, context, disassembler
/// and instruction printer in that order. If any step yields null the
/// constructor stops, leaves the remaining members empty and records a
/// message in `err`; `ok()` then returns false and `disasm()` returns 1.
struct LLVMDisassembler
{
    /// Maximum number of instruction bytes printed per line of hex output.
    static constexpr int hexcols = 10;

    const Target *tg = nullptr;
    std::string err;
    MCTargetOptions options;
    std::unique_ptr<MCRegisterInfo> ri;
    std::unique_ptr<MCAsmInfo> ai;
    std::unique_ptr<MCSubtargetInfo> si;
    std::unique_ptr<MCInstrInfo> ii;
    std::unique_ptr<MCContext> cx;
    std::unique_ptr<MCDisassembler> di;
    std::unique_ptr<MCInstPrinter> ip;

    /// @param triple   target triple used for the target lookup
    /// @param cpu      CPU name passed to the subtarget info
    /// @param features feature string passed to the subtarget info
    LLVMDisassembler(std::string triple, std::string cpu, std::string features)
    {
        // lookupTarget fills `err` itself when it fails.
        tg = TargetRegistry::lookupTarget(triple, err);
        if (!tg) return;

        ri.reset(tg->createMCRegInfo(triple));
        if (!ri) { err = "no register info for target " + triple; return; }

        ai.reset(tg->createMCAsmInfo(*ri, triple, options));
        if (!ai) { err = "no assembly info for target " + triple; return; }

        si.reset(tg->createMCSubtargetInfo(triple, cpu, features));
        if (!si) { err = "no subtarget info for target " + triple; return; }

        ii.reset(tg->createMCInstrInfo());
        if (!ii) { err = "no instruction info for target " + triple; return; }

        cx.reset(new MCContext(Triple(triple), ai.get(), ri.get(), si.get()));

        di.reset(tg->createMCDisassembler(*si, *cx));
        if (!di) { err = "no disassembler for target " + triple; return; }

        ip.reset(tg->createMCInstPrinter(Triple(triple),
            ai->getAssemblerDialect(), *ai, *ii, *ri));
        if (!ip) { err = "no instruction printer for target " + triple; return; }
    }

    /// True when every object needed by disasm() was created.
    bool ok() const { return si && di && ip; }

    /// Append the offset (eight hex digits and ": "), up to `hexcols` bytes
    /// of `data` starting at `offset`, and padding so that the text which
    /// follows is column-aligned.
    void format_hex(raw_string_ostream &out, ArrayRef<uint8_t> data,
        size_t offset, size_t sz)
    {
        const size_t cols = hexcols;
        const size_t nbytes = sz < cols ? sz : cols;
        out << format_hex_no_prefix(offset, 8) << ": "
            << format_bytes(data.slice(offset, nbytes), {}, hexcols, 1);
        out.indent(static_cast<unsigned>(
            (cols - nbytes) * 3 + 8 - (cols * 3) % 8));
    }

    /// Decode and print the instructions in `data`, starting at `offset`.
    /// Each instruction is written to stdout as offset, hex bytes and text;
    /// instructions longer than `hexcols` bytes continue on extra lines.
    ///
    /// @return 0 when the whole buffer was decoded; 1 (with a message on
    ///         stderr) if construction failed or an instruction could not
    ///         be decoded.
    int disasm(size_t offset, ArrayRef<uint8_t> data)
    {
        if (!ok()) {
            fprintf(stderr, "disassembler unavailable: %s\n", err.c_str());
            return 1;
        }

        std::string buf;
        raw_string_ostream out(buf);

        while (offset < data.size()) {
            MCInst in;          // fresh instruction for every decode
            uint64_t sz = 0;
            if (di->getInstruction(in, sz, data.slice(offset), offset, out)
                    == MCDisassembler::Fail || sz == 0) {
                fprintf(stderr, "%08zx: unable to disassemble\n", offset);
                return 1;
            }

            format_hex(out, data, offset, sz);
            ip->printInst(&in, offset, "", *si, out);
            printf("%s\n", buf.c_str());
            buf.clear();

            // spill the remaining bytes of a long instruction onto extra lines
            while (sz > static_cast<uint64_t>(hexcols)) {
                offset += hexcols;
                sz -= hexcols;
                format_hex(out, data, offset, sz);
                printf("%s\n", buf.c_str());
                buf.clear();
            }
            offset += sz;
        }
        return 0;
    }
};
int main(int argc, char **argv)
{
SmallVector<const char *> Args = { argv[0], "--x86-asm-syntax=intel" };
llvm::cl::ParseCommandLineOptions(Args.size(), Args.data());
InitializeAllTargetInfos();
InitializeAllTargetMCs();
InitializeAllDisassemblers();
const uint8_t insn[] = {
0x48, 0xb8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f,
0x49, 0x0f, 0xc7, 0x0f,
0x90
};
LLVMDisassembler dis("x86_64", "", "");
return dis.disasm(0, insn);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment