Last active
May 22, 2024 20:48
-
-
Save spencerpogo/b7530c85705744cce180ec135eba1ebf to your computer and use it in GitHub Desktop.
WIP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from elftools.elf.elffile import ELFFile | |
from elftools.elf.sections import SymbolTableSection | |
from relocation import RelocationHandler | |
from elftools.elf.enums import ENUM_RELOC_TYPE_x64 | |
from elftools.elf.constants import SHN_INDICES | |
from binascii import hexlify | |
from io import BytesIO | |
# from pwn import disasm | |
from capstone import Cs, CS_ARCH_X86, CS_MODE_64 | |
from capstone.x86 import * | |
def symbol_by_name(symbols, sym):
    """
    Return the first entry of `symbols` whose ``name`` equals `sym`.

    Entries are fetched one at a time, in index order, through
    ``symbols.get_symbol``. Raises AssertionError if no entry matches.
    """
    total = symbols.num_symbols()
    candidates = (symbols.get_symbol(index) for index in range(total))
    match = next((entry for entry in candidates if entry.name == sym), None)
    if match is None:
        raise AssertionError(
            f"unable to find symbol {sym!r} in {symbols.num_symbols()} entry symbol table"
        )
    return match
def wip_apply_relocations():
    """
    Messing with applying relocations. not currently used.

    NOTE(review): `e` and `text` are neither parameters nor locals here, and no
    module-level definition is visible, so calling this as-is would presumably
    raise NameError. Confirm before reviving it.
    """
    reler = RelocationHandler(e)
    # Register a recipe for R_X86_64_PLT32 in the handler's x64 recipe table:
    # a 4-byte field with an addend, computed by the handler's
    # sym-plus-addend PC-relative function.
    reler._RELOCATION_RECIPES_X64[ENUM_RELOC_TYPE_x64["R_X86_64_PLT32"]] = (
        RelocationHandler._RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=RelocationHandler._reloc_calc_sym_plus_addend_pcrel,
        )
    )
    # The relocation section is looked up but never applied; the apply call
    # below is commented out.
    rel = reler.find_relocations_for_section(text)
    # reler.apply_section_relocations(f, rel)
def relocations_for_symbol(e, rela, symbol):
    """
    Collect every relocation in `rela` whose referenced symbol is named `symbol`.

    Symbol names are resolved through the section of the ELFFile `e` that
    `rela` links to via its ``sh_link`` field. Returns a (possibly empty) list
    in the order the relocations are iterated.
    """
    symtab = e.get_section(rela["sh_link"])
    matches = []
    for reloc in rela.iter_relocations():
        referenced = symtab.get_symbol(reloc["r_info_sym"])
        if referenced.name == symbol:
            matches.append(reloc)
    return matches
def relocation_for_symbol(e, rela, func):
    """
    Return the one relocation in `rela` whose referenced symbol is named `func`.

    The lookup is delegated to `relocations_for_symbol`. Raises AssertionError
    when there is no matching relocation or when there is more than one.
    """
    matches = relocations_for_symbol(e, rela, func)
    count = len(matches)
    if count == 1:
        return matches[0]
    if count == 0:
        raise AssertionError(f"no relocations for func {func!r}")
    raise AssertionError(
        f"expected one relocation for func {func!r}, instead got {count}"
    )
def find_continaing_function(e, rela, section_offset):
    """
    Find the first STT_FUNC symbol in the symbol table associated with `rela` that
    contains the byte that lies `section_offset` bytes from the start of the section
    `rela` is associated with.

    Contains means within the half-open range [st_value, st_value+st_size), so an
    offset equal to a function's end belongs to whatever follows it, not to that
    function.

    Return the symbol, and the number of bytes from the start of the function to
    the offset. Raise AssertionError if no STT_FUNC symbol contains the offset.
    """
    symtab = e.get_section(rela["sh_link"])
    for sym in symtab.iter_symbols():
        if sym["st_info"]["type"] != "STT_FUNC":
            continue
        start = sym["st_value"]
        # exclusive upper bound: the byte at start + st_size is the first byte
        # past the function
        if start <= section_offset < start + sym["st_size"]:
            return sym, section_offset - start
    raise AssertionError("no STT_FUNC contains this offset")
def read_exact(f, n):
    """
    Read exactly `n` bytes from `f` and return them as a bytearray.

    `f` must provide ``readinto``. The call is repeated until the buffer is
    full; EOFError is raised if a call reports zero bytes before that.
    """
    data = bytearray(n)
    window = memoryview(data)
    filled = 0
    while filled < n:
        # hand the stream only the part of the buffer that is still empty
        got = f.readinto(window[filled:])
        if got == 0:
            raise EOFError
        filled += got
    return data
def read_symbol(f, section, sym):
    """
    Return the bytes occupied by symbol `sym`, assumed to live in `section`.

    The file position is the section's ``sh_offset`` plus the symbol's
    ``st_value``; ``st_size`` bytes are read from `f` starting there. The
    position is printed before seeking.
    """
    file_pos = section["sh_offset"] + sym["st_value"]
    print('seek to', hex(file_pos))
    f.seek(file_pos)
    size = sym["st_size"]
    return read_exact(f, size)
# Argument registers in call order (rdi, rsi, rdx, rcx, r8, r9), stored as capstone
# register ids so they can be compared against the register ids capstone reports.
# Index i holds the register carrying argument i.
calling_convention = [
    X86_REG_RDI,
    X86_REG_RSI,
    X86_REG_RDX,
    X86_REG_RCX,
    X86_REG_R8,
    X86_REG_R9,
]
def relocs_in_range(e, rela, start, sz):
    """
    Generator that will yield all relocations in `rela` whose ``r_offset`` lies
    within the half-open range of section offsets [start, start+sz).

    `e` (the ELFFile) is accepted for signature parity with the other helpers
    but is not consulted. A relocation located exactly at start+sz belongs to
    whatever follows the range and is not yielded.
    """
    end = start + sz
    for reloc in rela.iter_relocations():
        if start <= reloc["r_offset"] < end:
            yield reloc
def reloc_in_range(e, rela, start, sz):
    """
    Return the single relocation that `relocs_in_range` yields for the given
    arguments.

    Raises AssertionError when it yields none or more than one.
    """
    hits = list(relocs_in_range(e, rela, start, sz))
    count = len(hits)
    if count == 0:
        raise AssertionError("no relocations found in range")
    if count > 1:
        raise AssertionError(f"found {count} relocs in range")
    return hits[0]
def insn_modifying_reg_before(e, trela, text, reg, func, func_offset):
    """
    Return the last instruction that writes `reg` and ends before `func_offset`.

    `text` is disassembled as 64-bit x86 starting at address 0, so instruction
    addresses are relative to the first byte of `text` and `func_offset` must be
    expressed in the same terms (callers pass the bytes of one function and an
    offset from its start). `reg` is a capstone register id.

    `e`, `trela` and `func` are accepted for signature compatibility but are not
    used by the search itself.

    Raises AssertionError if no such instruction exists.
    """
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    # detail mode must be on for regs_access() to report register usage
    md.detail = True
    last_writer = None
    for insn in md.disasm(text, 0x0):
        # stop when we get to the instruction containing the relocation in
        # question; addresses only grow from here, so nothing later can qualify
        if insn.address + insn.size >= func_offset:
            break
        _, regs_written = insn.regs_access()
        if reg in regs_written:
            last_writer = insn
    if last_writer is None:
        raise AssertionError(f"cannot find an instruction modifying {reg!r}")
    return last_writer
def read_symbol_relative_reloc(e, rela, reloc, n):
    """
    Read `n` bytes from the file position that `reloc` points at.

    The symbol referenced by `reloc` is looked up in the symbol table linked
    from `rela`. The position is the ``sh_offset`` of the symbol's section plus
    the symbol's ``st_value`` plus the relocation's ``r_addend``. The position
    is printed, then the bytes are read from ``e.stream``.
    """
    symbol = e.get_section(rela["sh_link"]).get_symbol(reloc["r_info_sym"])
    home = e.get_section(symbol["st_shndx"])
    location = home["sh_offset"] + symbol["st_value"] + reloc["r_addend"]
    print(f"seek to {hex(location)}={location}")
    stream = e.stream
    stream.seek(location)
    return stream.read(n)
def unused_subroutine_1():
    """
    Parked experiment: locate the module init function and disassemble it.

    NOTE(review): this reads `f`, `e`, `module_section`, `addresses`, `base`,
    `start_offset` and `text`, none of which are defined in this function;
    presumably it was lifted out of main() and cannot run as-is.
    """
    # single-iteration loop; it only provides an extra block level
    for _ in [1]:
        # when compiling a kernel module, the compiler will create an init_module
        # symbol, and set it to be an alias of the module's init function. It will
        # then export init_module as a global symbol.
        # source:
        # https://terenceli.github.io/%E6%8A%80%E6%9C%AF/2018/06/02/linux-loadable-module
        # The values of mod->init and mod->exit come from the struct module
        # __this_module variable contained in the .gnu.linkonce.this_module section of
        # the module's .ko file. The kernel assumes that this variable is at the start
        # of the section, and in fact it is the only variable in the section.
        # https://stackoverflow.com/a/68166097/9196137
        # NOTE(review): 0x150 is presumably the offset of the init pointer inside
        # struct module for one specific kernel build; verify.
        init_func_offset = 0x150
        f.seek(module_section["sh_offset"] + init_func_offset)
        init_addr = e.structs.Elf_word64('').parse_stream(f)
        print("virtual address", hex(init_addr))
        init_section_offset = init_addr - addresses[".text"]
        print("section offset", init_section_offset)
        init_file_offset = init_addr - base + start_offset
        print("file offset", hex(init_file_offset))
        f.seek(init_file_offset)
        # read from the init function to the end of .text
        init_text = f.read(text["sh_size"] - init_section_offset)
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        # print instructions up to and including the first return
        for insn in md.disasm(init_text, 0x0):
            print(insn)
            if insn.insn_name() in {"ret", "retq"}:
                break
        # dump the whole stream to a file named 'out'
        with open('out', 'wb') as fout:
            f.seek(0)
            fout.write(f.read())
        return
        # NOTE(review): everything below follows an unconditional return and is
        # unreachable; it is an alternative approach that finds the init function
        # through the init_module symbol instead.
        symbols = e.get_section_by_name(".symtab")
        if not isinstance(symbols, SymbolTableSection):
            raise AssertionError("unable to load symbol table")
        init = symbol_by_name(symbols, "init_module")
        if init["st_info"]["type"] != "STT_FUNC":
            raise AssertionError("expected init_module to be an STT_FUNC")
        print(hex(text["sh_offset"] + init["st_value"]))
        init_text = read_symbol(f, text, init)
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        for insn in md.disasm(init_text, 0x0):
            print(insn)
        with open('out', 'wb') as fout:
            f.seek(0)
            fout.write(f.read())
        return
def to_signed_32(n):
    """
    Interpret the low 32 bits of `n` as a two's-complement signed integer.

    Bits above bit 31 are discarded; the result lies in [-2**31, 2**31 - 1].
    """
    low = n & 0xFFFFFFFF
    if low & 0x80000000:
        # sign bit set: the value sits 2**32 above its signed meaning
        return low - 0x100000000
    return low
def main():
    """
    Trace a kernel module from its proc_create call site to its interpreter loop.

    The ELF file named by ``sys.argv[1]`` is read into memory, relocations for
    ``.gnu.linkonce.this_module``, ``.data`` and ``.text`` are applied against
    made-up load addresses, and then the code follows: the ``proc_create`` call
    site, the instruction loading its fops argument, the ``unlocked_ioctl``
    pointer in that table, the ioctl command check, and finally the interpreter
    loop inside the handler. Findings are printed along the way.

    Raises AssertionError whenever the binary does not have the expected shape.
    """
    with open(sys.argv[1], "rb") as f_rdonly:
        # if you want to modify the data, such as by applying relocations, read the
        # entire file into a BytesIO and use it.
        f = BytesIO(f_rdonly.read())
        # otherwise, use the read-only file handle directly.
        # f = f_rdonly
        e = ELFFile(f)
        text = e.get_section_by_name(".text")
        reler = RelocationHandler(e)
        trela = reler.find_relocations_for_section(text)
        if not trela.is_RELA():
            raise AssertionError(
                "text relocations are REL but expected RELA. this will probably work "
                + "just as well, but it hasn't been tested."
            )
        reler = RelocationHandler(e)
        # teach the handler about R_X86_64_PLT32: 4 bytes, with addend, PC-relative
        reler._RELOCATION_RECIPES_X64[ENUM_RELOC_TYPE_x64["R_X86_64_PLT32"]] = (
            RelocationHandler._RELOCATION_RECIPE_TYPE(
                bytesize=4,
                has_addend=True,
                calc_func=RelocationHandler._reloc_calc_sym_plus_addend_pcrel,
            )
        )
        # kernel modules don't request particular memory offsets. they must be PIE as
        # they have to be loaded into kernel address space.
        # This means that when we mimic performing relocations, we can pick whatever
        # virtual addresses we want.
        # Let's pick a scheme that will match the ghidra listing view.
        # Make base match the start of the first section.
        base = 0x00100000
        start_offset = next(
            filter(lambda sec: sec["sh_type"] != "SHT_NULL", e.iter_sections())
        )["sh_offset"]
        offset_to_addr = lambda offset: offset + base - start_offset
        addr_to_offset = lambda addr: addr - base + start_offset
        addresses = {
            sec.name: offset_to_addr(sec["sh_offset"]) for sec in e.iter_sections()
        }
        # the dict is keyed by name, so a shorter dict means two sections collided
        if len(addresses) != e.num_sections():
            raise AssertionError("duplicate section names")
        module_section = e.get_section_by_name(".gnu.linkonce.this_module")
        rel = reler.find_relocations_for_section(module_section)
        reler.apply_section_relocations(f, addresses, module_section, rel)
        data = e.get_section_by_name(".data")
        rel = reler.find_relocations_for_section(data)
        reler.apply_section_relocations(f, addresses, data, rel)
        rel = reler.find_relocations_for_section(text)
        reler.apply_section_relocations(f, addresses, text, rel)
        # another way to find the functions we want is to find the call sites of the
        # kernel APIs they use. In order to be relocated properly, the module will have
        # to reference the symbol names they want to import. We can trace these
        # relocations back to the `call` instruction they are adjusting.
        proc_create_reloc = relocation_for_symbol(e, trela, "proc_create")
        print("proc_create reloc", proc_create_reloc)
        func, func_offset = find_continaing_function(
            e, trela, proc_create_reloc["r_offset"]
        )
        print("func", func.name, func.entry, func_offset)
        init_text = read_symbol(f, text, func)
        # struct proc_dir_entry *proc_create(
        # const char *name, // arg 0
        # umode_t mode, // arg 1
        # struct proc_dir_entry *parent, // arg 2
        # const struct file_operations *proc_fops // arg 3
        # );
        # ...so proc_fops is calling_convention[3]
        fops_loading_insn = insn_modifying_reg_before(
            e, trela, init_text, calling_convention[3], func, func_offset
        )
        print(fops_loading_insn)
        # expect `mov <reg>, <imm>`; the immediate is the (relocated) fops address
        if fops_loading_insn.insn_name() != "mov":
            raise AssertionError()
        if len(fops_loading_insn.operands) != 2:
            raise AssertionError()
        _, fops_operand = fops_loading_insn.operands
        if fops_operand.type != CS_OP_IMM:
            raise AssertionError()
        fops_addr = fops_operand.imm
        print("fops addr", hex(fops_addr))
        fops_file_offset = addr_to_offset(fops_addr)
        print("fops file offset", hex(fops_file_offset))
        unlocked_ioctl_offset = 0x50
        f.seek(fops_file_offset + unlocked_ioctl_offset)
        ioctl_addr = e.structs.Elf_word64("").parse_stream(f)
        print("ioctl addr:", hex(ioctl_addr))
        ioctl_offset = addr_to_offset(ioctl_addr)
        print("ioctl offset:", hex(ioctl_offset))
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        # detail needed to populate insn.operands
        md.detail = True
        f.seek(ioctl_offset)
        ioctl_code = f.read(256)
        insn_gen = md.disasm(ioctl_code, ioctl_addr)
        # find the `cmp ..., 0x539` that checks the ioctl command number.
        # The handler variable is deliberately not called `e`: that name holds
        # the ELFFile for the whole function.
        while True:
            try:
                insn = next(insn_gen)
            except StopIteration as exc:
                raise AssertionError("didn't find ioctl opcode cmp instruction") from exc
            if insn.id == X86_INS_CMP and any(
                i.type == X86_OP_IMM and i.imm == 0x539 for i in insn.operands
            ):
                break
        insn = next(insn_gen)
        print(insn)
        if insn.id != X86_INS_JE:
            raise AssertionError()
        # jmp always has one operand
        ioctl_part2_op, = insn.operands
        if ioctl_part2_op.type != X86_OP_IMM:
            raise AssertionError()
        # as long as we set the disassembly base address correctly, capstone will do
        # the relative jump calculation for us (ins addr + ins size + rel value)
        ioctl_part2_addr = ioctl_part2_op.imm
        print("ioctl_part2_addr", hex(ioctl_part2_addr))
        ioctl_part2_offset = addr_to_offset(ioctl_part2_addr)
        print("ioctl_part2_offset", hex(ioctl_part2_offset))
        f.seek(ioctl_part2_offset)
        gen = md.disasm(f.read(256), ioctl_part2_addr)
        while True:
            try:
                insn = next(gen)
            except StopIteration as exc:
                raise AssertionError("didn't find interpreter loop end comparison") from exc
            # the following block searches for this instruction:
            # cmp byte ptr [rsp + <ip_rsp_off>], 0xff
            if insn.id == X86_INS_CMP:
                # cmp always has two operands
                a, b = insn.operands
                # we are going to make these checks pretty strict, at the risk of
                # being broken by changes in optimizations from level to level.
                # order of a, b will always be the same due to instruction encoding.
                if (
                    a.type == X86_OP_MEM
                    and a.size == 1  # operand size 1 byte => byte ptr
                    and a.mem.base == X86_REG_RSP
                    and b.type == X86_OP_IMM
                    and b.imm == 0xff
                ):
                    break
        ip_rsp_off = a.mem.disp
        loop_end_addr = insn.address
        # the conditional jump right after the compare targets the loop start
        jne = next(gen)
        if jne.id != X86_INS_JNE:
            raise AssertionError()
        jne_op = jne.operands[0]
        if jne_op.type != X86_OP_IMM:
            raise AssertionError()
        loop_start_addr = jne_op.value.imm
        print("loop start", hex(loop_start_addr))
        print("loop end", hex(loop_end_addr))
        f.seek(addr_to_offset(loop_start_addr))
        gen = md.disasm(f.read(loop_end_addr - loop_start_addr), loop_start_addr)
        # find the last call instruction within the loop
        call_insn = None
        for insn in gen:
            if insn.id == X86_INS_CALL:
                call_insn = insn
        print(call_insn)
        # void interpret_instruction(vmstate_t *state, instruction_t ins)
        target_reg = calling_convention[1]
        f.seek(addr_to_offset(loop_start_addr))
        gen = md.disasm(f.read(call_insn.address - loop_start_addr), loop_start_addr)
        # this next bit is going to unfortunately be even more optimization dependent
        # than usual. I will add more cases as needed.
        while True:
            try:
                insn = next(gen)
            except StopIteration as exc:
                raise AssertionError("didn't find load ip insn") from exc
            # we look for this instruction:
            # movzx <some reg>, byte ptr [rsp + <ip_rsp_off>]
            if (
                insn.id == X86_INS_MOVZX
                and insn.operands[1].type == X86_OP_MEM
                and insn.operands[1].mem.base == X86_REG_RSP
                and insn.operands[1].mem.disp == ip_rsp_off
                and insn.operands[1].size == 1
                and insn.operands[0].type == X86_OP_REG
            ):
                break
        ip_reg = insn.operands[0].reg
        print("ip reg is", md.reg_name(ip_reg))
        # later:
        # lea <ip reg>, [<ip reg> + <reg> * 2] # multiply by sizeof(yan85 instruction) = 3
        # add <ip reg>, <&start of memory>
        # no need to look for them
        # find: movzx <reg>, word ptr [<ip reg>]
        while True:
            try:
                insn = next(gen)
            except StopIteration as exc:
                raise AssertionError("didn't find load instruction low bytes insn") from exc
            if (
                insn.id == X86_INS_MOVZX
                and insn.operands[0].type == X86_OP_REG
                and insn.operands[1].type == X86_OP_MEM
                and insn.operands[1].size == 2
                and insn.operands[1].mem.base == ip_reg
            ):
                return
        # NOTE(review): the loop above only exits by return or raise, so nothing
        # below this point executes. It is the earlier relocation-based way of
        # locating unlocked_ioctl, kept for reference.
        # we assume that fops is located in the .data section, so find the corresponding
        # relocation.
        fops_reloc = reloc_in_range(
            e,
            trela,
            func["st_value"] + fops_loading_insn.address,
            fops_loading_insn.size,
        )
        print(fops_reloc)
        # for information about relocation types, see page 72 of the System V AMD64 ABI
        # documentation: https://refspecs.linuxbase.org/elf/x86_64-abi-0.99.pdf
        if fops_reloc["r_info_type"] != ENUM_RELOC_TYPE_x64["R_X86_64_32S"]:
            raise AssertionError("fops reloc type changed. investigate.")
        # you can find these offsets either from subtracting in the ghidra listing, or
        # by using offsetof(struct file_operations, unlocked_ioctl) in a kernel module
        # you compile, then disassembling it (or you could load it and then printk).
        # I wanted to make a one-liner you could use to dump an offset, but including
        # any one linux kernel header pulls in a ton of other header files that don't
        # easily work outside of the normal build process.
        unlocked_ioctl_offset = 0x50
        # we assume that the entry in .data will be a function pointer, and therefore
        # will be written by a relocation.
        container_sym = e.get_section(trela["sh_link"]).get_symbol(
            fops_reloc["r_info_sym"]
        )
        if container_sym["st_info"]["type"] != "STT_SECTION":
            raise AssertionError("fops referenced symbol changed. investigate")
        container_section = e.get_section(container_sym["st_shndx"])
        container_rela = reler.find_relocations_for_section(container_section)
        container_offset = (
            container_sym["st_value"] + fops_reloc["r_addend"] + unlocked_ioctl_offset
        )
        print(hex(container_offset))
        if fops_reloc["r_info_type"] != 11:
            raise AssertionError("fops relocation type changed, check that it is still 8 bytes")
        ioctl_reloc = reloc_in_range(e, container_rela, container_offset, 8)
        print(e.get_section(container_rela["sh_link"]).get_symbol(
            ioctl_reloc["r_info_sym"]
        ).entry)
        print(
            read_symbol_relative_reloc(
                e, trela, fops_reloc, unlocked_ioctl_offset + 0x8
            )
        )


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from elftools.elf.elffile import ELFFile | |
from elftools.elf.sections import SymbolTableSection | |
from elftools.elf.relocation import RelocationHandler | |
from elftools.elf.enums import ENUM_RELOC_TYPE_x64 | |
from elftools.elf.constants import SHN_INDICES | |
from binascii import hexlify | |
from io import BytesIO | |
# from pwn import disasm | |
from capstone import Cs, CS_ARCH_X86, CS_MODE_64 | |
from capstone.x86 import * | |
def symbol_by_name(symbols, sym):
    """
    Return the first entry of `symbols` whose ``name`` equals `sym`.

    Entries are fetched one at a time, in index order, through
    ``symbols.get_symbol``. Raises AssertionError if no entry matches.
    """
    total = symbols.num_symbols()
    candidates = (symbols.get_symbol(index) for index in range(total))
    match = next((entry for entry in candidates if entry.name == sym), None)
    if match is None:
        raise AssertionError(
            f"unable to find symbol {sym!r} in {symbols.num_symbols()} entry symbol table"
        )
    return match
def wip_apply_relocations():
    """
    Messing with applying relocations. not currently used.

    NOTE(review): `e` and `text` are neither parameters nor locals here, and no
    module-level definition is visible, so calling this as-is would presumably
    raise NameError. Confirm before reviving it.
    """
    reler = RelocationHandler(e)
    # Register a recipe for R_X86_64_PLT32 in the handler's x64 recipe table:
    # a 4-byte field with an addend, computed by the handler's
    # sym-plus-addend PC-relative function.
    reler._RELOCATION_RECIPES_X64[ENUM_RELOC_TYPE_x64["R_X86_64_PLT32"]] = (
        RelocationHandler._RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=RelocationHandler._reloc_calc_sym_plus_addend_pcrel,
        )
    )
    # The relocation section is looked up but never applied; the apply call
    # below is commented out.
    rel = reler.find_relocations_for_section(text)
    # reler.apply_section_relocations(f, rel)
def relocations_for_symbol(e, rela, symbol):
    """
    Collect every relocation in `rela` whose referenced symbol is named `symbol`.

    Symbol names are resolved through the section of the ELFFile `e` that
    `rela` links to via its ``sh_link`` field. Returns a (possibly empty) list
    in the order the relocations are iterated.
    """
    symtab = e.get_section(rela["sh_link"])
    matches = []
    for reloc in rela.iter_relocations():
        referenced = symtab.get_symbol(reloc["r_info_sym"])
        if referenced.name == symbol:
            matches.append(reloc)
    return matches
def relocation_for_symbol(e, rela, func):
    """
    Return the one relocation in `rela` whose referenced symbol is named `func`.

    The lookup is delegated to `relocations_for_symbol`. Raises AssertionError
    when there is no matching relocation or when there is more than one.
    """
    matches = relocations_for_symbol(e, rela, func)
    count = len(matches)
    if count == 1:
        return matches[0]
    if count == 0:
        raise AssertionError(f"no relocations for func {func!r}")
    raise AssertionError(
        f"expected one relocation for func {func!r}, instead got {count}"
    )
def find_continaing_function(e, rela, section_offset):
    """
    Find the first STT_FUNC symbol in the symbol table associated with `rela` that
    contains the byte that lies `section_offset` bytes from the start of the section
    `rela` is associated with.

    Contains means within the half-open range [st_value, st_value+st_size), so an
    offset equal to a function's end belongs to whatever follows it, not to that
    function.

    Return the symbol, and the number of bytes from the start of the function to
    the offset. Raise AssertionError if no STT_FUNC symbol contains the offset.
    """
    symtab = e.get_section(rela["sh_link"])
    for sym in symtab.iter_symbols():
        if sym["st_info"]["type"] != "STT_FUNC":
            continue
        start = sym["st_value"]
        # exclusive upper bound: the byte at start + st_size is the first byte
        # past the function
        if start <= section_offset < start + sym["st_size"]:
            return sym, section_offset - start
    raise AssertionError("no STT_FUNC contains this offset")
def read_exact(f, n):
    """
    Read exactly `n` bytes from `f` and return them as a bytearray.

    `f` must provide ``readinto``. The call is repeated until the buffer is
    full; EOFError is raised if a call reports zero bytes before that.
    """
    data = bytearray(n)
    window = memoryview(data)
    filled = 0
    while filled < n:
        # hand the stream only the part of the buffer that is still empty
        got = f.readinto(window[filled:])
        if got == 0:
            raise EOFError
        filled += got
    return data
def read_symbol(f, section, sym):
    """
    Return the bytes occupied by symbol `sym`, assumed to live in `section`.

    The file position is the section's ``sh_offset`` plus the symbol's
    ``st_value``; ``st_size`` bytes are read from `f` starting there.
    """
    file_pos = section["sh_offset"] + sym["st_value"]
    f.seek(file_pos)
    size = sym["st_size"]
    return read_exact(f, size)
# Argument registers in call order (rdi, rsi, rdx, rcx, r8, r9), stored as capstone
# register ids so they can be compared against the register ids capstone reports.
# Index i holds the register carrying argument i.
calling_convention = [
    X86_REG_RDI,
    X86_REG_RSI,
    X86_REG_RDX,
    X86_REG_RCX,
    X86_REG_R8,
    X86_REG_R9,
]
def relocs_in_range(e, rela, start, sz):
    """
    Generator that will yield all relocations in `rela` whose ``r_offset`` lies
    within the half-open range of section offsets [start, start+sz).

    `e` (the ELFFile) is accepted for signature parity with the other helpers
    but is not consulted. A relocation located exactly at start+sz belongs to
    whatever follows the range and is not yielded.
    """
    end = start + sz
    for reloc in rela.iter_relocations():
        if start <= reloc["r_offset"] < end:
            yield reloc
def reloc_in_range(e, rela, start, sz):
    """
    Return the single relocation that `relocs_in_range` yields for the given
    arguments.

    Raises AssertionError when it yields none or more than one.
    """
    hits = list(relocs_in_range(e, rela, start, sz))
    count = len(hits)
    if count == 0:
        raise AssertionError("no relocations found in range")
    if count > 1:
        raise AssertionError(f"found {count} relocs in range")
    return hits[0]
def insn_modifying_reg_before(e, trela, text, reg, func, func_offset):
    """
    Return the last instruction that writes `reg` and ends before `func_offset`.

    `text` is disassembled as 64-bit x86 starting at address 0, so instruction
    addresses are relative to the first byte of `text` and `func_offset` must be
    expressed in the same terms (callers pass the bytes of one function and an
    offset from its start). `reg` is a capstone register id.

    `e`, `trela` and `func` are accepted for signature compatibility but are not
    used by the search itself.

    Raises AssertionError if no such instruction exists.
    """
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    # detail mode must be on for regs_access() to report register usage
    md.detail = True
    last_writer = None
    for insn in md.disasm(text, 0x0):
        # stop when we get to the instruction containing the relocation in
        # question; addresses only grow from here, so nothing later can qualify
        if insn.address + insn.size >= func_offset:
            break
        _, regs_written = insn.regs_access()
        if reg in regs_written:
            last_writer = insn
    if last_writer is None:
        raise AssertionError(f"cannot find an instruction modifying {reg!r}")
    return last_writer
def read_symbol_relative_reloc(e, rela, reloc, n):
    """
    Read `n` bytes from the file position that `reloc` points at.

    The symbol referenced by `reloc` is looked up in the symbol table linked
    from `rela`. The position is the ``sh_offset`` of the symbol's section plus
    the symbol's ``st_value`` plus the relocation's ``r_addend``. The position
    is printed, then the bytes are read from ``e.stream``.
    """
    symbol = e.get_section(rela["sh_link"]).get_symbol(reloc["r_info_sym"])
    home = e.get_section(symbol["st_shndx"])
    location = home["sh_offset"] + symbol["st_value"] + reloc["r_addend"]
    print(f"seek to {hex(location)}={location}")
    stream = e.stream
    stream.seek(location)
    return stream.read(n)
def main():
    """
    Trace a kernel module from its proc_create call site towards unlocked_ioctl.

    Opens the ELF file named by ``sys.argv[1]`` read-only, finds the function
    containing the ``proc_create`` relocation, the instruction that last writes
    the fops argument register before that call, the relocation patching that
    instruction, and then the relocation covering the ``unlocked_ioctl`` slot
    of the referenced table. Intermediate findings are printed.

    Raises AssertionError whenever the binary does not have the expected shape.
    """
    with open(sys.argv[1], "rb") as f_rdonly:
        # if you want to modify the data, such as by applying relocations, read the
        # entire file into a BytesIO and use it.
        # f = BytesIO(f_rdonly.read())
        # otherwise, use the read-only file handle directly.
        f = f_rdonly
        e = ELFFile(f)
        text = e.get_section_by_name(".text")
        reler = RelocationHandler(e)
        trela = reler.find_relocations_for_section(text)
        if not trela.is_RELA():
            raise AssertionError(
                "text relocations are REL but expected RELA. this will probably work "
                + "just as well, but it hasn't been tested."
            )
        # when compiling a kernel module, the compiler will create an init_module
        # symbol, and set it to be an alias of the module's init function. It will
        # then export init_module as a global symbol.
        # source:
        # https://terenceli.github.io/%E6%8A%80%E6%9C%AF/2018/06/02/linux-loadable-module
        symbols = e.get_section_by_name(".symtab")
        if not isinstance(symbols, SymbolTableSection):
            raise AssertionError("unable to load symbol table")
        # `init` is only validated here; the rest of the function does not use it
        init = symbol_by_name(symbols, "init_module")
        if init["st_info"]["type"] != "STT_FUNC":
            raise AssertionError("expected init_module to be an STT_FUNC")
        # another way to find the functions we want is to find the call sites of the
        # kernel APIs they use. In order to be relocated properly, the module will have
        # to reference the symbol names they want to import. We can trace these
        # relocations back to the `call` instruction they are adjusting.
        proc_create_reloc = relocation_for_symbol(e, trela, "proc_create")
        print(proc_create_reloc)
        func, func_offset = find_continaing_function(
            e, trela, proc_create_reloc["r_offset"]
        )
        print(func, func.name, func.entry, func_offset)
        init_text = read_symbol(f, text, func)
        print(init_text)
        print(hex(func_offset))
        # struct proc_dir_entry *proc_create(
        # const char *name, // arg 0
        # umode_t mode, // arg 1
        # struct proc_dir_entry *parent, // arg 2
        # const struct file_operations *proc_fops // arg 3
        # );
        # ...so proc_fops is calling_convention[3]
        fops_loading_insn = insn_modifying_reg_before(
            e, trela, init_text, calling_convention[3], func, func_offset
        )
        # we assume that fops is located in the .data section, so find the corresponding
        # relocation.
        # fops_loading_insn.address is relative to the function, so add st_value
        # to get a section offset comparable with r_offset
        fops_reloc = reloc_in_range(
            e,
            trela,
            func["st_value"] + fops_loading_insn.address,
            fops_loading_insn.size,
        )
        print(fops_reloc)
        # for information about relocation types, see page 72 of the System V AMD64 ABI
        # documentation: https://refspecs.linuxbase.org/elf/x86_64-abi-0.99.pdf
        if fops_reloc["r_info_type"] != ENUM_RELOC_TYPE_x64["R_X86_64_32S"]:
            raise AssertionError("fops reloc type changed. investigate.")
        # you can find these offsets either from subtracting in the ghidra listing, or
        # by using offsetof(struct file_operations, unlocked_ioctl) in a kernel module
        # you compile, then disassembling it (or you could load it and then printk).
        # I wanted to make a one-liner you could use to dump an offset, but linux/fs.h
        # pulls in a ton of other header files that don't easily work when you try to
        # sidestep the normal build process.
        unlocked_ioctl_offset = 0x50
        # we assume that the entry in .data will be a function pointer, and therefore
        # will be written by a relocation.
        container_sym = e.get_section(trela["sh_link"]).get_symbol(
            fops_reloc["r_info_sym"]
        )
        if container_sym["st_info"]["type"] != "STT_SECTION":
            raise AssertionError("fops referenced symbol changed. investigate")
        container_section = e.get_section(container_sym["st_shndx"])
        container_rela = reler.find_relocations_for_section(container_section)
        container_offset = (
            container_sym["st_value"] + fops_reloc["r_addend"] + unlocked_ioctl_offset
        )
        print(hex(container_offset))
        # NOTE(review): 11 is presumably the numeric value of R_X86_64_32S, which
        # would make this a repeat of the check above; confirm.
        if fops_reloc["r_info_type"] != 11:
            raise AssertionError("fops relocation type changed, check that it is still 8 bytes")
        ioctl_reloc = reloc_in_range(e, container_rela, container_offset, 8)
        print(e.get_section(container_rela["sh_link"]).get_symbol(
            ioctl_reloc["r_info_sym"]
        ).entry)
        print(
            read_symbol_relative_reloc(
                e, trela, fops_reloc, unlocked_ioctl_offset + 0x8
            )
        )


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Table keyed by 64-bit register name. Each value lists `Reg(a, b)` entries, one
# per sub-register named in the trailing comment.
# NOTE(review): `Reg` is not defined in the visible source.
# NOTE(review): the first field looks like a capstone x86 register id (0 where the
# named sub-register has no id of its own); confirm.
# NOTE(review): the second field looks like a width, but many values disagree
# with the sub-register named in the comment (e.g. `ecx` listed as 64, `bpl` as
# 16, `r8d` as 0). Treat the second column as unverified until regenerated.
SUBREGS = {
    "rax": [
        Reg(19, 32),  # eax
        Reg(3, 16),  # ax
        Reg(2, 8),  # al
        Reg(1, 8),  # ah
        Reg(0, 64),  # HAX
    ],
    "rbp": [
        Reg(20, 32),  # ebp
        Reg(6, 16),  # bp
        Reg(7, 16),  # bpl
        Reg(0, 8),  # BPH
        Reg(0, 64),  # HBP
    ],
    "rbx": [
        Reg(21, 32),  # ebx
        Reg(8, 8),  # bx
        Reg(5, 8),  # bl
        Reg(4, 8),  # bh
        Reg(0, 64),  # HBX
    ],
    "rcx": [
        Reg(22, 64),  # ecx
        Reg(12, 8),  # cx
        Reg(10, 16),  # cl
        Reg(9, 8),  # ch
        Reg(0, 64),  # HCX
    ],
    "rdi": [
        Reg(23, 32),  # edi
        Reg(14, 8),  # di
        Reg(15, 16),  # dil
        Reg(0, 16),  # DIH
        Reg(0, 64),  # HDI
    ],
    "rdx": [
        Reg(24, 32),  # edx
        Reg(18, 32),  # dx
        Reg(16, 32),  # dl
        Reg(13, 8),  # dh
        Reg(0, 64),  # HDX
    ],
    "rip": [
        Reg(26, 32),  # eip
        Reg(34, 8),  # ip
        Reg(0, 64),  # HIP
    ],
    "rsi": [
        Reg(29, 16),  # esi
        Reg(45, 64),  # si
        Reg(46, 64),  # sil
        Reg(0, 64),  # SIH
        Reg(0, 64),  # HSI
    ],
    "rsp": [
        Reg(30, 16),  # esp
        Reg(47, 64),  # sp
        Reg(48, 64),  # spl
        Reg(0, 64),  # SPH
        Reg(0, 16),  # HSP
    ],
    "r8": [
        Reg(226, 0),  # r8d
        Reg(234, 0),  # r8w
        Reg(218, 16),  # r8b
        Reg(0, 128),  # R8BH
        Reg(0, 0),  # R8WH
    ],
    "r9": [
        Reg(227, 0),  # r9d
        Reg(235, 8),  # r9w
        Reg(219, 16),  # r9b
        Reg(0, 0),  # R9BH
        Reg(0, 24),  # R9WH
    ],
    "r10": [
        Reg(228, 0),  # r10d
        Reg(236, 0),  # r10w
        Reg(220, 16),  # r10b
        Reg(0, 0),  # R10BH
        Reg(0, 0),  # R10WH
    ],
    "r11": [
        Reg(229, 0),  # r11d
        Reg(237, 16),  # r11w
        Reg(221, 16),  # r11b
        Reg(0, 0),  # R11BH
        Reg(0, 32),  # R11WH
    ],
    "r12": [
        Reg(230, 0),  # r12d
        Reg(238, 0),  # r12w
        Reg(222, 16),  # r12b
        Reg(0, 0),  # R12BH
        Reg(0, 0),  # R12WH
    ],
    "r13": [
        Reg(231, 0),  # r13d
        Reg(239, 16),  # r13w
        Reg(223, 128),  # r13b
        Reg(0, 0),  # R13BH
        Reg(0, 32),  # R13WH
    ],
    "r14": [
        Reg(232, 0),  # r14d
        Reg(240, 0),  # r14w
        Reg(224, 128),  # r14b
        Reg(0, 0),  # R14BH
        Reg(0, 0),  # R14WH
    ],
    "r15": [
        Reg(233, 8),  # r15d
        Reg(241, 24),  # r15w
        Reg(225, 128),  # r15b
        Reg(0, 0),  # R15BH
        Reg(0, 40),  # R15WH
    ],
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const uint8_t regsize_map_64 [] = { | |
0, // { X86_REG_INVALID, NULL }, | |
1, // { X86_REG_AH, "ah" }, | |
1, // { X86_REG_AL, "al" }, | |
2, // { X86_REG_AX, "ax" }, | |
1, // { X86_REG_BH, "bh" }, | |
1, // { X86_REG_BL, "bl" }, | |
2, // { X86_REG_BP, "bp" }, | |
1, // { X86_REG_BPL, "bpl" }, | |
2, // { X86_REG_BX, "bx" }, | |
1, // { X86_REG_CH, "ch" }, | |
1, // { X86_REG_CL, "cl" }, | |
2, // { X86_REG_CS, "cs" }, | |
2, // { X86_REG_CX, "cx" }, | |
1, // { X86_REG_DH, "dh" }, | |
2, // { X86_REG_DI, "di" }, | |
1, // { X86_REG_DIL, "dil" }, | |
1, // { X86_REG_DL, "dl" }, | |
2, // { X86_REG_DS, "ds" }, | |
2, // { X86_REG_DX, "dx" }, | |
4, // { X86_REG_EAX, "eax" }, | |
4, // { X86_REG_EBP, "ebp" }, | |
4, // { X86_REG_EBX, "ebx" }, | |
4, // { X86_REG_ECX, "ecx" }, | |
4, // { X86_REG_EDI, "edi" }, | |
4, // { X86_REG_EDX, "edx" }, | |
8, // { X86_REG_EFLAGS, "flags" }, | |
4, // { X86_REG_EIP, "eip" }, | |
4, // { X86_REG_EIZ, "eiz" }, | |
2, // { X86_REG_ES, "es" }, | |
4, // { X86_REG_ESI, "esi" }, | |
4, // { X86_REG_ESP, "esp" }, | |
10, // { X86_REG_FPSW, "fpsw" }, | |
2, // { X86_REG_FS, "fs" }, | |
2, // { X86_REG_GS, "gs" }, | |
2, // { X86_REG_IP, "ip" }, | |
8, // { X86_REG_RAX, "rax" }, | |
8, // { X86_REG_RBP, "rbp" }, | |
8, // { X86_REG_RBX, "rbx" }, | |
8, // { X86_REG_RCX, "rcx" }, | |
8, // { X86_REG_RDI, "rdi" }, | |
8, // { X86_REG_RDX, "rdx" }, | |
8, // { X86_REG_RIP, "rip" }, | |
8, // { X86_REG_RIZ, "riz" }, | |
8, // { X86_REG_RSI, "rsi" }, | |
8, // { X86_REG_RSP, "rsp" }, | |
2, // { X86_REG_SI, "si" }, | |
1, // { X86_REG_SIL, "sil" }, | |
2, // { X86_REG_SP, "sp" }, | |
1, // { X86_REG_SPL, "spl" }, | |
2, // { X86_REG_SS, "ss" }, | |
8, // { X86_REG_CR0, "cr0" }, | |
8, // { X86_REG_CR1, "cr1" }, | |
8, // { X86_REG_CR2, "cr2" }, | |
8, // { X86_REG_CR3, "cr3" }, | |
8, // { X86_REG_CR4, "cr4" }, | |
8, // { X86_REG_CR5, "cr5" }, | |
8, // { X86_REG_CR6, "cr6" }, | |
8, // { X86_REG_CR7, "cr7" }, | |
8, // { X86_REG_CR8, "cr8" }, | |
8, // { X86_REG_CR9, "cr9" }, | |
8, // { X86_REG_CR10, "cr10" }, | |
8, // { X86_REG_CR11, "cr11" }, | |
8, // { X86_REG_CR12, "cr12" }, | |
8, // { X86_REG_CR13, "cr13" }, | |
8, // { X86_REG_CR14, "cr14" }, | |
8, // { X86_REG_CR15, "cr15" }, | |
8, // { X86_REG_DR0, "dr0" }, | |
8, // { X86_REG_DR1, "dr1" }, | |
8, // { X86_REG_DR2, "dr2" }, | |
8, // { X86_REG_DR3, "dr3" }, | |
8, // { X86_REG_DR4, "dr4" }, | |
8, // { X86_REG_DR5, "dr5" }, | |
8, // { X86_REG_DR6, "dr6" }, | |
8, // { X86_REG_DR7, "dr7" }, | |
8, // { X86_REG_DR8, "dr8" }, | |
8, // { X86_REG_DR9, "dr9" }, | |
8, // { X86_REG_DR10, "dr10" }, | |
8, // { X86_REG_DR11, "dr11" }, | |
8, // { X86_REG_DR12, "dr12" }, | |
8, // { X86_REG_DR13, "dr13" }, | |
8, // { X86_REG_DR14, "dr14" }, | |
8, // { X86_REG_DR15, "dr15" }, | |
10, // { X86_REG_FP0, "fp0" }, | |
10, // { X86_REG_FP1, "fp1" }, | |
10, // { X86_REG_FP2, "fp2" }, | |
10, // { X86_REG_FP3, "fp3" }, | |
10, // { X86_REG_FP4, "fp4" }, | |
10, // { X86_REG_FP5, "fp5" }, | |
10, // { X86_REG_FP6, "fp6" }, | |
10, // { X86_REG_FP7, "fp7" }, | |
2, // { X86_REG_K0, "k0" }, | |
2, // { X86_REG_K1, "k1" }, | |
2, // { X86_REG_K2, "k2" }, | |
2, // { X86_REG_K3, "k3" }, | |
2, // { X86_REG_K4, "k4" }, | |
2, // { X86_REG_K5, "k5" }, | |
2, // { X86_REG_K6, "k6" }, | |
2, // { X86_REG_K7, "k7" }, | |
8, // { X86_REG_MM0, "mm0" }, | |
8, // { X86_REG_MM1, "mm1" }, | |
8, // { X86_REG_MM2, "mm2" }, | |
8, // { X86_REG_MM3, "mm3" }, | |
8, // { X86_REG_MM4, "mm4" }, | |
8, // { X86_REG_MM5, "mm5" }, | |
8, // { X86_REG_MM6, "mm6" }, | |
8, // { X86_REG_MM7, "mm7" }, | |
8, // { X86_REG_R8, "r8" }, | |
8, // { X86_REG_R9, "r9" }, | |
8, // { X86_REG_R10, "r10" }, | |
8, // { X86_REG_R11, "r11" }, | |
8, // { X86_REG_R12, "r12" }, | |
8, // { X86_REG_R13, "r13" }, | |
8, // { X86_REG_R14, "r14" }, | |
8, // { X86_REG_R15, "r15" }, | |
10, // { X86_REG_ST0, "st0" }, | |
10, // { X86_REG_ST1, "st1" }, | |
10, // { X86_REG_ST2, "st2" }, | |
10, // { X86_REG_ST3, "st3" }, | |
10, // { X86_REG_ST4, "st4" }, | |
10, // { X86_REG_ST5, "st5" }, | |
10, // { X86_REG_ST6, "st6" }, | |
10, // { X86_REG_ST7, "st7" }, | |
16, // { X86_REG_XMM0, "xmm0" }, | |
16, // { X86_REG_XMM1, "xmm1" }, | |
16, // { X86_REG_XMM2, "xmm2" }, | |
16, // { X86_REG_XMM3, "xmm3" }, | |
16, // { X86_REG_XMM4, "xmm4" }, | |
16, // { X86_REG_XMM5, "xmm5" }, | |
16, // { X86_REG_XMM6, "xmm6" }, | |
16, // { X86_REG_XMM7, "xmm7" }, | |
16, // { X86_REG_XMM8, "xmm8" }, | |
16, // { X86_REG_XMM9, "xmm9" }, | |
16, // { X86_REG_XMM10, "xmm10" }, | |
16, // { X86_REG_XMM11, "xmm11" }, | |
16, // { X86_REG_XMM12, "xmm12" }, | |
16, // { X86_REG_XMM13, "xmm13" }, | |
16, // { X86_REG_XMM14, "xmm14" }, | |
16, // { X86_REG_XMM15, "xmm15" }, | |
16, // { X86_REG_XMM16, "xmm16" }, | |
16, // { X86_REG_XMM17, "xmm17" }, | |
16, // { X86_REG_XMM18, "xmm18" }, | |
16, // { X86_REG_XMM19, "xmm19" }, | |
16, // { X86_REG_XMM20, "xmm20" }, | |
16, // { X86_REG_XMM21, "xmm21" }, | |
16, // { X86_REG_XMM22, "xmm22" }, | |
16, // { X86_REG_XMM23, "xmm23" }, | |
16, // { X86_REG_XMM24, "xmm24" }, | |
16, // { X86_REG_XMM25, "xmm25" }, | |
16, // { X86_REG_XMM26, "xmm26" }, | |
16, // { X86_REG_XMM27, "xmm27" }, | |
16, // { X86_REG_XMM28, "xmm28" }, | |
16, // { X86_REG_XMM29, "xmm29" }, | |
16, // { X86_REG_XMM30, "xmm30" }, | |
16, // { X86_REG_XMM31, "xmm31" }, | |
32, // { X86_REG_YMM0, "ymm0" }, | |
32, // { X86_REG_YMM1, "ymm1" }, | |
32, // { X86_REG_YMM2, "ymm2" }, | |
32, // { X86_REG_YMM3, "ymm3" }, | |
32, // { X86_REG_YMM4, "ymm4" }, | |
32, // { X86_REG_YMM5, "ymm5" }, | |
32, // { X86_REG_YMM6, "ymm6" }, | |
32, // { X86_REG_YMM7, "ymm7" }, | |
32, // { X86_REG_YMM8, "ymm8" }, | |
32, // { X86_REG_YMM9, "ymm9" }, | |
32, // { X86_REG_YMM10, "ymm10" }, | |
32, // { X86_REG_YMM11, "ymm11" }, | |
32, // { X86_REG_YMM12, "ymm12" }, | |
32, // { X86_REG_YMM13, "ymm13" }, | |
32, // { X86_REG_YMM14, "ymm14" }, | |
32, // { X86_REG_YMM15, "ymm15" }, | |
32, // { X86_REG_YMM16, "ymm16" }, | |
32, // { X86_REG_YMM17, "ymm17" }, | |
32, // { X86_REG_YMM18, "ymm18" }, | |
32, // { X86_REG_YMM19, "ymm19" }, | |
32, // { X86_REG_YMM20, "ymm20" }, | |
32, // { X86_REG_YMM21, "ymm21" }, | |
32, // { X86_REG_YMM22, "ymm22" }, | |
32, // { X86_REG_YMM23, "ymm23" }, | |
32, // { X86_REG_YMM24, "ymm24" }, | |
32, // { X86_REG_YMM25, "ymm25" }, | |
32, // { X86_REG_YMM26, "ymm26" }, | |
32, // { X86_REG_YMM27, "ymm27" }, | |
32, // { X86_REG_YMM28, "ymm28" }, | |
32, // { X86_REG_YMM29, "ymm29" }, | |
32, // { X86_REG_YMM30, "ymm30" }, | |
32, // { X86_REG_YMM31, "ymm31" }, | |
64, // { X86_REG_ZMM0, "zmm0" }, | |
64, // { X86_REG_ZMM1, "zmm1" }, | |
64, // { X86_REG_ZMM2, "zmm2" }, | |
64, // { X86_REG_ZMM3, "zmm3" }, | |
64, // { X86_REG_ZMM4, "zmm4" }, | |
64, // { X86_REG_ZMM5, "zmm5" }, | |
64, // { X86_REG_ZMM6, "zmm6" }, | |
64, // { X86_REG_ZMM7, "zmm7" }, | |
64, // { X86_REG_ZMM8, "zmm8" }, | |
64, // { X86_REG_ZMM9, "zmm9" }, | |
64, // { X86_REG_ZMM10, "zmm10" }, | |
64, // { X86_REG_ZMM11, "zmm11" }, | |
64, // { X86_REG_ZMM12, "zmm12" }, | |
64, // { X86_REG_ZMM13, "zmm13" }, | |
64, // { X86_REG_ZMM14, "zmm14" }, | |
64, // { X86_REG_ZMM15, "zmm15" }, | |
64, // { X86_REG_ZMM16, "zmm16" }, | |
64, // { X86_REG_ZMM17, "zmm17" }, | |
64, // { X86_REG_ZMM18, "zmm18" }, | |
64, // { X86_REG_ZMM19, "zmm19" }, | |
64, // { X86_REG_ZMM20, "zmm20" }, | |
64, // { X86_REG_ZMM21, "zmm21" }, | |
64, // { X86_REG_ZMM22, "zmm22" }, | |
64, // { X86_REG_ZMM23, "zmm23" }, | |
64, // { X86_REG_ZMM24, "zmm24" }, | |
64, // { X86_REG_ZMM25, "zmm25" }, | |
64, // { X86_REG_ZMM26, "zmm26" }, | |
64, // { X86_REG_ZMM27, "zmm27" }, | |
64, // { X86_REG_ZMM28, "zmm28" }, | |
64, // { X86_REG_ZMM29, "zmm29" }, | |
64, // { X86_REG_ZMM30, "zmm30" }, | |
64, // { X86_REG_ZMM31, "zmm31" }, | |
1, // { X86_REG_R8B, "r8b" }, | |
1, // { X86_REG_R9B, "r9b" }, | |
1, // { X86_REG_R10B, "r10b" }, | |
1, // { X86_REG_R11B, "r11b" }, | |
1, // { X86_REG_R12B, "r12b" }, | |
1, // { X86_REG_R13B, "r13b" }, | |
1, // { X86_REG_R14B, "r14b" }, | |
1, // { X86_REG_R15B, "r15b" }, | |
4, // { X86_REG_R8D, "r8d" }, | |
4, // { X86_REG_R9D, "r9d" }, | |
4, // { X86_REG_R10D, "r10d" }, | |
4, // { X86_REG_R11D, "r11d" }, | |
4, // { X86_REG_R12D, "r12d" }, | |
4, // { X86_REG_R13D, "r13d" }, | |
4, // { X86_REG_R14D, "r14d" }, | |
4, // { X86_REG_R15D, "r15d" }, | |
2, // { X86_REG_R8W, "r8w" }, | |
2, // { X86_REG_R9W, "r9w" }, | |
2, // { X86_REG_R10W, "r10w" }, | |
2, // { X86_REG_R11W, "r11w" }, | |
2, // { X86_REG_R12W, "r12w" }, | |
2, // { X86_REG_R13W, "r13w" }, | |
2, // { X86_REG_R14W, "r14w" }, | |
2, // { X86_REG_R15W, "r15w" }, | |
16, // { X86_REG_BND0, "bnd0" }, | |
16, // { X86_REG_BND1, "bnd1" }, | |
16, // { X86_REG_BND2, "bnd2" }, | |
16, // { X86_REG_BND3, "bnd3" }, | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// printf() | |
#include <stdio.h> | |
// malloc() | |
#include <stdlib.h> | |
//#include "MCRegisterInfo.h" | |
// actually, the DiffListIterator stuff is private, so include the source. | |
#include "MCRegisterInfo.c" | |
// Needed for x86_init | |
#define GET_REGINFO_ENUM | |
#include "arch/X86/X86GenRegisterInfo.inc" | |
#define GET_REGINFO_MC_DESC | |
#include "arch/X86/X86GenRegisterInfo.inc" | |
// define intel variation getRegisterName() | |
#include "arch/X86/X86GenRegisterName1.inc" | |
// define regsize_map_64 | |
//#include "arch/X86/X86Mapping.h" | |
/* | |
<arch/X86/X86Mapping.c awk ' | |
BEGIN { in_struct=0; } | |
/^const uint8_t regsize_map_64 \[\] = {/ { in_struct=1; } | |
/^}/ && in_struct { print; in_struct=0; } | |
in_struct == 1 { print; } | |
' > ~/regsize.c | |
*/ | |
#include "regsize.c" | |
// map internal register id to public register id | |
#include "include/capstone/x86.h" | |
// One row of the internal-id -> public-id register table.
struct register_map {
	unsigned short id;      // internal register id
	unsigned short pub_id;  // public register id
};

// Lookup table used by X86_register_map(); rows come from the generated
// X86MappingReg.inc, preceded by a dummy row for id 0.
static const struct register_map reg_map[] = {
	// first dummy map
	{ 0, 0 },
#include "arch/X86/X86MappingReg.inc"
};
// return 0 on invalid input, or public register ID otherwise | |
// NOTE: reg_map is sorted in order of internal register | |
#include "utils.h" | |
unsigned short X86_register_map(unsigned short id) | |
{ | |
if (id < ARR_SIZE(reg_map)) | |
return reg_map[id].pub_id; | |
return 0; | |
} | |
// modified from arch/X86/X86Disassembler.c (to avoid linking unnecessary things) | |
// to use variable names from X86Mapping.c | |
// Fill in *MRI with the X86 register tables this tool needs.
void X86_init(MCRegisterInfo *MRI)
{
	// Table sizes taken from the reference call in X86GenRegisterInfo.inc:
	//
	//   RI->InitMCRegisterInfo(X86RegDesc, 277,
	//       RA, PC,
	//       X86MCRegisterClasses, 86,
	//       X86RegUnitRoots, 162, X86RegDiffLists, X86LaneMaskLists, X86RegStrings,
	//       X86RegClassStrings,
	//       X86SubRegIdxLists, 9,
	//       X86SubRegIdxRanges, X86RegEncodingTable);
	//
	// An older revision of the same call, kept for comparison:
	//
	//   InitMCRegisterInfo(X86RegDesc, 234,
	//       RA, PC,
	//       X86MCRegisterClasses, 79,
	//       X86RegUnitRoots, 119, X86RegDiffLists, X86RegStrings,
	//       X86SubRegIdxLists, 7,
	//       X86SubRegIdxRanges, X86RegEncodingTable);
	enum {
		NUM_REGS = 277,
		NUM_REG_CLASSES = 86,
		NUM_SUBREG_INDICES = 9
	};

	// Every argument that the reference call fills with a table this file
	// does not pull in is passed as 0 here.
	// NOTE(review): presumably safe because main() only walks register
	// classes and diff lists -- confirm against MCRegisterInfo.c.
	MCRegisterInfo_InitMCRegisterInfo(MRI,
			X86RegDesc, NUM_REGS,
			0, 0,
			X86MCRegisterClasses, NUM_REG_CLASSES,
			0, 0,
			X86RegDiffLists,
			0,
			X86SubRegIdxLists, NUM_SUBREG_INDICES,
			0);
}
int main(void) { | |
MCRegisterInfo *mri; | |
mri = malloc(sizeof(*mri)); | |
X86_init(mri); | |
const MCRegisterClass *gr64_cls = MCRegisterInfo_getRegClass(mri, X86_GR64RegClassID); | |
printf("SUBREGS = {\n"); | |
DiffListIterator iter; | |
const uint16_t *SRI; | |
for (int reg = 1; reg < mri->NumRegs; reg++) { | |
if (!MCRegisterClass_contains(gr64_cls, reg)) continue; | |
printf(" \"%s\": [\n", getRegisterName(reg)); | |
SRI = mri->SubRegIndices + mri->Desc[reg].SubRegIndices; | |
DiffListIterator_init(&iter, (MCPhysReg) reg, mri->DiffLists + mri->Desc[reg].SubRegs); | |
DiffListIterator_next(&iter); | |
while(DiffListIterator_isValid(&iter)) { | |
unsigned subreg = DiffListIterator_getVal(&iter); | |
unsigned pub_id = X86_register_map(subreg); | |
printf(" Reg(%d, %d), # %s\n", pub_id, regsize_map_64[subreg] * 8, getRegisterName(subreg)); | |
++SRI; | |
DiffListIterator_next(&iter); | |
} | |
printf(" ],\n"); | |
} | |
printf("}\n"); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#------------------------------------------------------------------------------- | |
# elftools: elf/relocation.py | |
# | |
# ELF relocations | |
# | |
# Eli Bendersky ([email protected]) | |
# This code is in the public domain | |
#------------------------------------------------------------------------------- | |
from collections import namedtuple | |
from elftools.common.exceptions import ELFRelocationError | |
from elftools.common.utils import elf_assert, struct_parse | |
from elftools.elf.sections import Section | |
from elftools.elf.enums import ( | |
ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS, | |
ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64, | |
ENUM_RELOC_TYPE_S390X, ENUM_RELOC_TYPE_BPF, ENUM_RELOC_TYPE_LOONGARCH, | |
ENUM_D_TAG) | |
from elftools.construct import Container | |
from elftools.elf.constants import SHN_INDICES | |
from elftools.elf.relocation import RelocationSection | |
class RelocationHandler(object):
    """ Handles the logic of relocations in ELF files.

        Unlike a plain in-file relocation pass, this handler takes an
        `addresses` mapping (section name -> address) so that symbol values
        and relocation places are computed against chosen section addresses.
        Sections missing from the mapping fall back to their file offset
        (sh_offset).
    """
    # r_offset of the single relocation whose calculation is dumped to stdout
    # by _do_apply_relocation. Adjust it to the relocation you want to debug;
    # set it to None to silence the dump.
    _DEBUG_R_OFFSET = 1750

    def __init__(self, elffile):
        self.elffile = elffile

    def find_relocations_for_section(self, section):
        """ Given a section, find the relocation section for it in the ELF
            file. Return a RelocationSection object, or None if none was
            found.
        """
        reloc_section_names = (
                '.rel' + section.name,
                '.rela' + section.name)
        # Find the relocation section aimed at this one. Currently assume
        # that either .rel or .rela section exists for this section, but
        # not both.
        for relsection in self.elffile.iter_sections():
            if (    isinstance(relsection, RelocationSection) and
                    relsection.name in reloc_section_names):
                return relsection
        return None

    def apply_section_relocations(self, stream, addresses, section, reloc_section):
        """ Apply all relocations in reloc_section (a RelocationSection object)
            to the given stream. The stream is modified as a result.

            stream:
                Seekable binary stream; each relocated value is read from and
                written back at section['sh_offset'] + r_offset.
            addresses:
                Mapping of section name -> address of that section. Sections
                not present fall back to their sh_offset.
            section:
                The section being relocated.
            reloc_section:
                The RelocationSection holding the relocations to apply.

            Raises ELFRelocationError for invalid or unsupported relocations.
        """
        # The symbol table associated with this relocation section
        symtab = self.elffile.get_section(reloc_section['sh_link'])
        for reloc in reloc_section.iter_relocations():
            self._do_apply_relocation(stream, addresses, section, reloc, symtab)

    def _do_apply_relocation(self, stream, addresses, section, reloc, symtab):
        """ Apply a single relocation `reloc` (against `symtab`) to `stream`.
            See apply_section_relocations for the meaning of the arguments.
        """
        # Preparations for performing the relocation: obtain the value of
        # the symbol mentioned in the relocation, as well as the relocation
        # recipe which tells us how to actually perform it.
        # All peppered with some sanity checking.
        sym_index = reloc['r_info_sym']
        if sym_index >= symtab.num_symbols():
            raise ELFRelocationError(
                'Invalid symbol reference in relocation: index %s' % (
                    sym_index))
        sym = symtab.get_symbol(sym_index)
        sym_value = sym['st_value']

        # Rebase the symbol onto the address of its defining section.
        # pyelftools reports reserved section indices (SHN_UNDEF, SHN_ABS,
        # SHN_COMMON, ...) as strings; those do not name a section, so only
        # integer indices are resolved and the raw st_value is used otherwise.
        sec = None
        sec_loc = None
        shndx = sym['st_shndx']
        if isinstance(shndx, int):
            sec = self.elffile.get_section(shndx)
            sec_loc = addresses.get(sec.name, sec['sh_offset'])
            sym_value += sec_loc

        reloc_type = reloc['r_info_type']
        recipe = None
        arch = self.elffile.get_machine_arch()

        if arch == 'x86':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for x86: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X86.get(reloc_type, None)
        elif arch == 'x64':
            if not reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected REL relocation for x64: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X64.get(reloc_type, None)
        elif arch == 'MIPS':
            if reloc.is_RELA():
                if reloc_type == ENUM_RELOC_TYPE_MIPS['R_MIPS_64']:
                    if reloc['r_type2'] != 0 or reloc['r_type3'] != 0 or reloc['r_ssym'] != 0:
                        raise ELFRelocationError(
                            'Multiple relocations in R_MIPS_64 are not implemented: %s' % reloc)
                recipe = self._RELOCATION_RECIPES_MIPS_RELA.get(reloc_type, None)
            else:
                recipe = self._RELOCATION_RECIPES_MIPS_REL.get(reloc_type, None)
        elif arch == 'ARM':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for ARM: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_ARM.get(reloc_type, None)
        elif arch == 'AArch64':
            recipe = self._RELOCATION_RECIPES_AARCH64.get(reloc_type, None)
        elif arch == '64-bit PowerPC':
            recipe = self._RELOCATION_RECIPES_PPC64.get(reloc_type, None)
        elif arch == 'IBM S/390':
            recipe = self._RELOCATION_RECIPES_S390X.get(reloc_type, None)
        elif arch == 'Linux BPF - in-kernel virtual machine':
            recipe = self._RELOCATION_RECIPES_EBPF.get(reloc_type, None)
        elif arch == 'LoongArch':
            if not reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected REL relocation for LoongArch: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_LOONGARCH.get(reloc_type, None)

        if recipe is None:
            raise ELFRelocationError(
                    'Unsupported relocation type: %s' % reloc_type)

        # So now we have everything we need to actually perform the relocation.
        # Let's get to it:

        # 0. Find out which struct we're going to be using to read this value
        #    from the stream and write it back.
        if recipe.bytesize == 4:
            value_struct = self.elffile.structs.Elf_word('')
        elif recipe.bytesize == 8:
            value_struct = self.elffile.structs.Elf_word64('')
        elif recipe.bytesize == 1:
            value_struct = self.elffile.structs.Elf_byte('')
        elif recipe.bytesize == 2:
            value_struct = self.elffile.structs.Elf_half('')
        else:
            raise ELFRelocationError('Invalid bytesize %s for relocation' %
                    recipe.bytesize)

        # Position in the stream of the value being patched.
        file_offset = section['sh_offset'] + reloc['r_offset']

        # 1. Read the value from the stream (with correct size and endianness)
        original_value = struct_parse(
            value_struct,
            stream,
            stream_pos=file_offset)

        # "offset", as pyelftools calls it, is the "P" or "place" variable in the
        # relocation calculation. It really represents the address of the relocation
        # location, not the offset. So we calculate the address.
        addr = addresses.get(section.name, section['sh_offset']) + reloc['r_offset']

        # 2. Apply the relocation to the value, acting according to the recipe
        relocated_value = recipe.calc_func(
            value=original_value,
            sym_value=sym_value,
            offset=addr,
            addend=reloc['r_addend'] if recipe.has_addend else 0)

        # for debugging, dumps useful information about the calculations
        if reloc['r_offset'] == self._DEBUG_R_OFFSET:
            if sec is not None:
                sec_desc = f"{sec.name}@{hex(sec_loc)}"
            else:
                sec_desc = "null"
            # REL entries carry no r_addend field; report 0 for them.
            raw_addend = reloc['r_addend'] if reloc.is_RELA() else 0
            print(
                reloc,
                "file_offset=" + hex(file_offset),
                "orig_val=" + hex(original_value),
                "raw_sym_value=" + hex(sym['st_value']),
                "sec=" + sec_desc,
                "sym_value=" + hex(sym_value),
                "addend=" + hex(raw_addend),
                "addr=" + hex(addr),
                "relocated_value=" + hex(relocated_value)
            )

        # 3. Write the relocated value back into the stream
        stream.seek(file_offset)

        # Make sure the relocated value fits back by wrapping it around. This
        # looks like a problem, but it seems to be the way this is done in
        # binutils too.
        relocated_value = relocated_value % (2 ** (recipe.bytesize * 8))
        value_struct.build_stream(relocated_value, stream)

    # Relocations are represented by "recipes". Each recipe specifies:
    #  bytesize: The number of bytes to read (and write back) to the section.
    #            This is the unit of data on which relocation is performed.
    #  has_addend: Does this relocation have an extra addend?
    #  calc_func: A function that performs the relocation on an extracted
    #             value, and returns the updated value.
    #
    _RELOCATION_RECIPE_TYPE = namedtuple('_RELOCATION_RECIPE_TYPE',
        'bytesize has_addend calc_func')

    # The calc functions below are deliberately plain functions (no self):
    # they are stored in the recipe tables while the class body executes and
    # are later called through recipe.calc_func.
    #  value:     the value currently stored at the relocated place
    #  sym_value: the (rebased) value of the referenced symbol
    #  offset:    the address of the relocated place
    #  addend:    the relocation addend, 0 when the recipe has none

    def _reloc_calc_identity(value, sym_value, offset, addend=0):
        return value

    def _reloc_calc_sym_plus_value(value, sym_value, offset, addend=0):
        return sym_value + value + addend

    def _reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        return sym_value + value - offset

    def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
        return sym_value + addend

    def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0):
        return sym_value + addend - offset

    def _reloc_calc_value_minus_sym_addend(value, sym_value, offset, addend=0):
        return value - sym_value - addend

    def _arm_reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        return sym_value // 4 + value - offset // 4

    def _bpf_64_32_reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
        return (sym_value + addend) // 8 - 1

    _RELOCATION_RECIPES_ARM = {
        ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_ARM['R_ARM_CALL']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_arm_reloc_calc_sym_plus_value_pcrel),
    }

    _RELOCATION_RECIPES_AARCH64 = {
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_PREL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
    }

    # https://dmz-portal.mips.com/wiki/MIPS_relocation_types
    _RELOCATION_RECIPES_MIPS_REL = {
        ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
    }

    _RELOCATION_RECIPES_MIPS_RELA = {
        ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
    }

    _RELOCATION_RECIPES_PPC64 = {
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_REL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
    }

    _RELOCATION_RECIPES_X86 = {
        ENUM_RELOC_TYPE_i386['R_386_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_i386['R_386_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_i386['R_386_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value_pcrel),
    }

    _RELOCATION_RECIPES_X64 = {
        ENUM_RELOC_TYPE_x64['R_X86_64_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_x64['R_X86_64_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_32S']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
    }

    # https://www.kernel.org/doc/html/latest/bpf/llvm_reloc.html#different-relocation-types
    _RELOCATION_RECIPES_EBPF = {
        ENUM_RELOC_TYPE_BPF['R_BPF_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False, calc_func=_bpf_64_32_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_NODYLD32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_ABS64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
    }

    # https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc
    _RELOCATION_RECIPES_LOONGARCH = {
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD8']: _RELOCATION_RECIPE_TYPE(
            bytesize=1, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB8']: _RELOCATION_RECIPE_TYPE(
            bytesize=1, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD16']: _RELOCATION_RECIPE_TYPE(
            bytesize=2, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB16']: _RELOCATION_RECIPE_TYPE(
            bytesize=2, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32_PCREL']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64_PCREL']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
    }

    _RELOCATION_RECIPES_S390X = {
        ENUM_RELOC_TYPE_S390X['R_390_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_S390X['R_390_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_S390X['R_390_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
    }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from glob import glob
from pwn import *
from elftools.elf.elffile import ELFFile

context(arch="amd64")

# Exactly one kernel module is expected; the unpacking fails otherwise.
[chall] = glob("/challenge/*.ko")
# The handle stays open on purpose: `e` is built on top of `f`.
f = open(chall, "rb")
e = ELFFile(f)


def get_sec(name):
    """Return the section of the module called `name`."""
    return e.get_section_by_name(name)


symtab = get_sec(".symtab")


def get_sym(name):
    """Look up `name` in the module's .symtab."""
    return symtab.get_symbol_by_name(name)


def sym_i(i):
    """Return the .symtab entry at index `i`."""
    return symtab.get_symbol(i)


# Sections of interest, each paired with its relocation section.
text = get_sec(".text")
trela = get_sec(".rela.text")
data = get_sec(".data")
drela = get_sec(".rela.data")
mod = get_sec(".gnu.linkonce.this_module")
mrela = get_sec(".rela.gnu.linkonce.this_module")

# Address assigned to the first non-null section; every other section keeps
# its distance from that one, measured in file offsets.
base = 0x00100000
start_offset = next(
    sec for sec in e.iter_sections() if sec["sh_type"] != "SHT_NULL"
)["sh_offset"]


def offset_to_addr(offset):
    """Convert a file offset into an address relative to `base`."""
    return offset + base - start_offset


def addr_to_offset(addr):
    """Convert an address relative to `base` back into a file offset."""
    return addr - base + start_offset


# Section name -> address; names must be unique for the mapping to be usable.
addresses = {}
for sec in e.iter_sections():
    addresses[sec.name] = offset_to_addr(sec["sh_offset"])
if len(addresses) != e.num_sections():
    raise AssertionError("duplicate section names")

h = hex

from capstone import *

md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment