Skip to content

Instantly share code, notes, and snippets.

@spencerpogo
Last active May 22, 2024 20:48
Show Gist options
  • Save spencerpogo/b7530c85705744cce180ec135eba1ebf to your computer and use it in GitHub Desktop.
Save spencerpogo/b7530c85705744cce180ec135eba1ebf to your computer and use it in GitHub Desktop.
WIP
import sys
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection
from relocation import RelocationHandler
from elftools.elf.enums import ENUM_RELOC_TYPE_x64
from elftools.elf.constants import SHN_INDICES
from binascii import hexlify
from io import BytesIO
# from pwn import disasm
from capstone import Cs, CS_ARCH_X86, CS_MODE_64
from capstone.x86 import *
def symbol_by_name(symbols, sym):
    """
    Return the first entry of the symbol table `symbols` whose name equals `sym`.

    Entries are visited by index, from 0 up to ``symbols.num_symbols() - 1``.
    Raises AssertionError when no entry has that name.
    """
    total = symbols.num_symbols()
    entries = (symbols.get_symbol(idx) for idx in range(total))
    match = next((entry for entry in entries if entry.name == sym), None)
    if match is None:
        raise AssertionError(
            f"unable to find symbol {sym!r} in {symbols.num_symbols()} entry symbol table"
        )
    return match
def wip_apply_relocations():
    """
    Messing with applying relocations. Not currently used.

    Builds a RelocationHandler, registers a recipe for R_X86_64_PLT32 (4 bytes,
    with addend, calculated by `_reloc_calc_sym_plus_addend_pcrel`), and looks up
    the relocation section for `text`. The step that would actually apply the
    relocations is commented out.

    NOTE(review): `e` and `text` are neither parameters nor assigned in this
    function, so it only runs if they exist as globals -- confirm before calling.
    """
    reler = RelocationHandler(e)
    # The recipe is stored through the instance, but `_RELOCATION_RECIPES_X64` is
    # looked up by attribute, so this presumably mutates a table shared by every
    # handler -- TODO confirm against the RelocationHandler implementation.
    reler._RELOCATION_RECIPES_X64[ENUM_RELOC_TYPE_x64["R_X86_64_PLT32"]] = (
        RelocationHandler._RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=RelocationHandler._reloc_calc_sym_plus_addend_pcrel,
        )
    )
    rel = reler.find_relocations_for_section(text)
    # reler.apply_section_relocations(f, rel)
def relocations_for_symbol(e, rela, symbol):
    """
    Collect every relocation in `rela` whose referenced symbol is named `symbol`.

    Symbol names are resolved through the symbol table section that `rela` links
    to (its `sh_link` header field), fetched from the ELFFile `e`. The result is
    a list in iteration order; it is empty when nothing matches.
    """
    linked_symtab = e.get_section(rela["sh_link"])
    matching = []
    for reloc in rela.iter_relocations():
        referenced = linked_symtab.get_symbol(reloc["r_info_sym"])
        if referenced.name == symbol:
            matching.append(reloc)
    return matching
def relocation_for_symbol(e, rela, func):
    """
    Return the one relocation in `rela` whose referenced symbol is named `func`.

    The lookup is delegated to `relocations_for_symbol`. Raises AssertionError
    when there is no such relocation, or when there is more than one.
    """
    matches = relocations_for_symbol(e, rela, func)
    count = len(matches)
    if count == 1:
        return matches[0]
    if count == 0:
        raise AssertionError(f"no relocations for func {func!r}")
    raise AssertionError(
        f"expected one relocation for func {func!r}, instead got {len(matches)}"
    )
def find_continaing_function(e, rela, section_offset):
    """
    Find the first STT_FUNC symbol in the symbol table associated with `rela` that
    contains the byte that lies `section_offset` bytes from the start of the section
    `rela` is associated with.

    Contains means within the half-open range [st_value, st_value+st_size): the
    byte at st_value+st_size is the first byte after the function, so it is not
    treated as part of it. Only the symbol type and address range are examined;
    the section a symbol belongs to is not checked.

    Return the symbol, and the number of bytes from the start of the function to
    the offset. Raises AssertionError when no STT_FUNC symbol contains the offset.
    """
    symtab = e.get_section(rela["sh_link"])
    for sym in symtab.iter_symbols():
        if sym["st_info"]["type"] != "STT_FUNC":
            continue
        start = sym["st_value"]
        if start <= section_offset < start + sym["st_size"]:
            return sym, section_offset - start
    raise AssertionError("no STT_FUNC contains this offset")
def read_exact(f, n):
    """
    Read exactly `n` bytes from `f` and return them as a bytearray.

    `f.readinto` is called repeatedly on the unfilled tail of the buffer until
    `n` bytes have arrived. Raises EOFError if a read returns 0 bytes first.
    """
    out = bytearray(n)
    filled = 0
    with memoryview(out) as window:
        while filled < n:
            got = f.readinto(window[filled:])
            if got == 0:
                raise EOFError
            filled += got
    return out
def read_symbol(f, section, sym):
    """
    Return the bytes that the symbol `sym` covers, read from the file `f`.

    `sym` is assumed to live in `section`: the read starts at the section's file
    offset plus the symbol's value and spans `st_size` bytes. The seek target is
    printed before seeking.
    """
    location = section["sh_offset"] + sym["st_value"]
    print('seek to', hex(location))
    f.seek(location)
    size = sym["st_size"]
    return read_exact(f, size)
# Registers that carry the first six integer/pointer arguments, in argument order
# (index N holds argument N): rdi, rsi, rdx, rcx, r8, r9.
# They are stored as capstone register ids rather than names so they can be
# compared directly with the register ids capstone reports for an instruction.
calling_convention = [
    X86_REG_RDI,
    X86_REG_RSI,
    X86_REG_RDX,
    X86_REG_RCX,
    X86_REG_R8,
    X86_REG_R9,
]
def relocs_in_range(e, rela, start, sz):
    """
    Generator that will yield all relocations in `rela` whose `r_offset` lies in
    the half-open range of section offsets [start, start+sz).

    The end is exclusive: a relocation at exactly start+sz patches the bytes that
    follow the range, so it is not yielded. `e` (the ELFFile) is accepted for
    signature symmetry with the other helpers but is not used.
    """
    end = start + sz
    for reloc in rela.iter_relocations():
        if start <= reloc["r_offset"] < end:
            yield reloc
def reloc_in_range(e, rela, start, sz):
    """
    Return the single relocation that `relocs_in_range` finds for the given range.

    Raises AssertionError when the range holds no relocation, or more than one.
    """
    hits = list(relocs_in_range(e, rela, start, sz))
    if len(hits) == 1:
        return hits[0]
    if hits:
        raise AssertionError(f"found {len(hits)} relocs in range")
    raise AssertionError("no relocations found in range")
def insn_modifying_reg_before(e, trela, text, reg, func, func_offset):
    """
    Return the last instruction that writes `reg` before offset `func_offset`.

    `text` is the machine code to scan; it is disassembled as 64-bit x86 starting
    at address 0, so `func_offset` is measured from the start of `text`. Only
    instructions that end strictly before `func_offset` are considered. `reg` is
    a capstone register id, compared against the written-register list reported
    by `regs_access()`.

    `e`, `trela` and `func` are accepted so existing callers keep working, but
    they are not used.

    Raises AssertionError when no considered instruction writes `reg`.
    """
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    # detail mode is required for regs_access() to report anything
    md.detail = True
    last_writer = None
    for insn in md.disasm(text, 0x0):
        # stop when we get to the instruction containing the relocation in
        # question. Instructions are yielded in increasing address order, so
        # everything after this point is past the cut-off as well.
        if insn.address + insn.size >= func_offset:
            break
        _, regs_written = insn.regs_access()
        if reg in regs_written:
            last_writer = insn
    if last_writer is None:
        raise AssertionError(f"cannot find an instruction modifying {reg!r}")
    return last_writer
def read_symbol_relative_reloc(e, rela, reloc, n):
    """
    Read `n` bytes from the file location that the relocation `reloc` points at.

    The target is the file offset of the section holding the relocation's symbol,
    plus the symbol's value, plus the relocation's addend. The symbol is looked
    up in the symbol table that `rela` links to. The target is printed, then the
    bytes are read from `e.stream`.
    """
    linked_symtab = e.get_section(rela["sh_link"])
    target_sym = linked_symtab.get_symbol(reloc["r_info_sym"])
    home_section = e.get_section(target_sym["st_shndx"])
    location = (
        home_section["sh_offset"] + target_sym["st_value"] + reloc["r_addend"]
    )
    print(f"seek to {hex(location)}={location}")
    stream = e.stream
    stream.seek(location)
    return stream.read(n)
def unused_subroutine_1():
    """
    Parked experiments for locating and disassembling the module's init function.
    Not called anywhere in the visible code.

    The first part reads a pointer out of the `.gnu.linkonce.this_module` section
    and disassembles from there up to the first `ret`; the second part looks up
    the `init_module` symbol instead. Both parts end by copying the whole input
    stream to a file named 'out'.

    NOTE(review): `f`, `e`, `text`, `module_section`, `addresses`, `base` and
    `start_offset` are neither parameters nor assigned here, so this only runs if
    they exist as globals -- confirm before calling.
    NOTE(review): the nesting below is a best-effort reading of the code. With
    the first `return` inside the one-shot loop, the second part is never reached.
    """
    for _ in [1]:
        # when compiling a kernel module, the compiler will create an init_module
        # symbol, and set it to be an alias of the module's init function. It will
        # then export init_module as a global symbol.
        # source:
        # https://terenceli.github.io/%E6%8A%80%E6%9C%AF/2018/06/02/linux-loadable-module
        # The values of mod->init and mod->exit come from the struct module
        # __this_module variable contained in the .gnu.linkonce.this_module section of
        # the module's .ko file. The kernel assumes that this variable is at the start
        # of the section, and in fact it is the only variable in the section.
        # https://stackoverflow.com/a/68166097/9196137
        # presumably offsetof(struct module, init) for the targeted kernel build --
        # TODO confirm
        init_func_offset = 0x150
        f.seek(module_section["sh_offset"] + init_func_offset)
        init_addr = e.structs.Elf_word64('').parse_stream(f)
        print("virtual address", hex(init_addr))
        init_section_offset = init_addr - addresses[".text"]
        print("section offset", init_section_offset)
        init_file_offset = init_addr - base + start_offset
        print("file offset", hex(init_file_offset))
        f.seek(init_file_offset)
        # read from the init function to the end of .text
        init_text = f.read(text["sh_size"] - init_section_offset)
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        md.detail = True
        # print instructions up to and including the first return
        for insn in md.disasm(init_text, 0x0):
            print(insn)
            if insn.insn_name() in {"ret", "retq"}:
                break
        # dump the whole stream as it currently stands
        with open('out', 'wb') as fout:
            f.seek(0)
            fout.write(f.read())
        return
    symbols = e.get_section_by_name(".symtab")
    if not isinstance(symbols, SymbolTableSection):
        raise AssertionError("unable to load symbol table")
    init = symbol_by_name(symbols, "init_module")
    if init["st_info"]["type"] != "STT_FUNC":
        raise AssertionError("expected init_module to be an STT_FUNC")
    print(hex(text["sh_offset"] + init["st_value"]))
    init_text = read_symbol(f, text, init)
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    md.detail = True
    for insn in md.disasm(init_text, 0x0):
        print(insn)
    with open('out', 'wb') as fout:
        f.seek(0)
        fout.write(f.read())
    return
def to_signed_32(n):
    """
    Reinterpret the low 32 bits of `n` as a two's-complement signed integer.

    Bits above bit 31 are discarded; the result is in [-2**31, 2**31 - 1].
    """
    low = n & 0xFFFFFFFF
    if low & 0x80000000:
        # sign bit set: the value sits in the upper half, so wrap it below zero
        return low - 0x100000000
    return low
def main():
    """
    Analyse the kernel module ELF file named by sys.argv[1].

    Steps, in order:
      1. Load the file into memory and apply the relocations of the
         `.gnu.linkonce.this_module`, `.data` and `.text` sections, using
         made-up section addresses derived from file offsets.
      2. Find the function that calls `proc_create` and the instruction that
         loads its fourth argument (the file_operations pointer).
      3. Read the `unlocked_ioctl` pointer out of that structure and disassemble
         the handler, following the branch taken for ioctl command 0x539.
      4. Inside that code, locate the interpreter loop, the register holding the
         instruction pointer, and the load of an instruction's low bytes.

    Progress is printed along the way; every unmet expectation raises
    AssertionError.

    NOTE(review): the nesting here is reconstructed. Everything is placed inside
    the `with` block, which is harmless because `f` is an in-memory copy.
    """
    with open(sys.argv[1], "rb") as f_rdonly:
        # if you want to modify the data, such as by applying relocations, read the
        # entire file into a BytesIO and use it.
        f = BytesIO(f_rdonly.read())
        # otherwise, use the read-only file handle directly.
        # f = f_rdonly
        e = ELFFile(f)
        text = e.get_section_by_name(".text")
        reler = RelocationHandler(e)
        trela = reler.find_relocations_for_section(text)
        if not trela.is_RELA():
            raise AssertionError(
                "text relocations are REL but expected RELA. this will probably work "
                + "just as well, but it hasn't been tested."
            )
        # NOTE(review): this second handler replaces the one created above.
        reler = RelocationHandler(e)
        # register a recipe for R_X86_64_PLT32: 4 bytes, with addend, PC-relative
        reler._RELOCATION_RECIPES_X64[ENUM_RELOC_TYPE_x64["R_X86_64_PLT32"]] = (
            RelocationHandler._RELOCATION_RECIPE_TYPE(
                bytesize=4,
                has_addend=True,
                calc_func=RelocationHandler._reloc_calc_sym_plus_addend_pcrel,
            )
        )
        # kernel modules don't request particular memory offsets. they must be PIE as
        # they have to be loaded into kernel address space.
        # This means that when we mimic performing relocations, we can pick whatever
        # virtual addresses we want.
        # Let's pick a scheme that will match the ghidra listing view.
        # Make base match the start of the first section.
        base = 0x00100000
        # file offset of the first section that is not SHT_NULL
        start_offset = next(
            filter(lambda sec: sec["sh_type"] != "SHT_NULL", e.iter_sections())
        )["sh_offset"]
        # file offset <-> made-up virtual address; the two are inverses
        offset_to_addr = lambda offset: offset + base - start_offset
        addr_to_offset = lambda addr: addr - base + start_offset
        addresses = {
            sec.name: offset_to_addr(sec["sh_offset"]) for sec in e.iter_sections()
        }
        # a dict keyed by name silently drops duplicates, so compare the counts
        if len(addresses) != e.num_sections():
            raise AssertionError("duplicate section names")
        # NOTE(review): this four-argument apply_section_relocations comes from the
        # local `relocation` module imported at the top of the file; presumably it
        # patches `f` in place using `addresses` -- confirm against that module.
        module_section = e.get_section_by_name(".gnu.linkonce.this_module")
        rel = reler.find_relocations_for_section(module_section)
        reler.apply_section_relocations(f, addresses, module_section, rel)
        data = e.get_section_by_name(".data")
        rel = reler.find_relocations_for_section(data)
        reler.apply_section_relocations(f, addresses, data, rel)
        rel = reler.find_relocations_for_section(text)
        reler.apply_section_relocations(f, addresses, text, rel)
        # another way to find the functions we want is to find the call sites of the
        # kernel APIs they use. In order to be relocated properly, the module will have
        # to reference the symbol names they want to import. We can trace these
        # relocations back to the `call` instruction they are adjusting.
        proc_create_reloc = relocation_for_symbol(e, trela, "proc_create")
        print("proc_create reloc", proc_create_reloc)
        func, func_offset = find_continaing_function(
            e, trela, proc_create_reloc["r_offset"]
        )
        print("func", func.name, func.entry, func_offset)
        # bytes of the function that calls proc_create
        init_text = read_symbol(f, text, func)
        # struct proc_dir_entry *proc_create(
        # const char *name, // arg 0
        # umode_t mode, // arg 1
        # struct proc_dir_entry *parent, // arg 2
        # const struct file_operations *proc_fops // arg 3
        # );
        # ...so proc_fops is calling_convention[3]
        fops_loading_insn = insn_modifying_reg_before(
            e, trela, init_text, calling_convention[3], func, func_offset
        )
        print(fops_loading_insn)
        # expect `mov <reg>, <imm>`; the immediate is the relocated fops address
        if fops_loading_insn.insn_name() != "mov":
            raise AssertionError()
        if len(fops_loading_insn.operands) != 2:
            raise AssertionError()
        _, fops_operand = fops_loading_insn.operands
        if fops_operand.type != CS_OP_IMM:
            raise AssertionError()
        fops_addr = fops_operand.imm
        print("fops addr", hex(fops_addr))
        fops_file_offset = addr_to_offset(fops_addr)
        print("fops file offset", hex(fops_file_offset))
        # offsetof(struct file_operations, unlocked_ioctl); see the note further down
        # on how this value was obtained
        unlocked_ioctl_offset = 0x50
        f.seek(fops_file_offset + unlocked_ioctl_offset)
        ioctl_addr = e.structs.Elf_word64("").parse_stream(f)
        print("ioctl addr:", hex(ioctl_addr))
        ioctl_offset = addr_to_offset(ioctl_addr)
        print("ioctl offset:", hex(ioctl_offset))
        md = Cs(CS_ARCH_X86, CS_MODE_64)
        # detail needed to populate insn.operands
        md.detail = True
        f.seek(ioctl_offset)
        # only the first 256 bytes of the handler are examined
        ioctl_code = f.read(256)
        insn_gen = md.disasm(ioctl_code, ioctl_addr)
        # look for a `cmp` with the immediate 0x539 (1337 decimal)
        while True:
            try:
                insn = next(insn_gen)
            # NOTE(review): `as e` rebinds `e`, which until here is the ELFFile.
            # It is harmless only because the handler always raises.
            except StopIteration as e:
                raise AssertionError("didn't find ioctl opcode cmp instruction") from e
            if insn.id == X86_INS_CMP and any(
                i.type == X86_OP_IMM and i.imm == 0x539 for i in insn.operands
            ):
                break
        # the instruction right after the cmp must be the `je` taken on a match
        insn = next(insn_gen)
        print(insn)
        if insn.id != X86_INS_JE:
            raise AssertionError()
        # jmp always has one operand
        ioctl_part2_op, = insn.operands
        if ioctl_part2_op.type != X86_OP_IMM:
            raise AssertionError()
        # as long as we set the disassembly base address correctly, capstone will do
        # the relative jump calculation for us (ins addr + ins size + rel value)
        ioctl_part2_addr = ioctl_part2_op.imm
        print("ioctl_part2_addr", hex(ioctl_part2_addr))
        ioctl_part2_offset = addr_to_offset(ioctl_part2_addr)
        print("ioctl_part2_offset", hex(ioctl_part2_offset))
        f.seek(ioctl_part2_offset)
        gen = md.disasm(f.read(256), ioctl_part2_addr)
        while True:
            try:
                insn = next(gen)
            except StopIteration as e:
                raise AssertionError("didn't find interpreter loop end comparison") from e
            # the following block searches for this instruction:
            # cmp byte ptr [rsp + <ip_rsp_off>], 0xff
            if insn.id == X86_INS_CMP:
                # cmp always has two operands
                a, b = insn.operands
                # we are going to make these checks pretty strict, at the risk of
                # being broken by changes in optimizations from level to level.
                # order of a, b will always be the same due to instruction encoding.
                if (
                    a.type == X86_OP_MEM
                    and a.size == 1 # operand size 1 byte => byte ptr
                    and a.mem.base == X86_REG_RSP
                    and b.type == X86_OP_IMM
                    and b.imm == 0xff
                ):
                    break
        # `a` is still the memory operand of the cmp that ended the loop above
        ip_rsp_off = a.mem.disp
        loop_end_addr = insn.address
        # the cmp must be followed by a `jne` that branches back to the loop start
        jne = next(gen)
        if jne.id != X86_INS_JNE:
            raise AssertionError()
        jne_op = jne.operands[0]
        if jne_op.type != X86_OP_IMM:
            raise AssertionError()
        loop_start_addr = jne_op.value.imm
        print("loop start", hex(loop_start_addr))
        print("loop end", hex(loop_end_addr))
        f.seek(addr_to_offset(loop_start_addr))
        gen = md.disasm(f.read(loop_end_addr - loop_start_addr), loop_start_addr)
        # find the last call instruction within the loop
        # NOTE(review): call_insn stays None if the loop holds no call, and the
        # `call_insn.address` read below would then fail.
        call_insn = None
        for insn in gen:
            if insn.id == X86_INS_CALL:
                call_insn = insn
        print(call_insn)
        # void interpret_instruction(vmstate_t *state, instruction_t ins)
        # NOTE(review): target_reg is not used anywhere below.
        target_reg = calling_convention[1]
        f.seek(addr_to_offset(loop_start_addr))
        # disassemble again, this time only from the loop start up to that call
        gen = md.disasm(f.read(call_insn.address - loop_start_addr), loop_start_addr)
        # this next bit is going to unfortunately be even more optimization dependent
        # than usual. I will add more cases as needed.
        while True:
            try:
                insn = next(gen)
            except StopIteration as e:
                raise AssertionError("didn't find load ip insn") from e
            # we look for this instruction:
            # movzx <some reg>, byte ptr [rsp + <ip_rsp_off>]
            if (
                insn.id == X86_INS_MOVZX
                and insn.operands[1].type == X86_OP_MEM
                and insn.operands[1].mem.base == X86_REG_RSP
                and insn.operands[1].mem.disp == ip_rsp_off
                and insn.operands[1].size == 1
                and insn.operands[0].type == X86_OP_REG
            ):
                break
        ip_reg = insn.operands[0].reg
        print("ip reg is", md.reg_name(ip_reg))
        # later:
        # lea <ip reg>, [<ip reg> + <reg> * 2] # multiply by sizeof(yan85 instruction) = 3
        # add <ip reg>, <&start of memory>
        # no need to look for them
        # find: movzx <reg>, word ptr [<ip reg>]
        while True:
            try:
                insn = next(gen)
            except StopIteration as e:
                raise AssertionError("didn't find load instruction low bytes insn") from e
            if (
                insn.id == X86_INS_MOVZX
                and insn.operands[0].type == X86_OP_REG
                and insn.operands[1].type == X86_OP_MEM
                and insn.operands[1].size == 2
                and insn.operands[1].mem.base == ip_reg
            ):
                return
        # NOTE(review): the loop above only leaves through `return` or `raise`, so
        # as laid out here nothing below runs. It is the older, relocation-based
        # way of finding unlocked_ioctl without applying relocations first.
        # we assume that fops is located in the .data section, so find the corresponding
        # relocation.
        fops_reloc = reloc_in_range(
            e,
            trela,
            func["st_value"] + fops_loading_insn.address,
            fops_loading_insn.size,
        )
        print(fops_reloc)
        # for information about relocation types, see page 72 of the System V AMD64 ABI
        # documentation: https://refspecs.linuxbase.org/elf/x86_64-abi-0.99.pdf
        if fops_reloc["r_info_type"] != ENUM_RELOC_TYPE_x64["R_X86_64_32S"]:
            raise AssertionError("fops reloc type changed. investigate.")
        # you can find these offsets either from subtracting in the ghidra listing, or
        # by using offsetof(struct file_operations, unlocked_ioctl) in a kernel module
        # you compile, then disassembling it (or you could load it and then printk).
        # I wanted to make a one-liner you could use to dump an offset, but including
        # any one linux kernel header pulls in a ton of other header files that don't
        # easily work outside of the normal build process.
        unlocked_ioctl_offset = 0x50
        # we assume that the entry in .data will be a function pointer, and therefore
        # will be written by a relocation.
        container_sym = e.get_section(trela["sh_link"]).get_symbol(
            fops_reloc["r_info_sym"]
        )
        if container_sym["st_info"]["type"] != "STT_SECTION":
            raise AssertionError("fops referenced symbol changed. investigate")
        container_section = e.get_section(container_sym["st_shndx"])
        container_rela = reler.find_relocations_for_section(container_section)
        # section offset of the unlocked_ioctl field inside the fops structure
        container_offset = (
            container_sym["st_value"] + fops_reloc["r_addend"] + unlocked_ioctl_offset
        )
        print(hex(container_offset))
        # NOTE(review): 11 is compared against the same field that was checked
        # against R_X86_64_32S above, so this looks like a duplicate of that check
        # written with a literal.
        if fops_reloc["r_info_type"] != 11:
            raise AssertionError("fops relocation type changed, check that it is still 8 bytes")
        ioctl_reloc = reloc_in_range(e, container_rela, container_offset, 8)
        print(e.get_section(container_rela["sh_link"]).get_symbol(
            ioctl_reloc["r_info_sym"]
        ).entry)
        print(
            read_symbol_relative_reloc(
                e, trela, fops_reloc, unlocked_ioctl_offset + 0x8
            )
        )
if __name__ == "__main__":
main()
import sys
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection
from elftools.elf.relocation import RelocationHandler
from elftools.elf.enums import ENUM_RELOC_TYPE_x64
from elftools.elf.constants import SHN_INDICES
from binascii import hexlify
from io import BytesIO
# from pwn import disasm
from capstone import Cs, CS_ARCH_X86, CS_MODE_64
from capstone.x86 import *
def symbol_by_name(symbols, sym):
    """
    Return the first entry of the symbol table `symbols` whose name equals `sym`.

    Entries are visited by index, from 0 up to ``symbols.num_symbols() - 1``.
    Raises AssertionError when no entry has that name.
    """
    total = symbols.num_symbols()
    entries = (symbols.get_symbol(idx) for idx in range(total))
    match = next((entry for entry in entries if entry.name == sym), None)
    if match is None:
        raise AssertionError(
            f"unable to find symbol {sym!r} in {symbols.num_symbols()} entry symbol table"
        )
    return match
def wip_apply_relocations():
    """
    Messing with applying relocations. Not currently used.

    Builds a RelocationHandler, registers a recipe for R_X86_64_PLT32 (4 bytes,
    with addend, calculated by `_reloc_calc_sym_plus_addend_pcrel`), and looks up
    the relocation section for `text`. The step that would actually apply the
    relocations is commented out.

    NOTE(review): `e` and `text` are neither parameters nor assigned in this
    function, so it only runs if they exist as globals -- confirm before calling.
    """
    reler = RelocationHandler(e)
    # The recipe table is reached through the instance but is looked up by
    # attribute, so this presumably changes a table shared by every handler --
    # TODO confirm against the RelocationHandler implementation.
    reler._RELOCATION_RECIPES_X64[ENUM_RELOC_TYPE_x64["R_X86_64_PLT32"]] = (
        RelocationHandler._RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=RelocationHandler._reloc_calc_sym_plus_addend_pcrel,
        )
    )
    rel = reler.find_relocations_for_section(text)
    # reler.apply_section_relocations(f, rel)
def relocations_for_symbol(e, rela, symbol):
    """
    Collect every relocation in `rela` whose referenced symbol is named `symbol`.

    Symbol names are resolved through the symbol table section that `rela` links
    to (its `sh_link` header field), fetched from the ELFFile `e`. The result is
    a list in iteration order; it is empty when nothing matches.
    """
    linked_symtab = e.get_section(rela["sh_link"])
    matching = []
    for reloc in rela.iter_relocations():
        referenced = linked_symtab.get_symbol(reloc["r_info_sym"])
        if referenced.name == symbol:
            matching.append(reloc)
    return matching
def relocation_for_symbol(e, rela, func):
    """
    Return the one relocation in `rela` whose referenced symbol is named `func`.

    The lookup is delegated to `relocations_for_symbol`. Raises AssertionError
    when there is no such relocation, or when there is more than one.
    """
    matches = relocations_for_symbol(e, rela, func)
    count = len(matches)
    if count == 1:
        return matches[0]
    if count == 0:
        raise AssertionError(f"no relocations for func {func!r}")
    raise AssertionError(
        f"expected one relocation for func {func!r}, instead got {len(matches)}"
    )
def find_continaing_function(e, rela, section_offset):
    """
    Find the first STT_FUNC symbol in the symbol table associated with `rela` that
    contains the byte that lies `section_offset` bytes from the start of the section
    `rela` is associated with.

    Contains means within the half-open range [st_value, st_value+st_size): the
    byte at st_value+st_size is the first byte after the function, so it is not
    treated as part of it. Only the symbol type and address range are examined;
    the section a symbol belongs to is not checked.

    Return the symbol, and the number of bytes from the start of the function to
    the offset. Raises AssertionError when no STT_FUNC symbol contains the offset.
    """
    symtab = e.get_section(rela["sh_link"])
    for sym in symtab.iter_symbols():
        if sym["st_info"]["type"] != "STT_FUNC":
            continue
        start = sym["st_value"]
        if start <= section_offset < start + sym["st_size"]:
            return sym, section_offset - start
    raise AssertionError("no STT_FUNC contains this offset")
def read_exact(f, n):
    """
    Read exactly `n` bytes from `f` and return them as a bytearray.

    `f.readinto` is called repeatedly on the unfilled tail of the buffer until
    `n` bytes have arrived. Raises EOFError if a read returns 0 bytes first.
    """
    out = bytearray(n)
    filled = 0
    with memoryview(out) as window:
        while filled < n:
            got = f.readinto(window[filled:])
            if got == 0:
                raise EOFError
            filled += got
    return out
def read_symbol(f, section, sym):
    """
    Return the bytes that the symbol `sym` covers, read from the file `f`.

    `sym` is assumed to live in `section`: the read starts at the section's file
    offset plus the symbol's value and spans `st_size` bytes.
    """
    file_pos = section["sh_offset"] + sym["st_value"]
    f.seek(file_pos)
    size = sym["st_size"]
    return read_exact(f, size)
# Registers that carry the first six integer/pointer arguments, in argument order
# (index N holds argument N): rdi, rsi, rdx, rcx, r8, r9.
# They are stored as capstone register ids rather than names so they can be
# compared directly with the register ids capstone reports for an instruction.
calling_convention = [
    X86_REG_RDI,
    X86_REG_RSI,
    X86_REG_RDX,
    X86_REG_RCX,
    X86_REG_R8,
    X86_REG_R9,
]
def relocs_in_range(e, rela, start, sz):
    """
    Generator that will yield all relocations in `rela` whose `r_offset` lies in
    the half-open range of section offsets [start, start+sz).

    The end is exclusive: a relocation at exactly start+sz patches the bytes that
    follow the range, so it is not yielded. `e` (the ELFFile) is accepted for
    signature symmetry with the other helpers but is not used.
    """
    end = start + sz
    for reloc in rela.iter_relocations():
        if start <= reloc["r_offset"] < end:
            yield reloc
def reloc_in_range(e, rela, start, sz):
    """
    Return the single relocation that `relocs_in_range` finds for the given range.

    Raises AssertionError when the range holds no relocation, or more than one.
    """
    hits = list(relocs_in_range(e, rela, start, sz))
    if len(hits) == 1:
        return hits[0]
    if hits:
        raise AssertionError(f"found {len(hits)} relocs in range")
    raise AssertionError("no relocations found in range")
def insn_modifying_reg_before(e, trela, text, reg, func, func_offset):
    """
    Return the last instruction that writes `reg` before offset `func_offset`.

    `text` is the machine code to scan; it is disassembled as 64-bit x86 starting
    at address 0, so `func_offset` is measured from the start of `text`. Only
    instructions that end strictly before `func_offset` are considered. `reg` is
    a capstone register id, compared against the written-register list reported
    by `regs_access()`.

    `e`, `trela` and `func` are accepted so existing callers keep working, but
    they are not used.

    Raises AssertionError when no considered instruction writes `reg`.
    """
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    # detail mode is required for regs_access() to report anything
    md.detail = True
    last_writer = None
    for insn in md.disasm(text, 0x0):
        # stop when we get to the instruction containing the relocation in
        # question. Instructions are yielded in increasing address order, so
        # everything after this point is past the cut-off as well.
        if insn.address + insn.size >= func_offset:
            break
        _, regs_written = insn.regs_access()
        if reg in regs_written:
            last_writer = insn
    if last_writer is None:
        raise AssertionError(f"cannot find an instruction modifying {reg!r}")
    return last_writer
def read_symbol_relative_reloc(e, rela, reloc, n):
    """
    Read `n` bytes from the file location that the relocation `reloc` points at.

    The target is the file offset of the section holding the relocation's symbol,
    plus the symbol's value, plus the relocation's addend. The symbol is looked
    up in the symbol table that `rela` links to. The target is printed, then the
    bytes are read from `e.stream`.
    """
    linked_symtab = e.get_section(rela["sh_link"])
    target_sym = linked_symtab.get_symbol(reloc["r_info_sym"])
    home_section = e.get_section(target_sym["st_shndx"])
    location = (
        home_section["sh_offset"] + target_sym["st_value"] + reloc["r_addend"]
    )
    print(f"seek to {hex(location)}={location}")
    stream = e.stream
    stream.seek(location)
    return stream.read(n)
def main():
    """
    Analyse the kernel module ELF file named by sys.argv[1] without modifying it.

    Finds the function that calls `proc_create`, the instruction that loads its
    fourth argument (the file_operations pointer), the relocation patching that
    instruction, and from there the relocation that fills in the
    `unlocked_ioctl` field. Intermediate results are printed; every unmet
    expectation raises AssertionError.

    NOTE(review): the nesting here is reconstructed. Everything is placed inside
    the `with` block because `f` is the file handle itself and must stay open.
    """
    with open(sys.argv[1], "rb") as f_rdonly:
        # if you want to modify the data, such as by applying relocations, read the
        # entire file into a BytesIO and use it.
        # f = BytesIO(f_rdonly.read())
        # otherwise, use the read-only file handle directly.
        f = f_rdonly
        e = ELFFile(f)
        text = e.get_section_by_name(".text")
        reler = RelocationHandler(e)
        trela = reler.find_relocations_for_section(text)
        if not trela.is_RELA():
            raise AssertionError(
                "text relocations are REL but expected RELA. this will probably work "
                + "just as well, but it hasn't been tested."
            )
        # when compiling a kernel module, the compiler will create an init_module
        # symbol, and set it to be an alias of the module's init function. It will
        # then export init_module as a global symbol.
        # source:
        # https://terenceli.github.io/%E6%8A%80%E6%9C%AF/2018/06/02/linux-loadable-module
        symbols = e.get_section_by_name(".symtab")
        if not isinstance(symbols, SymbolTableSection):
            raise AssertionError("unable to load symbol table")
        # NOTE(review): `init` is only validated here; nothing below reads it.
        init = symbol_by_name(symbols, "init_module")
        if init["st_info"]["type"] != "STT_FUNC":
            raise AssertionError("expected init_module to be an STT_FUNC")
        # another way to find the functions we want is to find the call sites of the
        # kernel APIs they use. In order to be relocated properly, the module will have
        # to reference the symbol names they want to import. We can trace these
        # relocations back to the `call` instruction they are adjusting.
        proc_create_reloc = relocation_for_symbol(e, trela, "proc_create")
        print(proc_create_reloc)
        func, func_offset = find_continaing_function(
            e, trela, proc_create_reloc["r_offset"]
        )
        print(func, func.name, func.entry, func_offset)
        # bytes of the function that calls proc_create
        init_text = read_symbol(f, text, func)
        print(init_text)
        print(hex(func_offset))
        # struct proc_dir_entry *proc_create(
        # const char *name, // arg 0
        # umode_t mode, // arg 1
        # struct proc_dir_entry *parent, // arg 2
        # const struct file_operations *proc_fops // arg 3
        # );
        # ...so proc_fops is calling_convention[3]
        fops_loading_insn = insn_modifying_reg_before(
            e, trela, init_text, calling_convention[3], func, func_offset
        )
        # we assume that fops is located in the .data section, so find the corresponding
        # relocation.
        # fops_loading_insn.address is relative to the start of the function, so
        # adding st_value turns it into a section offset
        fops_reloc = reloc_in_range(
            e,
            trela,
            func["st_value"] + fops_loading_insn.address,
            fops_loading_insn.size,
        )
        print(fops_reloc)
        # for information about relocation types, see page 72 of the System V AMD64 ABI
        # documentation: https://refspecs.linuxbase.org/elf/x86_64-abi-0.99.pdf
        if fops_reloc["r_info_type"] != ENUM_RELOC_TYPE_x64["R_X86_64_32S"]:
            raise AssertionError("fops reloc type changed. investigate.")
        # you can find these offsets either from subtracting in the ghidra listing, or
        # by using offsetof(struct file_operations, unlocked_ioctl) in a kernel module
        # you compile, then disassembling it (or you could load it and then printk).
        # I wanted to make a one-liner you could use to dump an offset, but linux/fs.h
        # pulls in a ton of other header files that don't easily work when you try to
        # sidestep the normal build process.
        unlocked_ioctl_offset = 0x50
        # we assume that the entry in .data will be a function pointer, and therefore
        # will be written by a relocation.
        container_sym = e.get_section(trela["sh_link"]).get_symbol(
            fops_reloc["r_info_sym"]
        )
        if container_sym["st_info"]["type"] != "STT_SECTION":
            raise AssertionError("fops referenced symbol changed. investigate")
        container_section = e.get_section(container_sym["st_shndx"])
        container_rela = reler.find_relocations_for_section(container_section)
        # section offset of the unlocked_ioctl field inside the fops structure
        container_offset = (
            container_sym["st_value"] + fops_reloc["r_addend"] + unlocked_ioctl_offset
        )
        print(hex(container_offset))
        # NOTE(review): 11 is compared against the same field that was checked
        # against R_X86_64_32S above, so this looks like a duplicate of that check
        # written with a literal.
        if fops_reloc["r_info_type"] != 11:
            raise AssertionError("fops relocation type changed, check that it is still 8 bytes")
        ioctl_reloc = reloc_in_range(e, container_rela, container_offset, 8)
        print(e.get_section(container_rela["sh_link"]).get_symbol(
            ioctl_reloc["r_info_sym"]
        ).entry)
        print(
            read_symbol_relative_reloc(
                e, trela, fops_reloc, unlocked_ioctl_offset + 0x8
            )
        )
if __name__ == "__main__":
main()
SUBREGS = {
"rax": [
Reg(19, 32), # eax
Reg(3, 16), # ax
Reg(2, 8), # al
Reg(1, 8), # ah
Reg(0, 64), # HAX
],
"rbp": [
Reg(20, 32), # ebp
Reg(6, 16), # bp
Reg(7, 16), # bpl
Reg(0, 8), # BPH
Reg(0, 64), # HBP
],
"rbx": [
Reg(21, 32), # ebx
Reg(8, 8), # bx
Reg(5, 8), # bl
Reg(4, 8), # bh
Reg(0, 64), # HBX
],
"rcx": [
Reg(22, 64), # ecx
Reg(12, 8), # cx
Reg(10, 16), # cl
Reg(9, 8), # ch
Reg(0, 64), # HCX
],
"rdi": [
Reg(23, 32), # edi
Reg(14, 8), # di
Reg(15, 16), # dil
Reg(0, 16), # DIH
Reg(0, 64), # HDI
],
"rdx": [
Reg(24, 32), # edx
Reg(18, 32), # dx
Reg(16, 32), # dl
Reg(13, 8), # dh
Reg(0, 64), # HDX
],
"rip": [
Reg(26, 32), # eip
Reg(34, 8), # ip
Reg(0, 64), # HIP
],
"rsi": [
Reg(29, 16), # esi
Reg(45, 64), # si
Reg(46, 64), # sil
Reg(0, 64), # SIH
Reg(0, 64), # HSI
],
"rsp": [
Reg(30, 16), # esp
Reg(47, 64), # sp
Reg(48, 64), # spl
Reg(0, 64), # SPH
Reg(0, 16), # HSP
],
"r8": [
Reg(226, 0), # r8d
Reg(234, 0), # r8w
Reg(218, 16), # r8b
Reg(0, 128), # R8BH
Reg(0, 0), # R8WH
],
"r9": [
Reg(227, 0), # r9d
Reg(235, 8), # r9w
Reg(219, 16), # r9b
Reg(0, 0), # R9BH
Reg(0, 24), # R9WH
],
"r10": [
Reg(228, 0), # r10d
Reg(236, 0), # r10w
Reg(220, 16), # r10b
Reg(0, 0), # R10BH
Reg(0, 0), # R10WH
],
"r11": [
Reg(229, 0), # r11d
Reg(237, 16), # r11w
Reg(221, 16), # r11b
Reg(0, 0), # R11BH
Reg(0, 32), # R11WH
],
"r12": [
Reg(230, 0), # r12d
Reg(238, 0), # r12w
Reg(222, 16), # r12b
Reg(0, 0), # R12BH
Reg(0, 0), # R12WH
],
"r13": [
Reg(231, 0), # r13d
Reg(239, 16), # r13w
Reg(223, 128), # r13b
Reg(0, 0), # R13BH
Reg(0, 32), # R13WH
],
"r14": [
Reg(232, 0), # r14d
Reg(240, 0), # r14w
Reg(224, 128), # r14b
Reg(0, 0), # R14BH
Reg(0, 0), # R14WH
],
"r15": [
Reg(233, 8), # r15d
Reg(241, 24), # r15w
Reg(225, 128), # r15b
Reg(0, 0), # R15BH
Reg(0, 40), # R15WH
],
}
const uint8_t regsize_map_64 [] = {
0, // { X86_REG_INVALID, NULL },
1, // { X86_REG_AH, "ah" },
1, // { X86_REG_AL, "al" },
2, // { X86_REG_AX, "ax" },
1, // { X86_REG_BH, "bh" },
1, // { X86_REG_BL, "bl" },
2, // { X86_REG_BP, "bp" },
1, // { X86_REG_BPL, "bpl" },
2, // { X86_REG_BX, "bx" },
1, // { X86_REG_CH, "ch" },
1, // { X86_REG_CL, "cl" },
2, // { X86_REG_CS, "cs" },
2, // { X86_REG_CX, "cx" },
1, // { X86_REG_DH, "dh" },
2, // { X86_REG_DI, "di" },
1, // { X86_REG_DIL, "dil" },
1, // { X86_REG_DL, "dl" },
2, // { X86_REG_DS, "ds" },
2, // { X86_REG_DX, "dx" },
4, // { X86_REG_EAX, "eax" },
4, // { X86_REG_EBP, "ebp" },
4, // { X86_REG_EBX, "ebx" },
4, // { X86_REG_ECX, "ecx" },
4, // { X86_REG_EDI, "edi" },
4, // { X86_REG_EDX, "edx" },
8, // { X86_REG_EFLAGS, "flags" },
4, // { X86_REG_EIP, "eip" },
4, // { X86_REG_EIZ, "eiz" },
2, // { X86_REG_ES, "es" },
4, // { X86_REG_ESI, "esi" },
4, // { X86_REG_ESP, "esp" },
10, // { X86_REG_FPSW, "fpsw" },
2, // { X86_REG_FS, "fs" },
2, // { X86_REG_GS, "gs" },
2, // { X86_REG_IP, "ip" },
8, // { X86_REG_RAX, "rax" },
8, // { X86_REG_RBP, "rbp" },
8, // { X86_REG_RBX, "rbx" },
8, // { X86_REG_RCX, "rcx" },
8, // { X86_REG_RDI, "rdi" },
8, // { X86_REG_RDX, "rdx" },
8, // { X86_REG_RIP, "rip" },
8, // { X86_REG_RIZ, "riz" },
8, // { X86_REG_RSI, "rsi" },
8, // { X86_REG_RSP, "rsp" },
2, // { X86_REG_SI, "si" },
1, // { X86_REG_SIL, "sil" },
2, // { X86_REG_SP, "sp" },
1, // { X86_REG_SPL, "spl" },
2, // { X86_REG_SS, "ss" },
8, // { X86_REG_CR0, "cr0" },
8, // { X86_REG_CR1, "cr1" },
8, // { X86_REG_CR2, "cr2" },
8, // { X86_REG_CR3, "cr3" },
8, // { X86_REG_CR4, "cr4" },
8, // { X86_REG_CR5, "cr5" },
8, // { X86_REG_CR6, "cr6" },
8, // { X86_REG_CR7, "cr7" },
8, // { X86_REG_CR8, "cr8" },
8, // { X86_REG_CR9, "cr9" },
8, // { X86_REG_CR10, "cr10" },
8, // { X86_REG_CR11, "cr11" },
8, // { X86_REG_CR12, "cr12" },
8, // { X86_REG_CR13, "cr13" },
8, // { X86_REG_CR14, "cr14" },
8, // { X86_REG_CR15, "cr15" },
8, // { X86_REG_DR0, "dr0" },
8, // { X86_REG_DR1, "dr1" },
8, // { X86_REG_DR2, "dr2" },
8, // { X86_REG_DR3, "dr3" },
8, // { X86_REG_DR4, "dr4" },
8, // { X86_REG_DR5, "dr5" },
8, // { X86_REG_DR6, "dr6" },
8, // { X86_REG_DR7, "dr7" },
8, // { X86_REG_DR8, "dr8" },
8, // { X86_REG_DR9, "dr9" },
8, // { X86_REG_DR10, "dr10" },
8, // { X86_REG_DR11, "dr11" },
8, // { X86_REG_DR12, "dr12" },
8, // { X86_REG_DR13, "dr13" },
8, // { X86_REG_DR14, "dr14" },
8, // { X86_REG_DR15, "dr15" },
10, // { X86_REG_FP0, "fp0" },
10, // { X86_REG_FP1, "fp1" },
10, // { X86_REG_FP2, "fp2" },
10, // { X86_REG_FP3, "fp3" },
10, // { X86_REG_FP4, "fp4" },
10, // { X86_REG_FP5, "fp5" },
10, // { X86_REG_FP6, "fp6" },
10, // { X86_REG_FP7, "fp7" },
2, // { X86_REG_K0, "k0" },
2, // { X86_REG_K1, "k1" },
2, // { X86_REG_K2, "k2" },
2, // { X86_REG_K3, "k3" },
2, // { X86_REG_K4, "k4" },
2, // { X86_REG_K5, "k5" },
2, // { X86_REG_K6, "k6" },
2, // { X86_REG_K7, "k7" },
8, // { X86_REG_MM0, "mm0" },
8, // { X86_REG_MM1, "mm1" },
8, // { X86_REG_MM2, "mm2" },
8, // { X86_REG_MM3, "mm3" },
8, // { X86_REG_MM4, "mm4" },
8, // { X86_REG_MM5, "mm5" },
8, // { X86_REG_MM6, "mm6" },
8, // { X86_REG_MM7, "mm7" },
8, // { X86_REG_R8, "r8" },
8, // { X86_REG_R9, "r9" },
8, // { X86_REG_R10, "r10" },
8, // { X86_REG_R11, "r11" },
8, // { X86_REG_R12, "r12" },
8, // { X86_REG_R13, "r13" },
8, // { X86_REG_R14, "r14" },
8, // { X86_REG_R15, "r15" },
10, // { X86_REG_ST0, "st0" },
10, // { X86_REG_ST1, "st1" },
10, // { X86_REG_ST2, "st2" },
10, // { X86_REG_ST3, "st3" },
10, // { X86_REG_ST4, "st4" },
10, // { X86_REG_ST5, "st5" },
10, // { X86_REG_ST6, "st6" },
10, // { X86_REG_ST7, "st7" },
16, // { X86_REG_XMM0, "xmm0" },
16, // { X86_REG_XMM1, "xmm1" },
16, // { X86_REG_XMM2, "xmm2" },
16, // { X86_REG_XMM3, "xmm3" },
16, // { X86_REG_XMM4, "xmm4" },
16, // { X86_REG_XMM5, "xmm5" },
16, // { X86_REG_XMM6, "xmm6" },
16, // { X86_REG_XMM7, "xmm7" },
16, // { X86_REG_XMM8, "xmm8" },
16, // { X86_REG_XMM9, "xmm9" },
16, // { X86_REG_XMM10, "xmm10" },
16, // { X86_REG_XMM11, "xmm11" },
16, // { X86_REG_XMM12, "xmm12" },
16, // { X86_REG_XMM13, "xmm13" },
16, // { X86_REG_XMM14, "xmm14" },
16, // { X86_REG_XMM15, "xmm15" },
16, // { X86_REG_XMM16, "xmm16" },
16, // { X86_REG_XMM17, "xmm17" },
16, // { X86_REG_XMM18, "xmm18" },
16, // { X86_REG_XMM19, "xmm19" },
16, // { X86_REG_XMM20, "xmm20" },
16, // { X86_REG_XMM21, "xmm21" },
16, // { X86_REG_XMM22, "xmm22" },
16, // { X86_REG_XMM23, "xmm23" },
16, // { X86_REG_XMM24, "xmm24" },
16, // { X86_REG_XMM25, "xmm25" },
16, // { X86_REG_XMM26, "xmm26" },
16, // { X86_REG_XMM27, "xmm27" },
16, // { X86_REG_XMM28, "xmm28" },
16, // { X86_REG_XMM29, "xmm29" },
16, // { X86_REG_XMM30, "xmm30" },
16, // { X86_REG_XMM31, "xmm31" },
32, // { X86_REG_YMM0, "ymm0" },
32, // { X86_REG_YMM1, "ymm1" },
32, // { X86_REG_YMM2, "ymm2" },
32, // { X86_REG_YMM3, "ymm3" },
32, // { X86_REG_YMM4, "ymm4" },
32, // { X86_REG_YMM5, "ymm5" },
32, // { X86_REG_YMM6, "ymm6" },
32, // { X86_REG_YMM7, "ymm7" },
32, // { X86_REG_YMM8, "ymm8" },
32, // { X86_REG_YMM9, "ymm9" },
32, // { X86_REG_YMM10, "ymm10" },
32, // { X86_REG_YMM11, "ymm11" },
32, // { X86_REG_YMM12, "ymm12" },
32, // { X86_REG_YMM13, "ymm13" },
32, // { X86_REG_YMM14, "ymm14" },
32, // { X86_REG_YMM15, "ymm15" },
32, // { X86_REG_YMM16, "ymm16" },
32, // { X86_REG_YMM17, "ymm17" },
32, // { X86_REG_YMM18, "ymm18" },
32, // { X86_REG_YMM19, "ymm19" },
32, // { X86_REG_YMM20, "ymm20" },
32, // { X86_REG_YMM21, "ymm21" },
32, // { X86_REG_YMM22, "ymm22" },
32, // { X86_REG_YMM23, "ymm23" },
32, // { X86_REG_YMM24, "ymm24" },
32, // { X86_REG_YMM25, "ymm25" },
32, // { X86_REG_YMM26, "ymm26" },
32, // { X86_REG_YMM27, "ymm27" },
32, // { X86_REG_YMM28, "ymm28" },
32, // { X86_REG_YMM29, "ymm29" },
32, // { X86_REG_YMM30, "ymm30" },
32, // { X86_REG_YMM31, "ymm31" },
64, // { X86_REG_ZMM0, "zmm0" },
64, // { X86_REG_ZMM1, "zmm1" },
64, // { X86_REG_ZMM2, "zmm2" },
64, // { X86_REG_ZMM3, "zmm3" },
64, // { X86_REG_ZMM4, "zmm4" },
64, // { X86_REG_ZMM5, "zmm5" },
64, // { X86_REG_ZMM6, "zmm6" },
64, // { X86_REG_ZMM7, "zmm7" },
64, // { X86_REG_ZMM8, "zmm8" },
64, // { X86_REG_ZMM9, "zmm9" },
64, // { X86_REG_ZMM10, "zmm10" },
64, // { X86_REG_ZMM11, "zmm11" },
64, // { X86_REG_ZMM12, "zmm12" },
64, // { X86_REG_ZMM13, "zmm13" },
64, // { X86_REG_ZMM14, "zmm14" },
64, // { X86_REG_ZMM15, "zmm15" },
64, // { X86_REG_ZMM16, "zmm16" },
64, // { X86_REG_ZMM17, "zmm17" },
64, // { X86_REG_ZMM18, "zmm18" },
64, // { X86_REG_ZMM19, "zmm19" },
64, // { X86_REG_ZMM20, "zmm20" },
64, // { X86_REG_ZMM21, "zmm21" },
64, // { X86_REG_ZMM22, "zmm22" },
64, // { X86_REG_ZMM23, "zmm23" },
64, // { X86_REG_ZMM24, "zmm24" },
64, // { X86_REG_ZMM25, "zmm25" },
64, // { X86_REG_ZMM26, "zmm26" },
64, // { X86_REG_ZMM27, "zmm27" },
64, // { X86_REG_ZMM28, "zmm28" },
64, // { X86_REG_ZMM29, "zmm29" },
64, // { X86_REG_ZMM30, "zmm30" },
64, // { X86_REG_ZMM31, "zmm31" },
1, // { X86_REG_R8B, "r8b" },
1, // { X86_REG_R9B, "r9b" },
1, // { X86_REG_R10B, "r10b" },
1, // { X86_REG_R11B, "r11b" },
1, // { X86_REG_R12B, "r12b" },
1, // { X86_REG_R13B, "r13b" },
1, // { X86_REG_R14B, "r14b" },
1, // { X86_REG_R15B, "r15b" },
4, // { X86_REG_R8D, "r8d" },
4, // { X86_REG_R9D, "r9d" },
4, // { X86_REG_R10D, "r10d" },
4, // { X86_REG_R11D, "r11d" },
4, // { X86_REG_R12D, "r12d" },
4, // { X86_REG_R13D, "r13d" },
4, // { X86_REG_R14D, "r14d" },
4, // { X86_REG_R15D, "r15d" },
2, // { X86_REG_R8W, "r8w" },
2, // { X86_REG_R9W, "r9w" },
2, // { X86_REG_R10W, "r10w" },
2, // { X86_REG_R11W, "r11w" },
2, // { X86_REG_R12W, "r12w" },
2, // { X86_REG_R13W, "r13w" },
2, // { X86_REG_R14W, "r14w" },
2, // { X86_REG_R15W, "r15w" },
16, // { X86_REG_BND0, "bnd0" },
16, // { X86_REG_BND1, "bnd0" },
16, // { X86_REG_BND2, "bnd0" },
16, // { X86_REG_BND3, "bnd0" },
};
// printf()
#include <stdio.h>
// malloc()
#include <stdlib.h>
//#include "MCRegisterInfo.h"
// actually, the DiffListIterator stuff is private, so include the source.
#include "MCRegisterInfo.c"
// Needed for x86_init
#define GET_REGINFO_ENUM
#include "arch/X86/X86GenRegisterInfo.inc"
#define GET_REGINFO_MC_DESC
#include "arch/X86/X86GenRegisterInfo.inc"
// define intel variation getRegisterName()
#include "arch/X86/X86GenRegisterName1.inc"
// define regsize_map_64
//#include "arch/X86/X86Mapping.h"
/*
<arch/X86/X86Mapping.c awk '
BEGIN { in_struct=0; }
/^const uint8_t regsize_map_64 \[\] = {/ { in_struct=1; }
/^}/ && in_struct { print; in_struct=0; }
in_struct == 1 { print; }
' > ~/regsize.c
*/
#include "regsize.c"
// map internal register id to public register id
#include "include/capstone/x86.h"
/*
 * Lookup table translating an internal register id into the public register
 * id declared in include/capstone/x86.h. X86_register_map() indexes this
 * array directly with the internal id, so the entries must stay in
 * internal-id order.
 */
static const struct register_map {
unsigned short id;	/* internal register id */
unsigned short pub_id;	/* public register id; the placeholder entry uses 0 */
} reg_map [] = {
// first dummy map
// (placeholder so that index 0 is occupied before the generated entries)
{ 0, 0 },
// the remaining entries are pulled in verbatim from the capstone source tree
#include "arch/X86/X86MappingReg.inc"
};
// return 0 on invalid input, or public register ID otherwise
// NOTE: reg_map is sorted in order of internal register
#include "utils.h"
unsigned short X86_register_map(unsigned short id)
{
	/*
	 * Translate an internal register id into its public id by indexing
	 * reg_map. Ids that fall past the end of the table yield 0.
	 */
	return (id < ARR_SIZE(reg_map)) ? reg_map[id].pub_id : 0;
}
// modified from arch/X86/X86Disassembler.c (to avoid linking unnecessary things)
// to use variable names from X86Mapping.c
void X86_init(MCRegisterInfo *MRI)
{
	/*
	 * Cut-down version of the register-info setup performed in
	 * arch/X86/X86Disassembler.c. For reference, the generated
	 * X86GenRegisterInfo.inc initialises the tables with:
	 *
	 *   RI->InitMCRegisterInfo(X86RegDesc, 277,
	 *       RA, PC,
	 *       X86MCRegisterClasses, 86,
	 *       X86RegUnitRoots, 162, X86RegDiffLists, X86LaneMaskLists,
	 *       X86RegStrings, X86RegClassStrings,
	 *       X86SubRegIdxLists, 9,
	 *       X86SubRegIdxRanges, X86RegEncodingTable);
	 *
	 * (an older revision used 234 / 79 / 119 / 7 for the counts).
	 * Only the descriptor table, the register classes, the diff lists and
	 * the sub-register index lists are passed through here; every other
	 * argument is handed over as 0.
	 */
	enum {
		X86_NUM_REGS = 277,
		X86_NUM_REG_CLASSES = 86,
		X86_NUM_SUBREG_INDICES = 9
	};

	MCRegisterInfo_InitMCRegisterInfo(MRI, X86RegDesc, X86_NUM_REGS,
			0, 0,
			X86MCRegisterClasses, X86_NUM_REG_CLASSES,
			0, 0, X86RegDiffLists, 0,
			X86SubRegIdxLists, X86_NUM_SUBREG_INDICES,
			0);
}
/*
 * Print, as Python source on stdout, a SUBREGS dictionary that maps the name
 * of every register in the GR64 register class to a list of Reg(public_id,
 * size_in_bits) entries, one per value produced by walking that register's
 * sub-register diff list.
 *
 * Returns 0 on success, 1 if the register-info structure cannot be allocated.
 */
int main(void) {
	MCRegisterInfo *mri = malloc(sizeof(*mri));
	if (mri == NULL) {
		fprintf(stderr, "failed to allocate MCRegisterInfo\n");
		return 1;
	}
	X86_init(mri);

	const MCRegisterClass *gr64_cls = MCRegisterInfo_getRegClass(mri, X86_GR64RegClassID);

	printf("SUBREGS = {\n");
	DiffListIterator iter;
	/* register id 0 is skipped; real registers start at 1 */
	for (int reg = 1; reg < mri->NumRegs; reg++) {
		if (!MCRegisterClass_contains(gr64_cls, reg)) continue;

		printf(" \"%s\": [\n", getRegisterName(reg));

		DiffListIterator_init(&iter, (MCPhysReg) reg, mri->DiffLists + mri->Desc[reg].SubRegs);
		/*
		 * Advance once before reading any value.
		 * NOTE(review): presumably this steps past the entry for the
		 * register itself, as the sub-register walks in MCRegisterInfo.c
		 * do -- confirm against that file.
		 */
		DiffListIterator_next(&iter);
		while (DiffListIterator_isValid(&iter)) {
			unsigned subreg = DiffListIterator_getVal(&iter);
			unsigned pub_id = X86_register_map(subreg);

			/* regsize_map_64 holds sizes in bytes; emit bits */
			printf(" Reg(%u, %d), # %s\n", pub_id, regsize_map_64[subreg] * 8, getRegisterName(subreg));
			DiffListIterator_next(&iter);
		}
		printf(" ],\n");
	}
	printf("}\n");

	free(mri);
	return 0;
}
#-------------------------------------------------------------------------------
# elftools: elf/relocation.py
#
# ELF relocations
#
# Eli Bendersky ([email protected])
# This code is in the public domain
#-------------------------------------------------------------------------------
from collections import namedtuple
from elftools.common.exceptions import ELFRelocationError
from elftools.common.utils import elf_assert, struct_parse
from elftools.elf.sections import Section
from elftools.elf.enums import (
ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS,
ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64,
ENUM_RELOC_TYPE_S390X, ENUM_RELOC_TYPE_BPF, ENUM_RELOC_TYPE_LOONGARCH,
ENUM_D_TAG)
from elftools.construct import Container
from elftools.elf.constants import SHN_INDICES
from elftools.elf.relocation import RelocationSection
class RelocationHandler(object):
    """ Handles the logic of relocations in ELF files.

        This variant differs from a plain "relocate in place" handler in two
        ways that are visible in the code below:

        * The stream being patched holds the whole file, so every read and
          write is positioned at ``section['sh_offset'] + r_offset``.
        * Callers pass an ``addresses`` dict mapping section names to the
          address each section is considered to live at. It is used both
          for the place being relocated and for the section a symbol is
          defined in; a section missing from the dict falls back to its
          ``sh_offset``.
    """
    # r_offset of the single relocation whose calculation is dumped to
    # stdout for debugging. Set to None to silence the dump.
    _DEBUG_R_OFFSET = 1750

    # Values of st_shndx that do not name a real section, so no section
    # address may be added to the symbol value for them.
    _RESERVED_SHNDX = ('SHN_UNDEF', 'SHN_ABS', 'SHN_COMMON')

    def __init__(self, elffile):
        self.elffile = elffile

    def find_relocations_for_section(self, section):
        """ Given a section, find the relocation section for it in the ELF
            file. Return a RelocationSection object, or None if none was
            found.
        """
        reloc_section_names = (
            '.rel' + section.name,
            '.rela' + section.name)
        # Find the relocation section aimed at this one. Currently assume
        # that either .rel or .rela section exists for this section, but
        # not both.
        for relsection in self.elffile.iter_sections():
            if (isinstance(relsection, RelocationSection) and
                    relsection.name in reloc_section_names):
                return relsection
        return None

    def apply_section_relocations(self, stream, addresses, section, reloc_section):
        """ Apply all relocations in reloc_section (a RelocationSection object)
            to the given stream. The stream is modified as a result.

            stream: seekable binary stream; patched at
                section['sh_offset'] + r_offset for every relocation.
            addresses: dict of section name -> address, see the class
                docstring.
            section: the section that reloc_section applies to.
            reloc_section: the RelocationSection to iterate over.

            Raises ELFRelocationError for invalid symbol references and for
            relocation types that have no recipe.
        """
        # The symbol table associated with this relocation section
        symtab = self.elffile.get_section(reloc_section['sh_link'])
        for reloc in reloc_section.iter_relocations():
            self._do_apply_relocation(stream, addresses, section, reloc, symtab)

    def _do_apply_relocation(self, stream, addresses, section, reloc, symtab):
        """ Apply the single relocation `reloc` of `section` to `stream`,
            resolving its symbol through `symtab` and `addresses`.
        """
        # Preparations for performing the relocation: obtain the value of
        # the symbol mentioned in the relocation, as well as the relocation
        # recipe which tells us how to actually perform it.
        # All peppered with some sanity checking.
        if reloc['r_info_sym'] >= symtab.num_symbols():
            raise ELFRelocationError(
                'Invalid symbol reference in relocation: index %s' % (
                    reloc['r_info_sym']))
        sym = symtab.get_symbol(reloc['r_info_sym'])
        sym_value = sym['st_value']

        # st_value is relative to the section the symbol is defined in, so
        # add that section's address. Reserved indices (undefined, absolute,
        # common) do not refer to a section and must not be looked up.
        sec = None
        sec_loc = None
        if sym['st_shndx'] not in self._RESERVED_SHNDX:
            sec = self.elffile.get_section(sym['st_shndx'])
            sec_loc = addresses.get(sec.name, sec['sh_offset'])
            sym_value += sec_loc

        reloc_type = reloc['r_info_type']
        arch = self.elffile.get_machine_arch()
        recipe = None

        if arch == 'x86':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for x86: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X86.get(reloc_type, None)
        elif arch == 'x64':
            if not reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected REL relocation for x64: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X64.get(reloc_type, None)
        elif arch == 'MIPS':
            if reloc.is_RELA():
                if reloc_type == ENUM_RELOC_TYPE_MIPS['R_MIPS_64']:
                    if reloc['r_type2'] != 0 or reloc['r_type3'] != 0 or reloc['r_ssym'] != 0:
                        raise ELFRelocationError(
                            'Multiple relocations in R_MIPS_64 are not implemented: %s' % reloc)
                recipe = self._RELOCATION_RECIPES_MIPS_RELA.get(reloc_type, None)
            else:
                recipe = self._RELOCATION_RECIPES_MIPS_REL.get(reloc_type, None)
        elif arch == 'ARM':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for ARM: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_ARM.get(reloc_type, None)
        elif arch == 'AArch64':
            recipe = self._RELOCATION_RECIPES_AARCH64.get(reloc_type, None)
        elif arch == '64-bit PowerPC':
            recipe = self._RELOCATION_RECIPES_PPC64.get(reloc_type, None)
        elif arch == 'IBM S/390':
            recipe = self._RELOCATION_RECIPES_S390X.get(reloc_type, None)
        elif arch == 'Linux BPF - in-kernel virtual machine':
            recipe = self._RELOCATION_RECIPES_EBPF.get(reloc_type, None)
        elif arch == 'LoongArch':
            if not reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected REL relocation for LoongArch: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_LOONGARCH.get(reloc_type, None)

        if recipe is None:
            raise ELFRelocationError(
                'Unsupported relocation type: %s' % reloc_type)

        # So now we have everything we need to actually perform the relocation.
        # Let's get to it:

        # 0. Find out which struct we're going to be using to read this value
        #    from the stream and write it back.
        if recipe.bytesize == 4:
            value_struct = self.elffile.structs.Elf_word('')
        elif recipe.bytesize == 8:
            value_struct = self.elffile.structs.Elf_word64('')
        elif recipe.bytesize == 1:
            value_struct = self.elffile.structs.Elf_byte('')
        elif recipe.bytesize == 2:
            value_struct = self.elffile.structs.Elf_half('')
        else:
            raise ELFRelocationError('Invalid bytesize %s for relocation' %
                                     recipe.bytesize)

        # Position of the relocated field inside the stream (a file offset,
        # as opposed to `addr` below which is an address).
        file_pos = section["sh_offset"] + reloc['r_offset']

        # 1. Read the value from the stream (with correct size and endianness)
        original_value = struct_parse(
            value_struct,
            stream,
            stream_pos=file_pos
        )

        # "offset", as pyelftools calls it, is the "P" or "place" variable in the
        # relocation calculation. It really represents the address of the relocation
        # location, not the offset. So we calculate the address.
        addr = addresses.get(section.name, section['sh_offset']) + reloc["r_offset"]

        # REL entries carry no r_addend field, so only read it when the
        # recipe actually uses one.
        addend = reloc['r_addend'] if recipe.has_addend else 0

        # 2. Apply the relocation to the value, acting according to the recipe
        relocated_value = recipe.calc_func(
            value=original_value,
            sym_value=sym_value,
            offset=addr,
            addend=addend
        )

        # for debugging, dumps useful information about the calculations
        # adjust _DEBUG_R_OFFSET to match the relocation you want to debug
        if reloc["r_offset"] == self._DEBUG_R_OFFSET:
            print(
                reloc,
                "file_offset=" + hex(file_pos),
                "orig_val=" + hex(original_value),
                "raw_sym_value=" + hex(sym["st_value"]),
                # parenthesised so the "sec=" label is kept in both cases
                "sec=" + (f"{sec.name}@{hex(sec_loc)}" if sec is not None else "null"),
                "sym_value=" + hex(sym_value),
                "addend=" + hex(addend),
                "addr=" + hex(addr),
                "relocated_value=" + hex(relocated_value)
            )

        # 3. Write the relocated value back into the stream
        stream.seek(file_pos)

        # Make sure the relocated value fits back by wrapping it around. This
        # looks like a problem, but it seems to be the way this is done in
        # binutils too.
        relocated_value = relocated_value % (2 ** (recipe.bytesize * 8))
        value_struct.build_stream(relocated_value, stream)

    # Relocations are represented by "recipes". Each recipe specifies:
    #  bytesize: The number of bytes to read (and write back) to the section.
    #            This is the unit of data on which relocation is performed.
    #  has_addend: Does this relocation have an extra addend?
    #  calc_func: A function that performs the relocation on an extracted
    #             value, and returns the updated value.
    #
    _RELOCATION_RECIPE_TYPE = namedtuple('_RELOCATION_RECIPE_TYPE',
                                         'bytesize has_addend calc_func')

    # The calculation helpers below are plain functions stored in the recipe
    # tables (they take no self). All share one signature:
    #  value: the field as currently stored at the relocated place
    #  sym_value: the resolved symbol value
    #  offset: the address of the relocated place
    #  addend: the explicit addend, 0 when the recipe has none
    def _reloc_calc_identity(value, sym_value, offset, addend=0):
        return value

    def _reloc_calc_sym_plus_value(value, sym_value, offset, addend=0):
        return sym_value + value + addend

    def _reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        return sym_value + value - offset

    def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
        return sym_value + addend

    def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0):
        return sym_value + addend - offset

    def _reloc_calc_value_minus_sym_addend(value, sym_value, offset, addend=0):
        return value - sym_value - addend

    def _arm_reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        return sym_value // 4 + value - offset // 4

    def _bpf_64_32_reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
        return (sym_value + addend) // 8 - 1

    _RELOCATION_RECIPES_ARM = {
        ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_ARM['R_ARM_CALL']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_arm_reloc_calc_sym_plus_value_pcrel),
    }

    _RELOCATION_RECIPES_AARCH64 = {
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_PREL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
    }

    # https://dmz-portal.mips.com/wiki/MIPS_relocation_types
    _RELOCATION_RECIPES_MIPS_REL = {
        ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
    }

    _RELOCATION_RECIPES_MIPS_RELA = {
        ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
    }

    _RELOCATION_RECIPES_PPC64 = {
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_REL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
    }

    _RELOCATION_RECIPES_X86 = {
        ENUM_RELOC_TYPE_i386['R_386_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_i386['R_386_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_i386['R_386_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value_pcrel),
    }

    _RELOCATION_RECIPES_X64 = {
        ENUM_RELOC_TYPE_x64['R_X86_64_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_x64['R_X86_64_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_32S']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
    }

    # https://www.kernel.org/doc/html/latest/bpf/llvm_reloc.html#different-relocation-types
    _RELOCATION_RECIPES_EBPF = {
        ENUM_RELOC_TYPE_BPF['R_BPF_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False, calc_func=_bpf_64_32_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_NODYLD32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_ABS64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
    }

    # https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc
    _RELOCATION_RECIPES_LOONGARCH = {
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD8']: _RELOCATION_RECIPE_TYPE(
            bytesize=1, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB8']: _RELOCATION_RECIPE_TYPE(
            bytesize=1, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD16']: _RELOCATION_RECIPE_TYPE(
            bytesize=2, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB16']: _RELOCATION_RECIPE_TYPE(
            bytesize=2, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32_PCREL']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64_PCREL']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
    }

    _RELOCATION_RECIPES_S390X = {
        ENUM_RELOC_TYPE_S390X['R_390_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_S390X['R_390_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_S390X['R_390_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
    }
from glob import glob
from pwn import *
from elftools.elf.elffile import ELFFile
context(arch="amd64")

# Exactly one module is expected under /challenge; the single-element
# unpacking raises if the glob matches zero or several files.
(chall,) = glob("/challenge/*.ko")
# The handle stays open on purpose: ELFFile reads from it lazily.
f = open(chall, "rb")
e = ELFFile(f)


def get_sec(name):
    """Return the section of `e` called `name`."""
    return e.get_section_by_name(name)


symtab = get_sec(".symtab")


def get_sym(name):
    """Look `name` up in the module's .symtab."""
    return symtab.get_symbol_by_name(name)


def sym_i(i):
    """Return the .symtab entry at index `i`."""
    return symtab.get_symbol(i)


# Sections of interest, each paired with its .rela section.
text = get_sec(".text")
trela = get_sec(".rela.text")
data = get_sec(".data")
drela = get_sec(".rela.data")
mod = get_sec(".gnu.linkonce.this_module")
mrela = get_sec(".rela.gnu.linkonce.this_module")

# Address assigned to the first non-null section; every other section keeps
# its distance from that one in the file.
base = 0x00100000
start_offset = next(
    sec["sh_offset"] for sec in e.iter_sections() if sec["sh_type"] != "SHT_NULL"
)


def offset_to_addr(offset):
    """Convert a file offset into an address relative to `base`."""
    return offset + base - start_offset


def addr_to_offset(addr):
    """Inverse of offset_to_addr."""
    return addr - base + start_offset


addresses = {
    section.name: offset_to_addr(section["sh_offset"])
    for section in e.iter_sections()
}
# The dict is keyed by name, so two sections sharing a name would silently
# collapse into one entry.
if len(addresses) != e.num_sections():
    raise AssertionError("duplicate section names")

h = hex
from capstone import *
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment