Skip to content

Instantly share code, notes, and snippets.

@aengelke
Created November 9, 2025 18:48
Show Gist options
  • Select an option

  • Save aengelke/7d984567bd14a0016837391a527ef2c1 to your computer and use it in GitHub Desktop.

Select an option

Save aengelke/7d984567bd14a0016837391a527ef2c1 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
# SPDX-FileCopyrightText: 2025 Alexis Engelke
# SPDX-License-Identifier: Apache-2.0
from bisect import bisect_right
from dataclasses import dataclass, replace, field
from typing import NamedTuple
import sys
from elftools.elf.elffile import ELFFile
from elftools.dwarf.callframe import FDE
from elftools.dwarf.constants import *
class InvalidState(Exception):
pass
class State(NamedTuple):
off: int = 0
cfa: int = -1 # reg
cfa_off: int = 0
csrs: dict[int, int] = {}
@dataclass
class CUD:
start: int
size: int = 0 # analysis only
prologue_start: int = 0 # offset into descriptor where the prologue instrs begin, -1 for no prologue
pad_after_epilogue: int = 0 # -1 for no epilogue
mode: str|None = None
def emulate_cuds(cuds: list[CUD]):
states = []
for cud in cuds:
if cud.mode is None:
states.append(State(off=cud.start, cfa=7, cfa_off=8, csrs={16:1}))
elif cud.mode[0] == 2: # RSP
csrs = {16:1}
off = cud.start
frame_sub = cud.mode[1] - (8+8*len(cud.mode[2]))
if cud.prologue_start != -1:
states.append(State(off=off, cfa=7, cfa_off=8, csrs=csrs))
off += cud.prologue_start
for i, reg in enumerate(cud.mode[2]):
csrs = csrs | {reg:i+2}
off += 1 if reg < 8 else 2
states.append(State(off=off, cfa=7, cfa_off=16+8*i, csrs=csrs))
off += 1 if frame_sub == 8 else 4 if frame_sub < 0x80 else 7
csrs = {16:1}|{reg:i+2 for i, reg in enumerate(cud.mode[2])}
states.append(State(off=off, cfa=7, cfa_off=cud.mode[1], csrs=csrs))
if cud.pad_after_epilogue != -1:
pop_size = sum(1 if reg < 8 else 2 for reg in cud.mode[2])
off = cud.start + cud.size - cud.pad_after_epilogue - pop_size
if frame_sub:
states.append(State(off=off, cfa=7, cfa_off=8+8*len(cud.mode[2]), csrs=csrs))
for i, reg in enumerate(cud.mode[2][::-1]):
off += 1 if reg < 8 else 2
states.append(State(off=off, cfa=7, cfa_off=8*(len(cud.mode[2])-i), csrs=csrs))
elif cud.mode[0] == 1:
csrs = {16:1}|{reg:i+2 for i, reg in enumerate(cud.mode[2])}
off = cud.start + cud.prologue_start
if cud.mode[1] + cud.prologue_start > 0:
states.append(State(off=cud.start, cfa=7, cfa_off=8, csrs={16:1}))
if cud.mode[1] > 0:
states.append(State(off=off+1, cfa=7, cfa_off=16, csrs={16:1,6:2}))
if cud.mode[1] > 4:
states.append(State(off=off+4, cfa=6, cfa_off=16, csrs={16:1,6:2}))
off += cud.mode[1]
states.append(State(off=off, cfa=6, cfa_off=16, csrs=csrs))
if cud.pad_after_epilogue != -1:
states.append(State(off=cud.start+cud.size-cud.pad_after_epilogue, cfa=7, cfa_off=8, csrs={16:1}))
else:
assert False
return states
@dataclass
class CUDModeNULL:
start: int
def finalize(self):
return CUD(start=self.start)
@dataclass
class CUDModeRBP:
start: int
csrs: dict[int, int] = field(default_factory=dict)
prologue_size: int = 0
size: int = 0
def finalize(self):
push_seq = sorted(set(self.csrs.keys()) - {16}, key=lambda r: self.csrs[r])
if not all(self.csrs[r] == i + 2 for i, r in enumerate(push_seq)) or len(push_seq) > 6 or push_seq[0] != 6:
raise InvalidState(f"invalid push order: {push_seq} {self.csrs}")
return CUD(start=self.start, mode=[1, self.prologue_size, push_seq], size=self.size, pad_after_epilogue=(-1 if self.size == 0 else 0))
@dataclass
class CUDModeRSP:
start: int
csrs: dict[int, int] = field(default_factory=dict)
prologue_advances: list[tuple[int, int]] = field(default_factory=list)
epilogue_advances: list[tuple[int, int]] = field(default_factory=list)
body_size: int = 0
def finalize(self):
cud = CUD(self.start)
push_seq = sorted(set(self.csrs.keys()) - {16}, key=lambda r: self.csrs[r])
if not all(self.csrs[r] == i + 2 for i, r in enumerate(push_seq)) or len(push_seq) > 6:
raise InvalidState(f"invalid rsp push order: {push_seq} {self.csrs}")
frame_size = self.prologue_advances[-1][1]
cud.mode = [2, frame_size, push_seq]
prologue_size = 0
if len(self.prologue_advances) == 1 and self.prologue_advances[0][0] == 0:
cud.prologue_start = -1
else:
assert all((adv >= 0 for adv, _ in self.prologue_advances))
assert all((adv >= 0 for adv, _ in self.epilogue_advances))
if len(self.prologue_advances) < len(push_seq) or len(self.prologue_advances) > len(push_seq) + 1:
raise InvalidState(f"invalid rsp prologue seq: {push_seq} {self.prologue_advances}")
for i, (reg, (adv, cfa_off)) in enumerate(zip(push_seq, self.prologue_advances)):
push_size = (1 if reg < 8 else 2)
if i == 0 and adv > push_size:
cud.prologue_start += adv - push_size
adv = push_size
prologue_size += push_size
if adv != push_size or cfa_off != 16 + 8 * i:
raise InvalidState(f"invalid rsp prologue seq at {i}: {push_seq} {self.prologue_advances}")
if len(self.prologue_advances) > len(push_seq):
adv = self.prologue_advances[-1][0]
if len(self.prologue_advances) > 1:
growth = frame_size - self.prologue_advances[-2][1]
if not ((growth == 8 and adv == 1) or (growth < 0x80) and (adv == 4) or (growth >= 0x80 and adv == 7)):
raise InvalidState(f"invalid rsp prologue rsp adjustment: {self.prologue_advances}")
prologue_size += adv
else:
growth = frame_size - 8
if growth == 8:
if adv < 1:
raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}")
cud.prologue_start += adv - 1
prologue_size += 1
elif growth <= 0x80:
if adv < 4:
raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}")
cud.prologue_start += adv - 4
prologue_size += 4
elif growth > 0x80:
if adv < 7:
raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}")
cud.prologue_start += adv - 7
prologue_size += 7
else:
assert frame_size == len(push_seq) * 8 + 8
prologue_end = 0 if cud.prologue_start == -1 else cud.prologue_start + prologue_size
if not self.epilogue_advances:
cud.pad_after_epilogue = -1
cud.size = prologue_end + self.body_size
else:
if len(self.epilogue_advances) < len(push_seq) or len(self.epilogue_advances) > len(push_seq) + 1:
raise InvalidState(f"invalid rsp epilogue seq: {push_seq} {self.epilogue_advances}")
epilogue_start = prologue_end + self.body_size
if len(self.epilogue_advances) == len(push_seq):
pop_size = (1 if push_seq[-1] < 8 else 2)
epilogue_start -= pop_size
_, cfa = self.epilogue_advances[0]
self.epilogue_advances.insert(0, (pop_size, cfa + 8))
epilogue_size = 0
for i, (reg, (adv, cfa_off)) in enumerate(zip(push_seq[::-1], self.epilogue_advances)):
pop_size = (1 if reg < 8 else 2)
epilogue_size += pop_size
if adv != pop_size or cfa_off != 16 + 8 * (len(push_seq) - i - 1):
raise InvalidState(f"invalid rsp epilogue seq at {i}: {push_seq} {self.epilogue_advances}")
if self.epilogue_advances[-1][1] != 8:
raise InvalidState(f"invalid rsp epilogue seq: {push_seq} {self.epilogue_advances}")
cud.pad_after_epilogue = self.epilogue_advances[-1][0]
cud.size = cud.pad_after_epilogue + epilogue_start + epilogue_size
return cud
def analyze_fde(fde):
cuds = []
stack = []
state = State()
state_history = []
cuds.append(CUDModeNULL(0))
CS_MODE_NULL = 2 # CFA=RSP+8 RIP=CFA-8
CS_PUSH_RBP = 3 # CFA=RSP+16 RIP=CFA-8 RBP=CFA-16, at a small offset <128 bytes
CS_MODE_RBP = 4 # This is a RBP-based stack frame with zero extra CSRs.
CS_MODE_RSP_PROLOGUE = 6 # RSP-based, in prologue
CS_MODE_RSP_PROLOGUE_DELAY = 7 # RSP-based, in prologue, but no CSRs noted yet
CS_MODE_RSP_EPILOGUE = 8 # RSP-based, in epilogue
cudstate = CS_MODE_NULL
last_advance = 0
last_state = State(state)
for inst in fde.cie.instructions + fde.instructions + [None]:
opcode = inst.opcode if inst else DW_CFA_advance_loc4
args = inst.args if inst else [fde.header.address_range - state.off]
# print(inst)
if opcode == DW_CFA_nop:
pass
elif opcode == DW_CFA_undefined:
raise InvalidState("DW_CFA_undefined")
elif opcode == DW_CFA_GNU_args_size:
raise InvalidState("DW_CFA_GNU_args_size")
elif opcode >= 0x40 and opcode & 0xc0 == DW_CFA_offset:
state = state._replace(csrs=state.csrs | {args[0]: args[1]})
elif opcode >= 0x40 and opcode & 0xc0 == DW_CFA_restore:
state = state._replace(csrs=dict(state.csrs))
del state.csrs[args[0]]
elif opcode == DW_CFA_def_cfa_register:
state = state._replace(cfa=args[0])
elif opcode == DW_CFA_def_cfa_offset:
state = state._replace(cfa_off=args[0])
elif opcode == DW_CFA_def_cfa:
state = state._replace(cfa=args[0], cfa_off=args[1])
elif opcode == DW_CFA_remember_state:
stack.append(state)
elif opcode == DW_CFA_restore_state:
state = stack.pop()._replace(off=state.off)
elif (opcode < 0xc0 and opcode & 0xc0 == DW_CFA_advance_loc) or opcode in (DW_CFA_advance_loc1, DW_CFA_advance_loc2, DW_CFA_advance_loc4):
# print(last_advance, cudstate, cuds, state, inst, args)
state_history.append(state)
if cudstate == CS_MODE_NULL:
assert(type(cuds[-1]) == CUDModeNULL)
if state.cfa == 7 and state.cfa_off == 8 and state.csrs == {16: 1}:
cudstate = CS_MODE_NULL
elif state.cfa == 7 and state.cfa_off == 16 and state.csrs == {16: 1, 6: 2}:
cudstate = CS_PUSH_RBP
elif state.cfa == 7 and state.cfa_off > 8 and len(state.csrs) > 1:
cudstate = CS_MODE_RSP_PROLOGUE
advance = last_advance if cuds[-1].start < state.off else 0
if advance == 0 or (len(state.csrs) == 2 and state.cfa_off == 16) or (len(state.csrs) == 1):
cuds.append(CUDModeRSP(state.off - advance))
cuds[-1].csrs = state.csrs
cuds[-1].prologue_advances.append((advance, state.cfa_off))
else:
# Needs NULL descriptor in between
cuds.append(CUDModeRSP(state.off))
cuds[-1].csrs = state.csrs
cuds[-1].prologue_advances.append((0, state.cfa_off))
elif state.cfa == 7 and state.cfa_off > last_state.cfa_off and len(state.csrs) == 1:
cudstate = CS_MODE_RSP_PROLOGUE_DELAY
cuds.append(CUDModeRSP(state.off - last_advance)) # XXX: not accurate?
cuds[-1].csrs = state.csrs
cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
elif state.cfa == 6 and state.cfa_off == 16:
cudstate = CS_MODE_RBP
cuds.append(CUDModeRBP(state.off))
cuds[-1].csrs = state.csrs
else:
raise InvalidState(f"invalid state reached 2: {cudstate} {repr(state)}")
elif cudstate == CS_MODE_RSP_PROLOGUE_DELAY:
if state.cfa != 7:
raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}")
elif last_state.csrs == state.csrs and last_state.cfa_off < state.cfa_off:
cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
elif last_state.cfa_off < state.cfa_off:
assert all(last_state.csrs[r] == state.csrs[r] for r in last_state.csrs)
cuds[-1].csrs = state.csrs
cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
cudstate = CS_MODE_RSP_PROLOGUE
elif last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off:
cudstate = CS_MODE_RSP_EPILOGUE
cuds[-1].body_size = last_advance
cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
else:
raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}")
elif cudstate == CS_MODE_RSP_PROLOGUE:
if state.cfa != 7:
raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}")
if last_state.cfa_off < state.cfa_off:
assert all(last_state.csrs[r] == state.csrs[r] for r in last_state.csrs)
cuds[-1].csrs = state.csrs
cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
elif last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off:
cudstate = CS_MODE_RSP_EPILOGUE
cuds[-1].body_size = last_advance
cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
elif state.cfa == 7 and state.cfa_off == 8 and state.csrs == {16: 1}:
cudstate = CS_MODE_NULL
cuds[-1].body_size = last_advance
if len(last_state.csrs) == 2 and last_state.cfa_off == 16:
cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
# The current advance is folded into the epilogue
cuds.append(CUDModeNULL(state.off + args[0]))
else:
cuds.append(CUDModeNULL(state.off))
else:
raise InvalidState(f"invalid state reached: {cudstate} {repr(state)} {repr(last_state)}")
elif cudstate == CS_MODE_RSP_EPILOGUE:
if state.cfa != 7:
raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}")
if last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off:
cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
elif state.cfa_off == 8 and state.csrs == {16: 1}:
if cuds[-1].epilogue_advances and cuds[-1].epilogue_advances[-1][1] == 8:
# E.g., pop, ret, something else without stack frame
cuds[-1].epilogue_advances[-1] = (cuds[-1].epilogue_advances[-1][0] + args[0], 8)
else:
cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
cudstate = CS_MODE_NULL
# The current advance is folded into the epilogue
cuds.append(CUDModeNULL(state.off + args[0]))
else:
cudstate = CS_MODE_RSP_PROLOGUE
cuds.append(CUDModeRSP(state.off))
cuds[-1].csrs = state.csrs
cuds[-1].prologue_advances.append((0, state.cfa_off))
elif cudstate == CS_PUSH_RBP:
if state.cfa == 6 and state.cfa_off == 16 and state.csrs == {16: 1, 6: 2}:
if last_advance != 3:
raise InvalidState(f"invalid state, instruction between push rbp/mov rbp,rsp? {repr(state)}")
cudstate = CS_MODE_RBP
cuds.append(CUDModeRBP(state.off - 4, state.csrs))
cuds[-1].prologue_size = 4
elif state.cfa == 7 and state.cfa_off == 24 and len(state.csrs) == 3 and state.csrs.get(16) == 1 and state.csrs.get(6) == 2:
# Second push, must be rsp
cudstate = CS_MODE_RSP_PROLOGUE
pushed_reg, _ = next(iter(x for x in state.csrs.items() if x[0] not in (16, 6)))
if last_advance != (1 if pushed_reg < 8 else 2):
raise InvalidState(f"invalid state, instruction between pushes? {repr(state)}")
start = state.off - (1 if pushed_reg < 8 else 2) - 1
cuds.append(CUDModeRSP(start))
cuds[-1].csrs = state.csrs
cuds[-1].prologue_advances.append((1, last_state.cfa_off))
cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
else:
raise InvalidState(f"invalid state reached 3: {cudstate} {repr(state)}")
elif cudstate == CS_MODE_RBP:
if state.cfa == 6 and state.cfa_off == 16:
if len(state.csrs) > len(last_state.csrs):
if not all(last_state.csrs[r] == state.csrs.get(r) for r in last_state.csrs):
raise InvalidState(f"RBP CSR mismatch? {last_state} -> {state}")
cuds[-1].csrs = state.csrs
cuds[-1].prologue_size = state.off - cuds[-1].start
elif state.cfa == 7 and state.cfa_off == 8:
# cuds[-1].epilogue = True
cuds[-1].size = state.off - cuds[-1].start
cudstate = CS_MODE_NULL
cuds.append(CUDModeNULL(state.off))
else:
raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}")
else:
raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}")
state = state._replace(off=state.off + args[0])
last_advance = args[0]
last_state = state
else:
raise InvalidState(f"unhandled opcode {inst}")
assert state.off == fde.header.address_range
cuds = [cud.finalize() for cud in cuds]
# Extend last CUD to end of function
if cuds[-1].start + cuds[-1].size != state.off:
remaining = state.off - cuds[-1].start
if not cuds[-1].mode or cuds[-1].pad_after_epilogue == -1:
cuds[-1].size = remaining
elif cuds[-1].pad_after_epilogue > 0 and cuds[-1].pad_after_epilogue + remaining < 128:
cuds[-1].pad_after_epilogue += remaining - cuds[-1].size
cuds[-1].size = remaining
else:
cuds.append(CUD(start=cuds[-1].start + cuds[-1].size, size=remaining))
for i in range(len(cuds)):
if cuds[i].mode:
continue
# Fix NULL CUD size.
if i+1 < len(cuds):
cuds[i].size = cuds[i+1].start - cuds[i].start
# Fold NULL CUDs into prologue/epilogue of adjacent descriptors.
if i+1 < len(cuds) and cuds[i+1].prologue_start >= 0 and cuds[i].size + cuds[i+1].prologue_start < 128:
cuds[i+1].start -= cuds[i].size
cuds[i+1].size += cuds[i].size
cuds[i+1].prologue_start += cuds[i].size
cuds[i].size = 0
if i > 0 and cuds[i-1].pad_after_epilogue >= 0 and cuds[i].size + cuds[i-1].pad_after_epilogue < 128:
cuds[i-1].size += cuds[i].size
cuds[i-1].pad_after_epilogue += cuds[i].size
cuds[i].size = 0
# Drop zero-sized NULL CUDs.
cuds = [cud for cud in cuds if cud.size or cud.mode]
assert cuds[-1].start + cuds[-1].size == fde.header.address_range
# Verify that cuds are accurate.
cud_states = emulate_cuds(cuds)
offs_to_validate = {s.off for s in state_history} | {s.off for s in cud_states}
for off in sorted(offs_to_validate):
dwarf_state = state_history[bisect_right(state_history, off, key=lambda s: s.off)-1]
cud_state = cud_states[bisect_right(cud_states, off, key=lambda s: s.off)-1]
# We can't really validate CSRs, but things generally look good.
# There might be some minor bugs, but this code isn't intending to be
# perfect.
#or any(cud_state.csrs.get(r) != o for r, o in dwarf_state.csrs.items())
if (dwarf_state.cfa != cud_state.cfa or dwarf_state.cfa_off != cud_state.cfa_off):
print(cuds)
for x in state_history:
print(x)
for x in cud_states:
print(x)
raise InvalidState("validation failed")
return cuds
if __name__ == "__main__":
for arg in sys.argv[1:]:
with open(arg, "rb") as f:
elf = ELFFile(f)
if not elf.has_dwarf_info():
print(f'{arg} has no DWARF info')
continue
dwarfinfo = elf.get_dwarf_info()
for entry in dwarfinfo.EH_CFI_entries():
if type(entry) == FDE:
addr = entry.header["initial_location"]
print("ADDR", hex(addr))
try:
cuds = analyze_fde(entry)
print("LEN", len(cuds))
for cud in cuds:
print("CUD-ALL", cud.prologue_start, cud.pad_after_epilogue, cud.mode)
print("CUD-MODE", cud.mode)
if cud.mode:
print("CUD-CSR", cud.mode[2])
print("DETAIL", cuds)
except InvalidState as e:
print("ERROR", e)
pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment