aengelke · November 9, 2025 18:48
diff --git a/cud-analyze.py b/cud-analyze.py
 #!/usr/bin/python3
 # SPDX-FileCopyrightText: 2025 Alexis Engelke
 # SPDX-License-Identifier: Apache-2.0
 from bisect import bisect_right
 from dataclasses import dataclass, replace, field
 from typing import NamedTuple
 import sys

 from elftools.elf.elffile import ELFFile
 from elftools.dwarf.callframe import FDE
 from elftools.dwarf.constants import *

 class InvalidState(Exception):
    pass

 class State(NamedTuple):
    off: int = 0
    cfa: int = -1 # reg
    cfa_off: int = 0
    csrs: dict[int, int] = {}

 @dataclass
 class CUD:
    start: int
    size: int = 0 # analysis only
    prologue_start: int = 0 # offset into descriptor where the prologue instrs begin, -1 for no prologue
    pad_after_epilogue: int = 0 # -1 for no epilogue
    mode: str|None = None

 def emulate_cuds(cuds: list[CUD]):
    states = []
    for cud in cuds:
        if cud.mode is None:
            states.append(State(off=cud.start, cfa=7, cfa_off=8, csrs={16:1}))
        elif cud.mode[0] == 2: # RSP
            csrs = {16:1}
            off = cud.start
            frame_sub = cud.mode[1] - (8+8*len(cud.mode[2]))
            if cud.prologue_start != -1:
                states.append(State(off=off, cfa=7, cfa_off=8, csrs=csrs))
                off += cud.prologue_start
                for i, reg in enumerate(cud.mode[2]):
                    csrs = csrs | {reg:i+2}
                    off += 1 if reg < 8 else 2
                    states.append(State(off=off, cfa=7, cfa_off=16+8*i, csrs=csrs))
                off += 1 if frame_sub == 8 else 4 if frame_sub < 0x80 else 7
            csrs = {16:1}|{reg:i+2 for i, reg in enumerate(cud.mode[2])}
            states.append(State(off=off, cfa=7, cfa_off=cud.mode[1], csrs=csrs))
            if cud.pad_after_epilogue != -1:
                pop_size = sum(1 if reg < 8 else 2 for reg in cud.mode[2])
                off = cud.start + cud.size - cud.pad_after_epilogue - pop_size
                if frame_sub:
                    states.append(State(off=off, cfa=7, cfa_off=8+8*len(cud.mode[2]), csrs=csrs))
                for i, reg in enumerate(cud.mode[2][::-1]):
                    off += 1 if reg < 8 else 2
                    states.append(State(off=off, cfa=7, cfa_off=8*(len(cud.mode[2])-i), csrs=csrs))
        elif cud.mode[0] == 1:
            csrs = {16:1}|{reg:i+2 for i, reg in enumerate(cud.mode[2])}
            off = cud.start + cud.prologue_start
            if cud.mode[1] + cud.prologue_start > 0:
                states.append(State(off=cud.start, cfa=7, cfa_off=8, csrs={16:1}))
            if cud.mode[1] > 0:
                states.append(State(off=off+1, cfa=7, cfa_off=16, csrs={16:1,6:2}))
                if cud.mode[1] > 4:
                    states.append(State(off=off+4, cfa=6, cfa_off=16, csrs={16:1,6:2}))
            off += cud.mode[1]
            states.append(State(off=off, cfa=6, cfa_off=16, csrs=csrs))
            if cud.pad_after_epilogue != -1:
                states.append(State(off=cud.start+cud.size-cud.pad_after_epilogue, cfa=7, cfa_off=8, csrs={16:1}))
        else:
            assert False
    return states

 @dataclass
 class CUDModeNULL:
    start: int
    def finalize(self):
        return CUD(start=self.start)

 @dataclass
 class CUDModeRBP:
    start: int
    csrs: dict[int, int] = field(default_factory=dict)
    prologue_size: int = 0
    size: int = 0

    def finalize(self):
        push_seq = sorted(set(self.csrs.keys()) - {16}, key=lambda r: self.csrs[r])
        if not all(self.csrs[r] == i + 2 for i, r in enumerate(push_seq)) or len(push_seq) > 6 or push_seq[0] != 6:
            raise InvalidState(f"invalid push order: {push_seq} {self.csrs}")

        return CUD(start=self.start, mode=[1, self.prologue_size, push_seq], size=self.size, pad_after_epilogue=(-1 if self.size == 0 else 0))

 @dataclass
 class CUDModeRSP:
    start: int
    csrs: dict[int, int] = field(default_factory=dict)
    prologue_advances: list[tuple[int, int]] = field(default_factory=list)
    epilogue_advances: list[tuple[int, int]] = field(default_factory=list)
    body_size: int = 0

    def finalize(self):
        cud = CUD(self.start)

        push_seq = sorted(set(self.csrs.keys()) - {16}, key=lambda r: self.csrs[r])
        if not all(self.csrs[r] == i + 2 for i, r in enumerate(push_seq)) or len(push_seq) > 6:
            raise InvalidState(f"invalid rsp push order: {push_seq} {self.csrs}")
        frame_size = self.prologue_advances[-1][1]
        cud.mode = [2, frame_size, push_seq]

        prologue_size = 0
        if len(self.prologue_advances) == 1 and self.prologue_advances[0][0] == 0:
            cud.prologue_start = -1
        else:
            assert all((adv >= 0 for adv, _ in self.prologue_advances))
            assert all((adv >= 0 for adv, _ in self.epilogue_advances))
            if len(self.prologue_advances) < len(push_seq) or len(self.prologue_advances) > len(push_seq) + 1:
                raise InvalidState(f"invalid rsp prologue seq: {push_seq} {self.prologue_advances}")
            for i, (reg, (adv, cfa_off)) in enumerate(zip(push_seq, self.prologue_advances)):
                push_size = (1 if reg < 8 else 2)
                if i == 0 and adv > push_size:
                    cud.prologue_start += adv - push_size
                    adv = push_size
                prologue_size += push_size
                if adv != push_size or cfa_off != 16 + 8 * i:
                    raise InvalidState(f"invalid rsp prologue seq at {i}: {push_seq} {self.prologue_advances}")
            if len(self.prologue_advances) > len(push_seq):
                adv = self.prologue_advances[-1][0]
                if len(self.prologue_advances) > 1:
                    growth = frame_size - self.prologue_advances[-2][1]
                    if not ((growth == 8 and adv == 1) or (growth < 0x80) and (adv == 4) or (growth >= 0x80 and adv == 7)):
                        raise InvalidState(f"invalid rsp prologue rsp adjustment: {self.prologue_advances}")
                    prologue_size += adv
                else:
                    growth = frame_size - 8
                    if growth == 8:
                        if adv < 1:
                            raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}")
                        cud.prologue_start += adv - 1
                        prologue_size += 1
                    elif growth <= 0x80:
                        if adv < 4:
                            raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}")
                        cud.prologue_start += adv - 4
                        prologue_size += 4
                    elif growth > 0x80:
                        if adv < 7:
                            raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}")
                        cud.prologue_start += adv - 7
                        prologue_size += 7
            else:
                assert frame_size == len(push_seq) * 8 + 8

        prologue_end = 0 if cud.prologue_start == -1 else cud.prologue_start + prologue_size
        if not self.epilogue_advances:
            cud.pad_after_epilogue = -1
            cud.size = prologue_end + self.body_size
        else:
            if len(self.epilogue_advances) < len(push_seq) or len(self.epilogue_advances) > len(push_seq) + 1:
                raise InvalidState(f"invalid rsp epilogue seq: {push_seq} {self.epilogue_advances}")
            epilogue_start = prologue_end + self.body_size
            if len(self.epilogue_advances) == len(push_seq):
                pop_size = (1 if push_seq[-1] < 8 else 2)
                epilogue_start -= pop_size
                _, cfa = self.epilogue_advances[0]
                self.epilogue_advances.insert(0, (pop_size, cfa + 8))
            epilogue_size = 0
            for i, (reg, (adv, cfa_off)) in enumerate(zip(push_seq[::-1], self.epilogue_advances)):
                pop_size = (1 if reg < 8 else 2)
                epilogue_size += pop_size
                if adv != pop_size or cfa_off != 16 + 8 * (len(push_seq) - i - 1):
                    raise InvalidState(f"invalid rsp epilogue seq at {i}: {push_seq} {self.epilogue_advances}")
            if self.epilogue_advances[-1][1] != 8:
                raise InvalidState(f"invalid rsp epilogue seq: {push_seq} {self.epilogue_advances}")
            cud.pad_after_epilogue = self.epilogue_advances[-1][0]
            cud.size = cud.pad_after_epilogue + epilogue_start + epilogue_size
        return cud

 def analyze_fde(fde):
    cuds = []
    stack = []
    state = State()
    state_history = []

    cuds.append(CUDModeNULL(0))

    CS_MODE_NULL = 2 # CFA=RSP+8 RIP=CFA-8
    CS_PUSH_RBP = 3 # CFA=RSP+16 RIP=CFA-8 RBP=CFA-16, at a small offset <128 bytes
    CS_MODE_RBP = 4 # This is a RBP-based stack frame with zero extra CSRs.
    CS_MODE_RSP_PROLOGUE = 6 # RSP-based, in prologue
    CS_MODE_RSP_PROLOGUE_DELAY = 7 # RSP-based, in prologue, but no CSRs noted yet
    CS_MODE_RSP_EPILOGUE = 8 # RSP-based, in epilogue
    cudstate = CS_MODE_NULL
    last_advance = 0
    last_state = State(state)
    for inst in fde.cie.instructions + fde.instructions + [None]:
        opcode = inst.opcode if inst else DW_CFA_advance_loc4
        args = inst.args if inst else [fde.header.address_range - state.off]
        # print(inst)
        if opcode == DW_CFA_nop:
            pass
        elif opcode == DW_CFA_undefined:
            raise InvalidState("DW_CFA_undefined")
        elif opcode == DW_CFA_GNU_args_size:
            raise InvalidState("DW_CFA_GNU_args_size")
        elif opcode >= 0x40 and opcode & 0xc0 == DW_CFA_offset:
            state = state._replace(csrs=state.csrs | {args[0]: args[1]})
        elif opcode >= 0x40 and opcode & 0xc0 == DW_CFA_restore:
            state = state._replace(csrs=dict(state.csrs))
            del state.csrs[args[0]]
        elif opcode == DW_CFA_def_cfa_register:
            state = state._replace(cfa=args[0])
        elif opcode == DW_CFA_def_cfa_offset:
            state = state._replace(cfa_off=args[0])
        elif opcode == DW_CFA_def_cfa:
            state = state._replace(cfa=args[0], cfa_off=args[1])
        elif opcode == DW_CFA_remember_state:
            stack.append(state)
        elif opcode == DW_CFA_restore_state:
            state = stack.pop()._replace(off=state.off)
        elif (opcode < 0xc0 and opcode & 0xc0 == DW_CFA_advance_loc) or opcode in (DW_CFA_advance_loc1, DW_CFA_advance_loc2, DW_CFA_advance_loc4):
            # print(last_advance, cudstate, cuds, state, inst, args)
            state_history.append(state)
            if cudstate == CS_MODE_NULL:
                assert(type(cuds[-1]) == CUDModeNULL)
                if state.cfa == 7 and state.cfa_off == 8 and state.csrs == {16: 1}:
                    cudstate = CS_MODE_NULL
                elif state.cfa == 7 and state.cfa_off == 16 and state.csrs == {16: 1, 6: 2}:
                    cudstate = CS_PUSH_RBP
                elif state.cfa == 7 and state.cfa_off > 8 and len(state.csrs) > 1:
                    cudstate = CS_MODE_RSP_PROLOGUE
                    advance = last_advance if cuds[-1].start < state.off else 0
                    if advance == 0 or (len(state.csrs) == 2 and state.cfa_off == 16) or (len(state.csrs) == 1):
                        cuds.append(CUDModeRSP(state.off - advance))
                        cuds[-1].csrs = state.csrs
                        cuds[-1].prologue_advances.append((advance, state.cfa_off))
                    else:
                        # Needs NULL descriptor in between
                        cuds.append(CUDModeRSP(state.off))
                        cuds[-1].csrs = state.csrs
                        cuds[-1].prologue_advances.append((0, state.cfa_off))
                elif state.cfa == 7 and state.cfa_off > last_state.cfa_off and len(state.csrs) == 1:
                    cudstate = CS_MODE_RSP_PROLOGUE_DELAY
                    cuds.append(CUDModeRSP(state.off - last_advance)) # XXX: not accurate?
                    cuds[-1].csrs = state.csrs
                    cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
                elif state.cfa == 6 and state.cfa_off == 16:
                    cudstate = CS_MODE_RBP
                    cuds.append(CUDModeRBP(state.off))
                    cuds[-1].csrs = state.csrs
                else:
                    raise InvalidState(f"invalid state reached 2: {cudstate} {repr(state)}")
            elif cudstate == CS_MODE_RSP_PROLOGUE_DELAY:
                if state.cfa != 7:
                    raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}")
                elif last_state.csrs == state.csrs and last_state.cfa_off < state.cfa_off:
                    cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
                elif last_state.cfa_off < state.cfa_off:
                    assert all(last_state.csrs[r] == state.csrs[r] for r in last_state.csrs)
                    cuds[-1].csrs = state.csrs
                    cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
                    cudstate = CS_MODE_RSP_PROLOGUE
                elif last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off:
                    cudstate = CS_MODE_RSP_EPILOGUE
                    cuds[-1].body_size = last_advance
                    cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
                else:
                    raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}")
            elif cudstate == CS_MODE_RSP_PROLOGUE:
                if state.cfa != 7:
                    raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}")
                if last_state.cfa_off < state.cfa_off:
                    assert all(last_state.csrs[r] == state.csrs[r] for r in last_state.csrs)
                    cuds[-1].csrs = state.csrs
                    cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
                elif last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off:
                    cudstate = CS_MODE_RSP_EPILOGUE
                    cuds[-1].body_size = last_advance
                    cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
                elif state.cfa == 7 and state.cfa_off == 8 and state.csrs == {16: 1}:
                    cudstate = CS_MODE_NULL
                    cuds[-1].body_size = last_advance
                    if len(last_state.csrs) == 2 and last_state.cfa_off == 16:
                        cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
                        # The current advance is folded into the epilogue
                        cuds.append(CUDModeNULL(state.off + args[0]))
                    else:
                        cuds.append(CUDModeNULL(state.off))
                else:
                    raise InvalidState(f"invalid state reached: {cudstate} {repr(state)} {repr(last_state)}")
            elif cudstate == CS_MODE_RSP_EPILOGUE:
                if state.cfa != 7:
                    raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}")
                if last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off:
                    cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
                elif state.cfa_off == 8 and state.csrs == {16: 1}:
                    if cuds[-1].epilogue_advances and cuds[-1].epilogue_advances[-1][1] == 8:
                        # E.g., pop, ret, something else without stack frame
                        cuds[-1].epilogue_advances[-1] = (cuds[-1].epilogue_advances[-1][0] + args[0], 8)
                    else:
                        cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
                    cudstate = CS_MODE_NULL
                    # The current advance is folded into the epilogue
                    cuds.append(CUDModeNULL(state.off + args[0]))
                else:
                    cudstate = CS_MODE_RSP_PROLOGUE
                    cuds.append(CUDModeRSP(state.off))
                    cuds[-1].csrs = state.csrs
                    cuds[-1].prologue_advances.append((0, state.cfa_off))
            elif cudstate == CS_PUSH_RBP:
                if state.cfa == 6 and state.cfa_off == 16 and state.csrs == {16: 1, 6: 2}:
                    if last_advance != 3:
                        raise InvalidState(f"invalid state, instruction between push rbp/mov rbp,rsp? {repr(state)}")
                    cudstate = CS_MODE_RBP
                    cuds.append(CUDModeRBP(state.off - 4, state.csrs))
                    cuds[-1].prologue_size = 4
                elif state.cfa == 7 and state.cfa_off == 24 and len(state.csrs) == 3 and state.csrs.get(16) == 1 and state.csrs.get(6) == 2:
                    # Second push, must be rsp
                    cudstate = CS_MODE_RSP_PROLOGUE
                    pushed_reg, _ = next(iter(x for x in state.csrs.items() if x[0] not in (16, 6)))
                    if last_advance != (1 if pushed_reg < 8 else 2):
                        raise InvalidState(f"invalid state, instruction between pushes? {repr(state)}")
                    start = state.off - (1 if pushed_reg < 8 else 2) - 1
                    cuds.append(CUDModeRSP(start))
                    cuds[-1].csrs = state.csrs
                    cuds[-1].prologue_advances.append((1, last_state.cfa_off))
                    cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
                else:
                    raise InvalidState(f"invalid state reached 3: {cudstate} {repr(state)}")
            elif cudstate == CS_MODE_RBP:
                if state.cfa == 6 and state.cfa_off == 16:
                    if len(state.csrs) > len(last_state.csrs):
                        if not all(last_state.csrs[r] == state.csrs.get(r) for r in last_state.csrs):
                            raise InvalidState(f"RBP CSR mismatch? {last_state} -> {state}")
                        cuds[-1].csrs = state.csrs
                        cuds[-1].prologue_size = state.off - cuds[-1].start
                elif state.cfa == 7 and state.cfa_off == 8:
                    # cuds[-1].epilogue = True
                    cuds[-1].size = state.off - cuds[-1].start
                    cudstate = CS_MODE_NULL
                    cuds.append(CUDModeNULL(state.off))
                else:
                    raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}")
            else:
                raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}")
            state = state._replace(off=state.off + args[0])
            last_advance = args[0]
            last_state = state
        else:
            raise InvalidState(f"unhandled opcode {inst}")

    assert state.off == fde.header.address_range
    cuds = [cud.finalize() for cud in cuds]
    # Extend last CUD to end of function
    if cuds[-1].start + cuds[-1].size != state.off:
        remaining = state.off - cuds[-1].start
        if not cuds[-1].mode or cuds[-1].pad_after_epilogue == -1:
            cuds[-1].size = remaining
        elif cuds[-1].pad_after_epilogue > 0 and cuds[-1].pad_after_epilogue + remaining < 128:
            cuds[-1].pad_after_epilogue += remaining - cuds[-1].size
            cuds[-1].size = remaining
        else:
            cuds.append(CUD(start=cuds[-1].start + cuds[-1].size, size=remaining))
    for i in range(len(cuds)):
        if cuds[i].mode:
            continue
        # Fix NULL CUD size.
        if i+1 < len(cuds):
            cuds[i].size = cuds[i+1].start - cuds[i].start
        # Fold NULL CUDs into prologue/epilogue of adjacent descriptors.
        if i+1 < len(cuds) and cuds[i+1].prologue_start >= 0 and cuds[i].size + cuds[i+1].prologue_start < 128:
            cuds[i+1].start -= cuds[i].size
            cuds[i+1].size += cuds[i].size
            cuds[i+1].prologue_start += cuds[i].size
            cuds[i].size = 0
        if i > 0 and cuds[i-1].pad_after_epilogue >= 0 and cuds[i].size + cuds[i-1].pad_after_epilogue < 128:
            cuds[i-1].size += cuds[i].size
            cuds[i-1].pad_after_epilogue += cuds[i].size
            cuds[i].size = 0
    # Drop zero-sized NULL CUDs.
    cuds = [cud for cud in cuds if cud.size or cud.mode]
    assert cuds[-1].start + cuds[-1].size == fde.header.address_range

    # Verify that cuds are accurate.
    cud_states = emulate_cuds(cuds)
    offs_to_validate = {s.off for s in state_history} | {s.off for s in cud_states}
    for off in sorted(offs_to_validate):
        dwarf_state = state_history[bisect_right(state_history, off, key=lambda s: s.off)-1]
        cud_state = cud_states[bisect_right(cud_states, off, key=lambda s: s.off)-1]
        # We can't really validate CSRs, but things generally look good.
        # There might be some minor bugs, but this code isn't intending to be
        # perfect.
        #or any(cud_state.csrs.get(r) != o for r, o in dwarf_state.csrs.items())
        if (dwarf_state.cfa != cud_state.cfa or dwarf_state.cfa_off != cud_state.cfa_off):
            print(cuds)
            for x in state_history:
                print(x)
            for x in cud_states:
                print(x)
            raise InvalidState("validation failed")

    return cuds

 if __name__ == "__main__":
    for arg in sys.argv[1:]:
        with open(arg, "rb") as f:
            elf = ELFFile(f)
            if not elf.has_dwarf_info():
                print(f'{arg} has no DWARF info')
                continue
            dwarfinfo = elf.get_dwarf_info()
            for entry in dwarfinfo.EH_CFI_entries():
                if type(entry) == FDE:
                    addr = entry.header["initial_location"]
                    print("ADDR", hex(addr))
                    try:
                        cuds = analyze_fde(entry)
                        print("LEN", len(cuds))
                        for cud in cuds:
                            print("CUD-ALL", cud.prologue_start, cud.pad_after_epilogue, cud.mode)
                            print("CUD-MODE", cud.mode)
                            if cud.mode:
                                print("CUD-CSR", cud.mode[2])
                        print("DETAIL", cuds)
                    except InvalidState as e:
                        print("ERROR", e)
                        pass
	#!/usr/bin/python3
	# SPDX-FileCopyrightText: 2025 Alexis Engelke
	# SPDX-License-Identifier: Apache-2.0
	from bisect import bisect_right
	from dataclasses import dataclass, replace, field
	from typing import NamedTuple
	import sys

	from elftools.elf.elffile import ELFFile
	from elftools.dwarf.callframe import FDE
	from elftools.dwarf.constants import *

	class InvalidState(Exception):
	pass

	class State(NamedTuple):
	off: int = 0
	cfa: int = -1 # reg
	cfa_off: int = 0
	csrs: dict[int, int] = {}

	@dataclass
	class CUD:
	start: int
	size: int = 0 # analysis only
	prologue_start: int = 0 # offset into descriptor where the prologue instrs begin, -1 for no prologue
	pad_after_epilogue: int = 0 # -1 for no epilogue
	mode: str\|None = None

	def emulate_cuds(cuds: list[CUD]):
	states = []
	for cud in cuds:
	if cud.mode is None:
	states.append(State(off=cud.start, cfa=7, cfa_off=8, csrs={16:1}))
	elif cud.mode[0] == 2: # RSP
	csrs = {16:1}
	off = cud.start
	frame_sub = cud.mode[1] - (8+8*len(cud.mode[2]))
	if cud.prologue_start != -1:
	states.append(State(off=off, cfa=7, cfa_off=8, csrs=csrs))
	off += cud.prologue_start
	for i, reg in enumerate(cud.mode[2]):
	csrs = csrs \| {reg:i+2}
	off += 1 if reg < 8 else 2
	states.append(State(off=off, cfa=7, cfa_off=16+8*i, csrs=csrs))
	off += 1 if frame_sub == 8 else 4 if frame_sub < 0x80 else 7
	csrs = {16:1}\|{reg:i+2 for i, reg in enumerate(cud.mode[2])}
	states.append(State(off=off, cfa=7, cfa_off=cud.mode[1], csrs=csrs))
	if cud.pad_after_epilogue != -1:
	pop_size = sum(1 if reg < 8 else 2 for reg in cud.mode[2])
	off = cud.start + cud.size - cud.pad_after_epilogue - pop_size
	if frame_sub:
	states.append(State(off=off, cfa=7, cfa_off=8+8*len(cud.mode[2]), csrs=csrs))
	for i, reg in enumerate(cud.mode[2][::-1]):
	off += 1 if reg < 8 else 2
	states.append(State(off=off, cfa=7, cfa_off=8*(len(cud.mode[2])-i), csrs=csrs))
	elif cud.mode[0] == 1:
	csrs = {16:1}\|{reg:i+2 for i, reg in enumerate(cud.mode[2])}
	off = cud.start + cud.prologue_start
	if cud.mode[1] + cud.prologue_start > 0:
	states.append(State(off=cud.start, cfa=7, cfa_off=8, csrs={16:1}))
	if cud.mode[1] > 0:
	states.append(State(off=off+1, cfa=7, cfa_off=16, csrs={16:1,6:2}))
	if cud.mode[1] > 4:
	states.append(State(off=off+4, cfa=6, cfa_off=16, csrs={16:1,6:2}))
	off += cud.mode[1]
	states.append(State(off=off, cfa=6, cfa_off=16, csrs=csrs))
	if cud.pad_after_epilogue != -1:
	states.append(State(off=cud.start+cud.size-cud.pad_after_epilogue, cfa=7, cfa_off=8, csrs={16:1}))
	else:
	assert False
	return states

	@dataclass
	class CUDModeNULL:
	start: int
	def finalize(self):
	return CUD(start=self.start)

	@dataclass
	class CUDModeRBP:
	start: int
	csrs: dict[int, int] = field(default_factory=dict)
	prologue_size: int = 0
	size: int = 0

	def finalize(self):
	push_seq = sorted(set(self.csrs.keys()) - {16}, key=lambda r: self.csrs[r])
	if not all(self.csrs[r] == i + 2 for i, r in enumerate(push_seq)) or len(push_seq) > 6 or push_seq[0] != 6:
	raise InvalidState(f"invalid push order: {push_seq} {self.csrs}")

	return CUD(start=self.start, mode=[1, self.prologue_size, push_seq], size=self.size, pad_after_epilogue=(-1 if self.size == 0 else 0))

	@dataclass
	class CUDModeRSP:
	start: int
	csrs: dict[int, int] = field(default_factory=dict)
	prologue_advances: list[tuple[int, int]] = field(default_factory=list)
	epilogue_advances: list[tuple[int, int]] = field(default_factory=list)
	body_size: int = 0

	def finalize(self):
	cud = CUD(self.start)

	push_seq = sorted(set(self.csrs.keys()) - {16}, key=lambda r: self.csrs[r])
	if not all(self.csrs[r] == i + 2 for i, r in enumerate(push_seq)) or len(push_seq) > 6:
	raise InvalidState(f"invalid rsp push order: {push_seq} {self.csrs}")
	frame_size = self.prologue_advances[-1][1]
	cud.mode = [2, frame_size, push_seq]

	prologue_size = 0
	if len(self.prologue_advances) == 1 and self.prologue_advances[0][0] == 0:
	cud.prologue_start = -1
	else:
	assert all((adv >= 0 for adv, _ in self.prologue_advances))
	assert all((adv >= 0 for adv, _ in self.epilogue_advances))
	if len(self.prologue_advances) < len(push_seq) or len(self.prologue_advances) > len(push_seq) + 1:
	raise InvalidState(f"invalid rsp prologue seq: {push_seq} {self.prologue_advances}")
	for i, (reg, (adv, cfa_off)) in enumerate(zip(push_seq, self.prologue_advances)):
	push_size = (1 if reg < 8 else 2)
	if i == 0 and adv > push_size:
	cud.prologue_start += adv - push_size
	adv = push_size
	prologue_size += push_size
	if adv != push_size or cfa_off != 16 + 8 * i:
	raise InvalidState(f"invalid rsp prologue seq at {i}: {push_seq} {self.prologue_advances}")
	if len(self.prologue_advances) > len(push_seq):
	adv = self.prologue_advances[-1][0]
	if len(self.prologue_advances) > 1:
	growth = frame_size - self.prologue_advances[-2][1]
	if not ((growth == 8 and adv == 1) or (growth < 0x80) and (adv == 4) or (growth >= 0x80 and adv == 7)):
	raise InvalidState(f"invalid rsp prologue rsp adjustment: {self.prologue_advances}")
	prologue_size += adv
	else:
	growth = frame_size - 8
	if growth == 8:
	if adv < 1:
	raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}")
	cud.prologue_start += adv - 1
	prologue_size += 1
	elif growth <= 0x80:
	if adv < 4:
	raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}")
	cud.prologue_start += adv - 4
	prologue_size += 4
	elif growth > 0x80:
	if adv < 7:
	raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}")
	cud.prologue_start += adv - 7
	prologue_size += 7
	else:
	assert frame_size == len(push_seq) * 8 + 8

	prologue_end = 0 if cud.prologue_start == -1 else cud.prologue_start + prologue_size
	if not self.epilogue_advances:
	cud.pad_after_epilogue = -1
	cud.size = prologue_end + self.body_size
	else:
	if len(self.epilogue_advances) < len(push_seq) or len(self.epilogue_advances) > len(push_seq) + 1:
	raise InvalidState(f"invalid rsp epilogue seq: {push_seq} {self.epilogue_advances}")
	epilogue_start = prologue_end + self.body_size
	if len(self.epilogue_advances) == len(push_seq):
	pop_size = (1 if push_seq[-1] < 8 else 2)
	epilogue_start -= pop_size
	_, cfa = self.epilogue_advances[0]
	self.epilogue_advances.insert(0, (pop_size, cfa + 8))
	epilogue_size = 0
	for i, (reg, (adv, cfa_off)) in enumerate(zip(push_seq[::-1], self.epilogue_advances)):
	pop_size = (1 if reg < 8 else 2)
	epilogue_size += pop_size
	if adv != pop_size or cfa_off != 16 + 8 * (len(push_seq) - i - 1):
	raise InvalidState(f"invalid rsp epilogue seq at {i}: {push_seq} {self.epilogue_advances}")
	if self.epilogue_advances[-1][1] != 8:
	raise InvalidState(f"invalid rsp epilogue seq: {push_seq} {self.epilogue_advances}")
	cud.pad_after_epilogue = self.epilogue_advances[-1][0]
	cud.size = cud.pad_after_epilogue + epilogue_start + epilogue_size
	return cud

	def analyze_fde(fde):
	cuds = []
	stack = []
	state = State()
	state_history = []

	cuds.append(CUDModeNULL(0))

	CS_MODE_NULL = 2 # CFA=RSP+8 RIP=CFA-8
	CS_PUSH_RBP = 3 # CFA=RSP+16 RIP=CFA-8 RBP=CFA-16, at a small offset <128 bytes
	CS_MODE_RBP = 4 # This is a RBP-based stack frame with zero extra CSRs.
	CS_MODE_RSP_PROLOGUE = 6 # RSP-based, in prologue
	CS_MODE_RSP_PROLOGUE_DELAY = 7 # RSP-based, in prologue, but no CSRs noted yet
	CS_MODE_RSP_EPILOGUE = 8 # RSP-based, in epilogue
	cudstate = CS_MODE_NULL
	last_advance = 0
	last_state = State(state)
	for inst in fde.cie.instructions + fde.instructions + [None]:
	opcode = inst.opcode if inst else DW_CFA_advance_loc4
	args = inst.args if inst else [fde.header.address_range - state.off]
	# print(inst)
	if opcode == DW_CFA_nop:
	pass
	elif opcode == DW_CFA_undefined:
	raise InvalidState("DW_CFA_undefined")
	elif opcode == DW_CFA_GNU_args_size:
	raise InvalidState("DW_CFA_GNU_args_size")
	elif opcode >= 0x40 and opcode & 0xc0 == DW_CFA_offset:
	state = state._replace(csrs=state.csrs \| {args[0]: args[1]})
	elif opcode >= 0x40 and opcode & 0xc0 == DW_CFA_restore:
	state = state._replace(csrs=dict(state.csrs))
	del state.csrs[args[0]]
	elif opcode == DW_CFA_def_cfa_register:
	state = state._replace(cfa=args[0])
	elif opcode == DW_CFA_def_cfa_offset:
	state = state._replace(cfa_off=args[0])
	elif opcode == DW_CFA_def_cfa:
	state = state._replace(cfa=args[0], cfa_off=args[1])
	elif opcode == DW_CFA_remember_state:
	stack.append(state)
	elif opcode == DW_CFA_restore_state:
	state = stack.pop()._replace(off=state.off)
	elif (opcode < 0xc0 and opcode & 0xc0 == DW_CFA_advance_loc) or opcode in (DW_CFA_advance_loc1, DW_CFA_advance_loc2, DW_CFA_advance_loc4):
	# print(last_advance, cudstate, cuds, state, inst, args)
	state_history.append(state)
	if cudstate == CS_MODE_NULL:
	assert(type(cuds[-1]) == CUDModeNULL)
	if state.cfa == 7 and state.cfa_off == 8 and state.csrs == {16: 1}:
	cudstate = CS_MODE_NULL
	elif state.cfa == 7 and state.cfa_off == 16 and state.csrs == {16: 1, 6: 2}:
	cudstate = CS_PUSH_RBP
	elif state.cfa == 7 and state.cfa_off > 8 and len(state.csrs) > 1:
	cudstate = CS_MODE_RSP_PROLOGUE
	advance = last_advance if cuds[-1].start < state.off else 0
	if advance == 0 or (len(state.csrs) == 2 and state.cfa_off == 16) or (len(state.csrs) == 1):
	cuds.append(CUDModeRSP(state.off - advance))
	cuds[-1].csrs = state.csrs
	cuds[-1].prologue_advances.append((advance, state.cfa_off))
	else:
	# Needs NULL descriptor in between
	cuds.append(CUDModeRSP(state.off))
	cuds[-1].csrs = state.csrs
	cuds[-1].prologue_advances.append((0, state.cfa_off))
	elif state.cfa == 7 and state.cfa_off > last_state.cfa_off and len(state.csrs) == 1:
	cudstate = CS_MODE_RSP_PROLOGUE_DELAY
	cuds.append(CUDModeRSP(state.off - last_advance)) # XXX: not accurate?
	cuds[-1].csrs = state.csrs
	cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
	elif state.cfa == 6 and state.cfa_off == 16:
	cudstate = CS_MODE_RBP
	cuds.append(CUDModeRBP(state.off))
	cuds[-1].csrs = state.csrs
	else:
	raise InvalidState(f"invalid state reached 2: {cudstate} {repr(state)}")
	elif cudstate == CS_MODE_RSP_PROLOGUE_DELAY:
	if state.cfa != 7:
	raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}")
	elif last_state.csrs == state.csrs and last_state.cfa_off < state.cfa_off:
	cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
	elif last_state.cfa_off < state.cfa_off:
	assert all(last_state.csrs[r] == state.csrs[r] for r in last_state.csrs)
	cuds[-1].csrs = state.csrs
	cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
	cudstate = CS_MODE_RSP_PROLOGUE
	elif last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off:
	cudstate = CS_MODE_RSP_EPILOGUE
	cuds[-1].body_size = last_advance
	cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
	else:
	raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}")
	elif cudstate == CS_MODE_RSP_PROLOGUE:
	if state.cfa != 7:
	raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}")
	if last_state.cfa_off < state.cfa_off:
	assert all(last_state.csrs[r] == state.csrs[r] for r in last_state.csrs)
	cuds[-1].csrs = state.csrs
	cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
	elif last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off:
	cudstate = CS_MODE_RSP_EPILOGUE
	cuds[-1].body_size = last_advance
	cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
	elif state.cfa == 7 and state.cfa_off == 8 and state.csrs == {16: 1}:
	cudstate = CS_MODE_NULL
	cuds[-1].body_size = last_advance
	if len(last_state.csrs) == 2 and last_state.cfa_off == 16:
	cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
	# The current advance is folded into the epilogue
	cuds.append(CUDModeNULL(state.off + args[0]))
	else:
	cuds.append(CUDModeNULL(state.off))
	else:
	raise InvalidState(f"invalid state reached: {cudstate} {repr(state)} {repr(last_state)}")
	elif cudstate == CS_MODE_RSP_EPILOGUE:
	if state.cfa != 7:
	raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}")
	if last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off:
	cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
	elif state.cfa_off == 8 and state.csrs == {16: 1}:
	if cuds[-1].epilogue_advances and cuds[-1].epilogue_advances[-1][1] == 8:
	# E.g., pop, ret, something else without stack frame
	cuds[-1].epilogue_advances[-1] = (cuds[-1].epilogue_advances[-1][0] + args[0], 8)
	else:
	cuds[-1].epilogue_advances.append((args[0], state.cfa_off))
	cudstate = CS_MODE_NULL
	# The current advance is folded into the epilogue
	cuds.append(CUDModeNULL(state.off + args[0]))
	else:
	cudstate = CS_MODE_RSP_PROLOGUE
	cuds.append(CUDModeRSP(state.off))
	cuds[-1].csrs = state.csrs
	cuds[-1].prologue_advances.append((0, state.cfa_off))
	elif cudstate == CS_PUSH_RBP:
	if state.cfa == 6 and state.cfa_off == 16 and state.csrs == {16: 1, 6: 2}:
	if last_advance != 3:
	raise InvalidState(f"invalid state, instruction between push rbp/mov rbp,rsp? {repr(state)}")
	cudstate = CS_MODE_RBP
	cuds.append(CUDModeRBP(state.off - 4, state.csrs))
	cuds[-1].prologue_size = 4
	elif state.cfa == 7 and state.cfa_off == 24 and len(state.csrs) == 3 and state.csrs.get(16) == 1 and state.csrs.get(6) == 2:
	# Second push, must be rsp
	cudstate = CS_MODE_RSP_PROLOGUE
	pushed_reg, _ = next(iter(x for x in state.csrs.items() if x[0] not in (16, 6)))
	if last_advance != (1 if pushed_reg < 8 else 2):
	raise InvalidState(f"invalid state, instruction between pushes? {repr(state)}")
	start = state.off - (1 if pushed_reg < 8 else 2) - 1
	cuds.append(CUDModeRSP(start))
	cuds[-1].csrs = state.csrs
	cuds[-1].prologue_advances.append((1, last_state.cfa_off))
	cuds[-1].prologue_advances.append((last_advance, state.cfa_off))
	else:
	raise InvalidState(f"invalid state reached 3: {cudstate} {repr(state)}")
	elif cudstate == CS_MODE_RBP:
	if state.cfa == 6 and state.cfa_off == 16:
	if len(state.csrs) > len(last_state.csrs):
	if not all(last_state.csrs[r] == state.csrs.get(r) for r in last_state.csrs):
	raise InvalidState(f"RBP CSR mismatch? {last_state} -> {state}")
	cuds[-1].csrs = state.csrs
	cuds[-1].prologue_size = state.off - cuds[-1].start
	elif state.cfa == 7 and state.cfa_off == 8:
	# cuds[-1].epilogue = True
	cuds[-1].size = state.off - cuds[-1].start
	cudstate = CS_MODE_NULL
	cuds.append(CUDModeNULL(state.off))
	else:
	raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}")
	else:
	raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}")
	state = state._replace(off=state.off + args[0])
	last_advance = args[0]
	last_state = state
	else:
	raise InvalidState(f"unhandled opcode {inst}")

	assert state.off == fde.header.address_range
	cuds = [cud.finalize() for cud in cuds]
	# Extend last CUD to end of function
	if cuds[-1].start + cuds[-1].size != state.off:
	remaining = state.off - cuds[-1].start
	if not cuds[-1].mode or cuds[-1].pad_after_epilogue == -1:
	cuds[-1].size = remaining
	elif cuds[-1].pad_after_epilogue > 0 and cuds[-1].pad_after_epilogue + remaining < 128:
	cuds[-1].pad_after_epilogue += remaining - cuds[-1].size
	cuds[-1].size = remaining
	else:
	cuds.append(CUD(start=cuds[-1].start + cuds[-1].size, size=remaining))
	for i in range(len(cuds)):
	if cuds[i].mode:
	continue
	# Fix NULL CUD size.
	if i+1 < len(cuds):
	cuds[i].size = cuds[i+1].start - cuds[i].start
	# Fold NULL CUDs into prologue/epilogue of adjacent descriptors.
	if i+1 < len(cuds) and cuds[i+1].prologue_start >= 0 and cuds[i].size + cuds[i+1].prologue_start < 128:
	cuds[i+1].start -= cuds[i].size
	cuds[i+1].size += cuds[i].size
	cuds[i+1].prologue_start += cuds[i].size
	cuds[i].size = 0
	if i > 0 and cuds[i-1].pad_after_epilogue >= 0 and cuds[i].size + cuds[i-1].pad_after_epilogue < 128:
	cuds[i-1].size += cuds[i].size
	cuds[i-1].pad_after_epilogue += cuds[i].size
	cuds[i].size = 0
	# Drop zero-sized NULL CUDs.
	cuds = [cud for cud in cuds if cud.size or cud.mode]
	assert cuds[-1].start + cuds[-1].size == fde.header.address_range

	# Verify that cuds are accurate.
	cud_states = emulate_cuds(cuds)
	offs_to_validate = {s.off for s in state_history} \| {s.off for s in cud_states}
	for off in sorted(offs_to_validate):
	dwarf_state = state_history[bisect_right(state_history, off, key=lambda s: s.off)-1]
	cud_state = cud_states[bisect_right(cud_states, off, key=lambda s: s.off)-1]
	# We can't really validate CSRs, but things generally look good.
	# There might be some minor bugs, but this code isn't intending to be
	# perfect.
	#or any(cud_state.csrs.get(r) != o for r, o in dwarf_state.csrs.items())
	if (dwarf_state.cfa != cud_state.cfa or dwarf_state.cfa_off != cud_state.cfa_off):
	print(cuds)
	for x in state_history:
	print(x)
	for x in cud_states:
	print(x)
	raise InvalidState("validation failed")

	return cuds

	if __name__ == "__main__":
	for arg in sys.argv[1:]:
	with open(arg, "rb") as f:
	elf = ELFFile(f)
	if not elf.has_dwarf_info():
	print(f'{arg} has no DWARF info')
	continue
	dwarfinfo = elf.get_dwarf_info()
	for entry in dwarfinfo.EH_CFI_entries():
	if type(entry) == FDE:
	addr = entry.header["initial_location"]
	print("ADDR", hex(addr))
	try:
	cuds = analyze_fde(entry)
	print("LEN", len(cuds))
	for cud in cuds:
	print("CUD-ALL", cud.prologue_start, cud.pad_after_epilogue, cud.mode)
	print("CUD-MODE", cud.mode)
	if cud.mode:
	print("CUD-CSR", cud.mode[2])
	print("DETAIL", cuds)
	except InvalidState as e:
	print("ERROR", e)
	pass
No results found