Created
November 9, 2025 18:48
-
-
Save aengelke/7d984567bd14a0016837391a527ef2c1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python3 | |
| # SPDX-FileCopyrightText: 2025 Alexis Engelke | |
| # SPDX-License-Identifier: Apache-2.0 | |
| from bisect import bisect_right | |
| from dataclasses import dataclass, replace, field | |
| from typing import NamedTuple | |
| import sys | |
| from elftools.elf.elffile import ELFFile | |
| from elftools.dwarf.callframe import FDE | |
| from elftools.dwarf.constants import * | |
| class InvalidState(Exception): | |
| pass | |
| class State(NamedTuple): | |
| off: int = 0 | |
| cfa: int = -1 # reg | |
| cfa_off: int = 0 | |
| csrs: dict[int, int] = {} | |
| @dataclass | |
| class CUD: | |
| start: int | |
| size: int = 0 # analysis only | |
| prologue_start: int = 0 # offset into descriptor where the prologue instrs begin, -1 for no prologue | |
| pad_after_epilogue: int = 0 # -1 for no epilogue | |
| mode: str|None = None | |
| def emulate_cuds(cuds: list[CUD]): | |
| states = [] | |
| for cud in cuds: | |
| if cud.mode is None: | |
| states.append(State(off=cud.start, cfa=7, cfa_off=8, csrs={16:1})) | |
| elif cud.mode[0] == 2: # RSP | |
| csrs = {16:1} | |
| off = cud.start | |
| frame_sub = cud.mode[1] - (8+8*len(cud.mode[2])) | |
| if cud.prologue_start != -1: | |
| states.append(State(off=off, cfa=7, cfa_off=8, csrs=csrs)) | |
| off += cud.prologue_start | |
| for i, reg in enumerate(cud.mode[2]): | |
| csrs = csrs | {reg:i+2} | |
| off += 1 if reg < 8 else 2 | |
| states.append(State(off=off, cfa=7, cfa_off=16+8*i, csrs=csrs)) | |
| off += 1 if frame_sub == 8 else 4 if frame_sub < 0x80 else 7 | |
| csrs = {16:1}|{reg:i+2 for i, reg in enumerate(cud.mode[2])} | |
| states.append(State(off=off, cfa=7, cfa_off=cud.mode[1], csrs=csrs)) | |
| if cud.pad_after_epilogue != -1: | |
| pop_size = sum(1 if reg < 8 else 2 for reg in cud.mode[2]) | |
| off = cud.start + cud.size - cud.pad_after_epilogue - pop_size | |
| if frame_sub: | |
| states.append(State(off=off, cfa=7, cfa_off=8+8*len(cud.mode[2]), csrs=csrs)) | |
| for i, reg in enumerate(cud.mode[2][::-1]): | |
| off += 1 if reg < 8 else 2 | |
| states.append(State(off=off, cfa=7, cfa_off=8*(len(cud.mode[2])-i), csrs=csrs)) | |
| elif cud.mode[0] == 1: | |
| csrs = {16:1}|{reg:i+2 for i, reg in enumerate(cud.mode[2])} | |
| off = cud.start + cud.prologue_start | |
| if cud.mode[1] + cud.prologue_start > 0: | |
| states.append(State(off=cud.start, cfa=7, cfa_off=8, csrs={16:1})) | |
| if cud.mode[1] > 0: | |
| states.append(State(off=off+1, cfa=7, cfa_off=16, csrs={16:1,6:2})) | |
| if cud.mode[1] > 4: | |
| states.append(State(off=off+4, cfa=6, cfa_off=16, csrs={16:1,6:2})) | |
| off += cud.mode[1] | |
| states.append(State(off=off, cfa=6, cfa_off=16, csrs=csrs)) | |
| if cud.pad_after_epilogue != -1: | |
| states.append(State(off=cud.start+cud.size-cud.pad_after_epilogue, cfa=7, cfa_off=8, csrs={16:1})) | |
| else: | |
| assert False | |
| return states | |
| @dataclass | |
| class CUDModeNULL: | |
| start: int | |
| def finalize(self): | |
| return CUD(start=self.start) | |
| @dataclass | |
| class CUDModeRBP: | |
| start: int | |
| csrs: dict[int, int] = field(default_factory=dict) | |
| prologue_size: int = 0 | |
| size: int = 0 | |
| def finalize(self): | |
| push_seq = sorted(set(self.csrs.keys()) - {16}, key=lambda r: self.csrs[r]) | |
| if not all(self.csrs[r] == i + 2 for i, r in enumerate(push_seq)) or len(push_seq) > 6 or push_seq[0] != 6: | |
| raise InvalidState(f"invalid push order: {push_seq} {self.csrs}") | |
| return CUD(start=self.start, mode=[1, self.prologue_size, push_seq], size=self.size, pad_after_epilogue=(-1 if self.size == 0 else 0)) | |
| @dataclass | |
| class CUDModeRSP: | |
| start: int | |
| csrs: dict[int, int] = field(default_factory=dict) | |
| prologue_advances: list[tuple[int, int]] = field(default_factory=list) | |
| epilogue_advances: list[tuple[int, int]] = field(default_factory=list) | |
| body_size: int = 0 | |
| def finalize(self): | |
| cud = CUD(self.start) | |
| push_seq = sorted(set(self.csrs.keys()) - {16}, key=lambda r: self.csrs[r]) | |
| if not all(self.csrs[r] == i + 2 for i, r in enumerate(push_seq)) or len(push_seq) > 6: | |
| raise InvalidState(f"invalid rsp push order: {push_seq} {self.csrs}") | |
| frame_size = self.prologue_advances[-1][1] | |
| cud.mode = [2, frame_size, push_seq] | |
| prologue_size = 0 | |
| if len(self.prologue_advances) == 1 and self.prologue_advances[0][0] == 0: | |
| cud.prologue_start = -1 | |
| else: | |
| assert all((adv >= 0 for adv, _ in self.prologue_advances)) | |
| assert all((adv >= 0 for adv, _ in self.epilogue_advances)) | |
| if len(self.prologue_advances) < len(push_seq) or len(self.prologue_advances) > len(push_seq) + 1: | |
| raise InvalidState(f"invalid rsp prologue seq: {push_seq} {self.prologue_advances}") | |
| for i, (reg, (adv, cfa_off)) in enumerate(zip(push_seq, self.prologue_advances)): | |
| push_size = (1 if reg < 8 else 2) | |
| if i == 0 and adv > push_size: | |
| cud.prologue_start += adv - push_size | |
| adv = push_size | |
| prologue_size += push_size | |
| if adv != push_size or cfa_off != 16 + 8 * i: | |
| raise InvalidState(f"invalid rsp prologue seq at {i}: {push_seq} {self.prologue_advances}") | |
| if len(self.prologue_advances) > len(push_seq): | |
| adv = self.prologue_advances[-1][0] | |
| if len(self.prologue_advances) > 1: | |
| growth = frame_size - self.prologue_advances[-2][1] | |
| if not ((growth == 8 and adv == 1) or (growth < 0x80) and (adv == 4) or (growth >= 0x80 and adv == 7)): | |
| raise InvalidState(f"invalid rsp prologue rsp adjustment: {self.prologue_advances}") | |
| prologue_size += adv | |
| else: | |
| growth = frame_size - 8 | |
| if growth == 8: | |
| if adv < 1: | |
| raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}") | |
| cud.prologue_start += adv - 1 | |
| prologue_size += 1 | |
| elif growth <= 0x80: | |
| if adv < 4: | |
| raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}") | |
| cud.prologue_start += adv - 4 | |
| prologue_size += 4 | |
| elif growth > 0x80: | |
| if adv < 7: | |
| raise InvalidState(f"rsp too small adv? {adv} {self.prologue_advances} {push_seq}") | |
| cud.prologue_start += adv - 7 | |
| prologue_size += 7 | |
| else: | |
| assert frame_size == len(push_seq) * 8 + 8 | |
| prologue_end = 0 if cud.prologue_start == -1 else cud.prologue_start + prologue_size | |
| if not self.epilogue_advances: | |
| cud.pad_after_epilogue = -1 | |
| cud.size = prologue_end + self.body_size | |
| else: | |
| if len(self.epilogue_advances) < len(push_seq) or len(self.epilogue_advances) > len(push_seq) + 1: | |
| raise InvalidState(f"invalid rsp epilogue seq: {push_seq} {self.epilogue_advances}") | |
| epilogue_start = prologue_end + self.body_size | |
| if len(self.epilogue_advances) == len(push_seq): | |
| pop_size = (1 if push_seq[-1] < 8 else 2) | |
| epilogue_start -= pop_size | |
| _, cfa = self.epilogue_advances[0] | |
| self.epilogue_advances.insert(0, (pop_size, cfa + 8)) | |
| epilogue_size = 0 | |
| for i, (reg, (adv, cfa_off)) in enumerate(zip(push_seq[::-1], self.epilogue_advances)): | |
| pop_size = (1 if reg < 8 else 2) | |
| epilogue_size += pop_size | |
| if adv != pop_size or cfa_off != 16 + 8 * (len(push_seq) - i - 1): | |
| raise InvalidState(f"invalid rsp epilogue seq at {i}: {push_seq} {self.epilogue_advances}") | |
| if self.epilogue_advances[-1][1] != 8: | |
| raise InvalidState(f"invalid rsp epilogue seq: {push_seq} {self.epilogue_advances}") | |
| cud.pad_after_epilogue = self.epilogue_advances[-1][0] | |
| cud.size = cud.pad_after_epilogue + epilogue_start + epilogue_size | |
| return cud | |
| def analyze_fde(fde): | |
| cuds = [] | |
| stack = [] | |
| state = State() | |
| state_history = [] | |
| cuds.append(CUDModeNULL(0)) | |
| CS_MODE_NULL = 2 # CFA=RSP+8 RIP=CFA-8 | |
| CS_PUSH_RBP = 3 # CFA=RSP+16 RIP=CFA-8 RBP=CFA-16, at a small offset <128 bytes | |
| CS_MODE_RBP = 4 # This is a RBP-based stack frame with zero extra CSRs. | |
| CS_MODE_RSP_PROLOGUE = 6 # RSP-based, in prologue | |
| CS_MODE_RSP_PROLOGUE_DELAY = 7 # RSP-based, in prologue, but no CSRs noted yet | |
| CS_MODE_RSP_EPILOGUE = 8 # RSP-based, in epilogue | |
| cudstate = CS_MODE_NULL | |
| last_advance = 0 | |
| last_state = State(state) | |
| for inst in fde.cie.instructions + fde.instructions + [None]: | |
| opcode = inst.opcode if inst else DW_CFA_advance_loc4 | |
| args = inst.args if inst else [fde.header.address_range - state.off] | |
| # print(inst) | |
| if opcode == DW_CFA_nop: | |
| pass | |
| elif opcode == DW_CFA_undefined: | |
| raise InvalidState("DW_CFA_undefined") | |
| elif opcode == DW_CFA_GNU_args_size: | |
| raise InvalidState("DW_CFA_GNU_args_size") | |
| elif opcode >= 0x40 and opcode & 0xc0 == DW_CFA_offset: | |
| state = state._replace(csrs=state.csrs | {args[0]: args[1]}) | |
| elif opcode >= 0x40 and opcode & 0xc0 == DW_CFA_restore: | |
| state = state._replace(csrs=dict(state.csrs)) | |
| del state.csrs[args[0]] | |
| elif opcode == DW_CFA_def_cfa_register: | |
| state = state._replace(cfa=args[0]) | |
| elif opcode == DW_CFA_def_cfa_offset: | |
| state = state._replace(cfa_off=args[0]) | |
| elif opcode == DW_CFA_def_cfa: | |
| state = state._replace(cfa=args[0], cfa_off=args[1]) | |
| elif opcode == DW_CFA_remember_state: | |
| stack.append(state) | |
| elif opcode == DW_CFA_restore_state: | |
| state = stack.pop()._replace(off=state.off) | |
| elif (opcode < 0xc0 and opcode & 0xc0 == DW_CFA_advance_loc) or opcode in (DW_CFA_advance_loc1, DW_CFA_advance_loc2, DW_CFA_advance_loc4): | |
| # print(last_advance, cudstate, cuds, state, inst, args) | |
| state_history.append(state) | |
| if cudstate == CS_MODE_NULL: | |
| assert(type(cuds[-1]) == CUDModeNULL) | |
| if state.cfa == 7 and state.cfa_off == 8 and state.csrs == {16: 1}: | |
| cudstate = CS_MODE_NULL | |
| elif state.cfa == 7 and state.cfa_off == 16 and state.csrs == {16: 1, 6: 2}: | |
| cudstate = CS_PUSH_RBP | |
| elif state.cfa == 7 and state.cfa_off > 8 and len(state.csrs) > 1: | |
| cudstate = CS_MODE_RSP_PROLOGUE | |
| advance = last_advance if cuds[-1].start < state.off else 0 | |
| if advance == 0 or (len(state.csrs) == 2 and state.cfa_off == 16) or (len(state.csrs) == 1): | |
| cuds.append(CUDModeRSP(state.off - advance)) | |
| cuds[-1].csrs = state.csrs | |
| cuds[-1].prologue_advances.append((advance, state.cfa_off)) | |
| else: | |
| # Needs NULL descriptor in between | |
| cuds.append(CUDModeRSP(state.off)) | |
| cuds[-1].csrs = state.csrs | |
| cuds[-1].prologue_advances.append((0, state.cfa_off)) | |
| elif state.cfa == 7 and state.cfa_off > last_state.cfa_off and len(state.csrs) == 1: | |
| cudstate = CS_MODE_RSP_PROLOGUE_DELAY | |
| cuds.append(CUDModeRSP(state.off - last_advance)) # XXX: not accurate? | |
| cuds[-1].csrs = state.csrs | |
| cuds[-1].prologue_advances.append((last_advance, state.cfa_off)) | |
| elif state.cfa == 6 and state.cfa_off == 16: | |
| cudstate = CS_MODE_RBP | |
| cuds.append(CUDModeRBP(state.off)) | |
| cuds[-1].csrs = state.csrs | |
| else: | |
| raise InvalidState(f"invalid state reached 2: {cudstate} {repr(state)}") | |
| elif cudstate == CS_MODE_RSP_PROLOGUE_DELAY: | |
| if state.cfa != 7: | |
| raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}") | |
| elif last_state.csrs == state.csrs and last_state.cfa_off < state.cfa_off: | |
| cuds[-1].prologue_advances.append((last_advance, state.cfa_off)) | |
| elif last_state.cfa_off < state.cfa_off: | |
| assert all(last_state.csrs[r] == state.csrs[r] for r in last_state.csrs) | |
| cuds[-1].csrs = state.csrs | |
| cuds[-1].prologue_advances.append((last_advance, state.cfa_off)) | |
| cudstate = CS_MODE_RSP_PROLOGUE | |
| elif last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off: | |
| cudstate = CS_MODE_RSP_EPILOGUE | |
| cuds[-1].body_size = last_advance | |
| cuds[-1].epilogue_advances.append((args[0], state.cfa_off)) | |
| else: | |
| raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}") | |
| elif cudstate == CS_MODE_RSP_PROLOGUE: | |
| if state.cfa != 7: | |
| raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}") | |
| if last_state.cfa_off < state.cfa_off: | |
| assert all(last_state.csrs[r] == state.csrs[r] for r in last_state.csrs) | |
| cuds[-1].csrs = state.csrs | |
| cuds[-1].prologue_advances.append((last_advance, state.cfa_off)) | |
| elif last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off: | |
| cudstate = CS_MODE_RSP_EPILOGUE | |
| cuds[-1].body_size = last_advance | |
| cuds[-1].epilogue_advances.append((args[0], state.cfa_off)) | |
| elif state.cfa == 7 and state.cfa_off == 8 and state.csrs == {16: 1}: | |
| cudstate = CS_MODE_NULL | |
| cuds[-1].body_size = last_advance | |
| if len(last_state.csrs) == 2 and last_state.cfa_off == 16: | |
| cuds[-1].epilogue_advances.append((args[0], state.cfa_off)) | |
| # The current advance is folded into the epilogue | |
| cuds.append(CUDModeNULL(state.off + args[0])) | |
| else: | |
| cuds.append(CUDModeNULL(state.off)) | |
| else: | |
| raise InvalidState(f"invalid state reached: {cudstate} {repr(state)} {repr(last_state)}") | |
| elif cudstate == CS_MODE_RSP_EPILOGUE: | |
| if state.cfa != 7: | |
| raise InvalidState(f"mode rsp now uses something else? {cudstate} {repr(state)}") | |
| if last_state.csrs == state.csrs and last_state.cfa_off > state.cfa_off: | |
| cuds[-1].epilogue_advances.append((args[0], state.cfa_off)) | |
| elif state.cfa_off == 8 and state.csrs == {16: 1}: | |
| if cuds[-1].epilogue_advances and cuds[-1].epilogue_advances[-1][1] == 8: | |
| # E.g., pop, ret, something else without stack frame | |
| cuds[-1].epilogue_advances[-1] = (cuds[-1].epilogue_advances[-1][0] + args[0], 8) | |
| else: | |
| cuds[-1].epilogue_advances.append((args[0], state.cfa_off)) | |
| cudstate = CS_MODE_NULL | |
| # The current advance is folded into the epilogue | |
| cuds.append(CUDModeNULL(state.off + args[0])) | |
| else: | |
| cudstate = CS_MODE_RSP_PROLOGUE | |
| cuds.append(CUDModeRSP(state.off)) | |
| cuds[-1].csrs = state.csrs | |
| cuds[-1].prologue_advances.append((0, state.cfa_off)) | |
| elif cudstate == CS_PUSH_RBP: | |
| if state.cfa == 6 and state.cfa_off == 16 and state.csrs == {16: 1, 6: 2}: | |
| if last_advance != 3: | |
| raise InvalidState(f"invalid state, instruction between push rbp/mov rbp,rsp? {repr(state)}") | |
| cudstate = CS_MODE_RBP | |
| cuds.append(CUDModeRBP(state.off - 4, state.csrs)) | |
| cuds[-1].prologue_size = 4 | |
| elif state.cfa == 7 and state.cfa_off == 24 and len(state.csrs) == 3 and state.csrs.get(16) == 1 and state.csrs.get(6) == 2: | |
| # Second push, must be rsp | |
| cudstate = CS_MODE_RSP_PROLOGUE | |
| pushed_reg, _ = next(iter(x for x in state.csrs.items() if x[0] not in (16, 6))) | |
| if last_advance != (1 if pushed_reg < 8 else 2): | |
| raise InvalidState(f"invalid state, instruction between pushes? {repr(state)}") | |
| start = state.off - (1 if pushed_reg < 8 else 2) - 1 | |
| cuds.append(CUDModeRSP(start)) | |
| cuds[-1].csrs = state.csrs | |
| cuds[-1].prologue_advances.append((1, last_state.cfa_off)) | |
| cuds[-1].prologue_advances.append((last_advance, state.cfa_off)) | |
| else: | |
| raise InvalidState(f"invalid state reached 3: {cudstate} {repr(state)}") | |
| elif cudstate == CS_MODE_RBP: | |
| if state.cfa == 6 and state.cfa_off == 16: | |
| if len(state.csrs) > len(last_state.csrs): | |
| if not all(last_state.csrs[r] == state.csrs.get(r) for r in last_state.csrs): | |
| raise InvalidState(f"RBP CSR mismatch? {last_state} -> {state}") | |
| cuds[-1].csrs = state.csrs | |
| cuds[-1].prologue_size = state.off - cuds[-1].start | |
| elif state.cfa == 7 and state.cfa_off == 8: | |
| # cuds[-1].epilogue = True | |
| cuds[-1].size = state.off - cuds[-1].start | |
| cudstate = CS_MODE_NULL | |
| cuds.append(CUDModeNULL(state.off)) | |
| else: | |
| raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}") | |
| else: | |
| raise InvalidState(f"invalid state reached: {cudstate} {repr(state)}") | |
| state = state._replace(off=state.off + args[0]) | |
| last_advance = args[0] | |
| last_state = state | |
| else: | |
| raise InvalidState(f"unhandled opcode {inst}") | |
| assert state.off == fde.header.address_range | |
| cuds = [cud.finalize() for cud in cuds] | |
| # Extend last CUD to end of function | |
| if cuds[-1].start + cuds[-1].size != state.off: | |
| remaining = state.off - cuds[-1].start | |
| if not cuds[-1].mode or cuds[-1].pad_after_epilogue == -1: | |
| cuds[-1].size = remaining | |
| elif cuds[-1].pad_after_epilogue > 0 and cuds[-1].pad_after_epilogue + remaining < 128: | |
| cuds[-1].pad_after_epilogue += remaining - cuds[-1].size | |
| cuds[-1].size = remaining | |
| else: | |
| cuds.append(CUD(start=cuds[-1].start + cuds[-1].size, size=remaining)) | |
| for i in range(len(cuds)): | |
| if cuds[i].mode: | |
| continue | |
| # Fix NULL CUD size. | |
| if i+1 < len(cuds): | |
| cuds[i].size = cuds[i+1].start - cuds[i].start | |
| # Fold NULL CUDs into prologue/epilogue of adjacent descriptors. | |
| if i+1 < len(cuds) and cuds[i+1].prologue_start >= 0 and cuds[i].size + cuds[i+1].prologue_start < 128: | |
| cuds[i+1].start -= cuds[i].size | |
| cuds[i+1].size += cuds[i].size | |
| cuds[i+1].prologue_start += cuds[i].size | |
| cuds[i].size = 0 | |
| if i > 0 and cuds[i-1].pad_after_epilogue >= 0 and cuds[i].size + cuds[i-1].pad_after_epilogue < 128: | |
| cuds[i-1].size += cuds[i].size | |
| cuds[i-1].pad_after_epilogue += cuds[i].size | |
| cuds[i].size = 0 | |
| # Drop zero-sized NULL CUDs. | |
| cuds = [cud for cud in cuds if cud.size or cud.mode] | |
| assert cuds[-1].start + cuds[-1].size == fde.header.address_range | |
| # Verify that cuds are accurate. | |
| cud_states = emulate_cuds(cuds) | |
| offs_to_validate = {s.off for s in state_history} | {s.off for s in cud_states} | |
| for off in sorted(offs_to_validate): | |
| dwarf_state = state_history[bisect_right(state_history, off, key=lambda s: s.off)-1] | |
| cud_state = cud_states[bisect_right(cud_states, off, key=lambda s: s.off)-1] | |
| # We can't really validate CSRs, but things generally look good. | |
| # There might be some minor bugs, but this code isn't intending to be | |
| # perfect. | |
| #or any(cud_state.csrs.get(r) != o for r, o in dwarf_state.csrs.items()) | |
| if (dwarf_state.cfa != cud_state.cfa or dwarf_state.cfa_off != cud_state.cfa_off): | |
| print(cuds) | |
| for x in state_history: | |
| print(x) | |
| for x in cud_states: | |
| print(x) | |
| raise InvalidState("validation failed") | |
| return cuds | |
| if __name__ == "__main__": | |
| for arg in sys.argv[1:]: | |
| with open(arg, "rb") as f: | |
| elf = ELFFile(f) | |
| if not elf.has_dwarf_info(): | |
| print(f'{arg} has no DWARF info') | |
| continue | |
| dwarfinfo = elf.get_dwarf_info() | |
| for entry in dwarfinfo.EH_CFI_entries(): | |
| if type(entry) == FDE: | |
| addr = entry.header["initial_location"] | |
| print("ADDR", hex(addr)) | |
| try: | |
| cuds = analyze_fde(entry) | |
| print("LEN", len(cuds)) | |
| for cud in cuds: | |
| print("CUD-ALL", cud.prologue_start, cud.pad_after_epilogue, cud.mode) | |
| print("CUD-MODE", cud.mode) | |
| if cud.mode: | |
| print("CUD-CSR", cud.mode[2]) | |
| print("DETAIL", cuds) | |
| except InvalidState as e: | |
| print("ERROR", e) | |
| pass |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment