Created
March 15, 2022 02:18
-
-
Save devanlai/bd2c6b2587e7ee802df8cb72d6e239e9 to your computer and use it in GitHub Desktop.
Sample script to parse Zephyr shell command trees from an ELF file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""
Parse a Zephyr firmware ELF file to extract its statically registered
shell commands using the linker section data structure.

This script requires pyelftools, which can be installed using:
    pip install pyelftools
"""
import argparse | |
import struct | |
import sys | |
import elftools | |
import elftools.elf.elffile | |
import elftools.elf.sections | |
from collections import OrderedDict, namedtuple | |
def extract_symbol(sym_addr, sym_size, section_data, section_addr):
    """
    Extract the byte contents of an object stored inside a section.

    `section_data` holds the section's bytes and `section_addr` is the address
    of its first byte. The object occupies `sym_size` bytes starting at
    address `sym_addr`.

    Returns the `sym_size` bytes sliced out of `section_data`.

    Raises ValueError if any part of the requested range lies outside the
    section.
    """
    offset = sym_addr - section_addr
    end = offset + sym_size
    if offset < 0 or end > len(section_data):
        # Report the whole requested range: an object can start inside the
        # section and still overrun its end.
        raise ValueError(
            "Symbol range [0x{:08X}, 0x{:08X}) out of bounds [0x{:08X}, 0x{:08X})".format(
                sym_addr, sym_addr + sym_size,
                section_addr, section_addr + len(section_data)))
    return section_data[offset:end]
def read_string(ptr, lookup_func, max_len=1024, encoding="utf-8"):
    """
    Read a NUL-terminated string starting at address `ptr`.

    `lookup_func(ptr, size)` must return the `size` bytes stored at `ptr`;
    it is called one byte at a time until a NUL byte is returned. The
    terminator is not included in the result.

    Returns the string decoded with `encoding`, or the raw `bytes` when
    `encoding` is None.

    Raises ValueError if more than `max_len` bytes are collected without
    reaching the terminator (pass `max_len=None` to disable the limit).
    """
    string_bytes = bytearray()
    while True:
        b = lookup_func(ptr, 1)
        if b == b'\x00':
            break
        string_bytes.append(b[0])
        ptr += 1
        if max_len is not None and len(string_bytes) > max_len:
            # Show a repr of the raw bytes rather than decoding them: the cut
            # may fall inside a multi-byte character, and `encoding` may be None.
            raise ValueError(
                "String would exceed maximum expected length ({} bytes); starts with {!r}".format(
                    max_len, bytes(string_bytes[:32])))
    if encoding is not None:
        return bytes(string_bytes).decode(encoding)
    return bytes(string_bytes)
if __name__ == "__main__":
    # Script entry point: load the ELF file named on the command line, walk
    # the shell command tables it contains, and print the command tree.
    #
    # NOTE: every struct format below is little-endian ("<") with 4-byte
    # pointers ("I"), so only 32-bit little-endian images are decoded correctly.
    parser = argparse.ArgumentParser(description="Extract statically registered shell command info")
    parser.add_argument("elf_file",
                        type=argparse.FileType("rb"),
                        help="Firmware elf file to read")
    args = parser.parse_args()

    elf = elftools.elf.elffile.ELFFile(args.elf_file)

    def require_section(name):
        "Return the ELF section called `name`, exiting with an error if it is absent"
        section = elf.get_section_by_name(name)
        if section is None:
            sys.stderr.write('Failed to find section "{}"\n'.format(name))
            sys.exit(1)
        return section

    # Read the symbol table so we can lookup symbols by name
    symtab_section = require_section(".symtab")

    def find_symbol(name):
        "Return the single symbol called `name`; raise ValueError if it is missing or ambiguous"
        try:
            # The one-element unpacking fails with TypeError when the lookup
            # returns None and with ValueError when it returns several symbols.
            [symbol] = symtab_section.get_symbol_by_name(name)
        except (TypeError, ValueError) as e:
            raise ValueError("Symbol {} not found or not unique".format(name)) from e
        return symbol

    # Read the iterable root shell command list section
    root_cmds_section = require_section("shell_root_cmds_sections")
    root_cmds_data = root_cmds_section.data()
    root_cmds_addr = root_cmds_section["sh_addr"]

    # Read the general rodata section for everything else
    rodata_section = require_section("rodata")
    rodata_data = rodata_section.data()
    rodata_addr = rodata_section["sh_addr"]

    def read_shell_root_cmd_bytes(ptr, size):
        "Retrieve the `size` bytes stored at `ptr`"
        return extract_symbol(ptr, size, root_cmds_data, root_cmds_addr)

    def read_rodata_bytes(ptr, size):
        "Retrieve the `size` bytes stored at `ptr`"
        return extract_symbol(ptr, size, rodata_data, rodata_addr)

    # Command entry: 1-byte is_dynamic flag, 3 pad bytes, 4-byte pointer (8 bytes).
    cmd_entry_struct = struct.Struct("<BxxxI")
    # Static entry: syntax, help, sub-command and handler pointers, then the
    # required/optional argument counts and 2 pad bytes (20 bytes).
    # NOTE(review): presumably mirrors Zephyr's shell_cmd_entry and
    # shell_static_entry structs -- confirm against the Zephyr version in use.
    static_entry_struct = struct.Struct("<IIIIBBxx")

    # Lookup the array of shell root commands
    try:
        shell_root_cmd_array_start_symbol = find_symbol("__shell_root_cmds_start")
        shell_root_cmd_array_end_symbol = find_symbol("__shell_root_cmds_end")
    except ValueError:
        sys.stderr.write('Failed to find shell root cmd array symbols\n')
        sys.exit(1)

    shell_root_cmd_array_start = shell_root_cmd_array_start_symbol["st_value"]
    shell_root_cmd_array_end = shell_root_cmd_array_end_symbol["st_value"]

    # Walk the array of root shell command entries, keeping the static ones
    static_command_pointers = []
    for ptr in range(shell_root_cmd_array_start, shell_root_cmd_array_end, cmd_entry_struct.size):
        shell_cmd_entry_bytes = read_shell_root_cmd_bytes(ptr, cmd_entry_struct.size)
        is_dynamic, entry_ptr = cmd_entry_struct.unpack(shell_cmd_entry_bytes)
        if not is_dynamic:
            static_command_pointers.append(entry_ptr)

    # Recursively decode all commands, starting from the root commands
    # and traversing all reachable static sub commands
    root_commands = []
    command_registry = {}

    def decode_shell_static_entry(ptr):
        """
        Decode the static command entry at `ptr` along with its sub-commands.

        Returns a tuple (syntax, help, sub_commands, handler_ptr,
        num_req_args, num_opt_args), or None when `ptr` cannot be read from
        rodata or the entry has a null syntax pointer. Decoded entries are
        cached in `command_registry` by address.
        """
        if ptr in command_registry:
            return command_registry[ptr]

        try:
            shell_static_entry_bytes = read_rodata_bytes(ptr, static_entry_struct.size)
        except ValueError:
            # Address is not inside rodata: not an entry we can decode
            return None

        fields = static_entry_struct.unpack(shell_static_entry_bytes)
        syntax_ptr, help_ptr, subcmd_ptr, handler_func_ptr, num_req_args, num_opt_args = fields
        if syntax_ptr == 0:
            # A null syntax pointer stops the sub-command array walk below
            return None

        syntax_string = read_string(syntax_ptr, read_rodata_bytes)
        help_string = read_string(help_ptr, read_rodata_bytes) if help_ptr != 0 else ""
        sub_commands = []
        command_entry = (syntax_string, help_string, sub_commands, handler_func_ptr, num_req_args, num_opt_args)
        # Register before recursing so a revisited address reuses this entry
        command_registry[ptr] = command_entry

        if subcmd_ptr != 0:
            shell_cmd_entry_bytes = read_rodata_bytes(subcmd_ptr, cmd_entry_struct.size)
            is_dynamic, entry_ptr = cmd_entry_struct.unpack(shell_cmd_entry_bytes)
            if not is_dynamic and entry_ptr != 0:
                # Sub-commands are consecutive static entries; keep decoding
                # until an entry fails to decode
                sub_command = decode_shell_static_entry(entry_ptr)
                while sub_command is not None:
                    sub_commands.append(sub_command)
                    entry_ptr += static_entry_struct.size
                    sub_command = decode_shell_static_entry(entry_ptr)

        return command_entry

    for ptr in static_command_pointers:
        root_command = decode_shell_static_entry(ptr)
        # Skip undecodable root entries; a None here would break the sort below
        if root_command is not None:
            root_commands.append(root_command)
    root_commands.sort(key=lambda x: x[0])

    # Convert to a flattened command list with nesting information
    # (depth-first, each command immediately followed by its sub-commands)
    command_list = []
    # The stack pops from the end, so push in reverse to emit in sorted order
    stack = [(root_command, 0) for root_command in reversed(root_commands)]
    while stack:
        (command, depth) = stack.pop()
        command_list.append((command, depth))
        sub_commands = command[2]
        for sub_command in reversed(sorted(sub_commands, key=lambda x: x[0])):
            stack.append((sub_command, depth + 1))

    # Map each command handler to a symbol name if possible
    handlers_to_lookup = set()
    for (command, depth) in command_list:
        handler_func_ptr = command[3]
        if handler_func_ptr != 0:
            handlers_to_lookup.add(handler_func_ptr)

    handler_name_table = {}
    for symbol in symtab_section.iter_symbols():
        symbol_value = symbol.entry.get("st_value")
        if symbol_value in handlers_to_lookup:
            handler_name_table[symbol_value] = symbol.name

    # Look up the source file and line of each command handler if possible
    handler_source_table = {}
    if elf.has_dwarf_info():
        dwarfinfo = elf.get_dwarf_info()
        for CU in dwarfinfo.iter_CUs():
            # First, look at line programs to find the file/line for the address
            lineprog = dwarfinfo.line_program_for_CU(CU)
            if lineprog is None:
                # This CU has no line number information
                continue
            # File numbers are 1-based before DWARF v5 and 0-based from v5 on
            file_index_base = 1 if lineprog.header.version < 5 else 0
            prevstate = None
            for entry in lineprog.get_entries():
                # We're interested in those entries where a new state is assigned
                if entry.state is None:
                    continue
                # Looking for a range of addresses in two consecutive states that
                # contain the required address.
                if prevstate:
                    for handler_func_ptr in handlers_to_lookup:
                        if prevstate.address <= handler_func_ptr < entry.state.address:
                            file_entry = lineprog['file_entry'][prevstate.file - file_index_base]
                            filename = file_entry.name.decode("utf-8")
                            line = prevstate.line
                            handler_source_table[handler_func_ptr] = (filename, line)
                if entry.state.end_sequence:
                    # For the state with `end_sequence`, `address` means the address
                    # of the first byte after the target machine instruction
                    # sequence and other information is meaningless. We clear
                    # prevstate so that it's not used in the next iteration. Address
                    # info is used in the above comparison to see if we need to use
                    # the line information for the prevstate.
                    prevstate = None
                else:
                    prevstate = entry.state

    # Display command tree
    for (command, depth) in command_list:
        (syntax_string, help_string, sub_commands, handler_func_ptr, num_req_args, num_opt_args) = command
        text = "{:s}{:s} - {:s}".format(" "*depth, syntax_string, help_string)
        # A null handler is never in either table, so it prints the plain form
        handler_name = handler_name_table.get(handler_func_ptr)
        handler_source = handler_source_table.get(handler_func_ptr)
        if handler_name is not None and handler_source is not None:
            text += " [{} from {}:{}]".format(handler_name, handler_source[0], handler_source[1])
        elif handler_name is not None:
            text += " [{}]".format(handler_name)
        print(text)

    sys.exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment