Created
April 10, 2023 22:43
-
-
Save alexander-hanel/7ad79cdbfcf08e0e56b17817267382dd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from anytree import Node, RenderTree, find | |
""" | |
Author: Alexander Hanel | |
Description: POC for displaying function names as a folder-like structure. Relies on function names being labeled with a pdb.
Version: 0.5 - 2023/04/10 | |
Execution: open script in IDA, run export_layout() to save to file or print_layout() to print to command line
TODO: | |
- review how mangled names are used in IDA. I'm seeing some strange results. | |
- add functionality for Go function names
- maybe create folders in IDA | |
- dig deeper into how function names are parsed in C/C++. MS has some weird naming conventions | |
Example Output from clr.dll | |
├── CInstall | |
│ ├── Ref | |
│ │ ├── Enum | |
│ │ │ ├── public: void * CInstallRefEnum::`scalar deleting destructor'(unsigned int) | |
│ │ │ ├── public: long CInstallRefEnum::CleanUpRegKeys(void) | |
│ │ │ ├── public: static long CInstallRefEnum::Create(struct IAssemblyName *,enum GACSLICE,int,class CInstallRefEnum * *) | |
│ │ │ ├── private: long CInstallRefEnum::GetNextIdentifier(unsigned short *,unsigned long *,unsigned char *,unsigned long *) | |
│ │ │ ├── public: long CInstallRefEnum::GetNextRef(unsigned long,unsigned short *,unsigned long *,unsigned short *,unsigned long *,unsigned long *,void *) | |
│ │ │ ├── private: long CInstallRefEnum::GetNextScheme(void) | |
│ │ │ ├── private: long CInstallRefEnum::Init(struct IAssemblyName *,enum GACSLICE,int) | |
│ │ │ └── private: long CInstallRefEnum::ValidateRegKey(struct HKEY__ * &) | |
│ │ ├── public: long CInstallRef::AddReference(void) | |
│ │ ├── public: long CInstallRef::DeleteReference(void) | |
│ │ └── public: long CInstallRef::Initialize(void) | |
│ └── Reference | |
│ ├── Item | |
│ │ ├── public: void * CInstallReferenceItem::`scalar deleting destructor'(unsigned int) | |
│ │ ├── public: virtual unsigned long CInstallReferenceItem::AddRef(void) | |
│ │ ├── public: virtual long CInstallReferenceItem::GetReference(struct _FUSION_INSTALL_REFERENCE_ * *,unsigned long,void *) | |
│ │ ├── public: virtual long CInstallReferenceItem::QueryInterface(struct _GUID const &,void * *) | |
│ │ └── public: virtual unsigned long CInstallReferenceItem::Release(void) | |
│ └── Enum | |
│ ├── public: virtual long CInstallReferenceEnum::GetNextInstallReferenceItem(struct IInstallReferenceItem * *,unsigned long,void *) | |
│ └── public: virtual unsigned long CInstallReferenceEnum::Release(void) | |
├── CVerify | |
│ └── Ref | |
│ └── Node | |
│ ├── public: void * CVerifyRefNode::`scalar deleting destructor'(unsigned int) | |
│ └── public: long CVerifyRefNode::Init(unsigned long,unsigned short const *) | |
""" | |
# TODO: KEYWORDS is not referenced by any function visible in this file yet.
# C++ keywords (plus the "*" token) that can appear in a demangled declaration.
KEYWORDS = [
    "alignas", "constinit", "false", "public", "true", "alignof",
    "const_cast", "float", "register",
    "reinterpret_cast", "typedef", "auto", "co_await", "friend", "requires",
    "typeid", "bool",
    "co_return", "typename", "co_yield", "short", "union", "decltype",
    "signed", "unsigned",
    "catch", "default", "int", "sizeof", "using", "char", "delete", "long",
    "static", "virtual",
    "char8_t", "mutable", "static_assert", "void", "char16_t", "double",
    "namespace", "static_cast",
    "volatile", "char32_t", "dynamic_cast", "new", "struct", "wchar_t",
    "class", "concept", "enum",
    "nullptr", "template", "const", "explicit", "operator", "this",
    "consteval", "export", "private",
    "thread_local", "constexpr", "extern", "protected", "throw", "*",
]
def test_data():
    """
    Load sample declarations from "demangled_functions.txt".

    The file name is relative, so it is resolved against the current
    working directory.

    :return: list with one string per line, trailing whitespace removed
    """
    stripped_lines = []
    with open("demangled_functions.txt", "r") as handle:
        for raw_line in handle:
            stripped_lines.append(raw_line.rstrip())
    return stripped_lines
def create_nodes(item_names):
    """
    Pair every element of a list with the element that follows it.

    Example: [1, 2, 3] becomes [(1, 2), (2, 3)].

    :param item_names: list of items to pair up
    :return: list of (item, next_item) tuples; a single-element list is
             handed back unchanged and an empty list gives an empty list
    """
    if len(item_names) != 1:
        # zip stops at the shorter input, i.e. after the second-to-last item
        return list(zip(item_names, item_names[1:]))
    return item_names
def parse_ms_pdb_func_def(declaration):
    """
    Split a demangled MSVC/PDB function declaration into name parts.

    Only the text before the first "(" is examined:

    1. scope resolution operator present
       (e.g. "public: virtual long RegMeta::MergeEnd"): the last word before
       the first "::" is the name (any "<...>" template argument list is cut
       off first) and the text after the last "::" is the method name.
    2. not present (e.g. "int ScaleResult"): the last word is the name and
       there is no method name.

    The name is then broken up on capital letters, e.g. "CInstallRefEnum"
    becomes ["CInstall", "Ref", "Enum"]. A name made only of capital
    letters is kept as a single part.

    notes: https://learn.microsoft.com/en-us/cpp/cpp/functions-cpp?source=recommendations&view=msvc-170

    :param declaration: demangled declaration string
    :return: tuple (name_parts, method_name). name_parts is an empty list
             when no name precedes "(" / "::" (e.g. "" or
             "<lambda_1>::operator()(void)") or the name holds no capital
             letter; method_name is None when "::" is absent.
    """
    scope_resolution_operator = "::"
    parentheses_start = "("
    template_start = "<"
    method_name = None

    # public: virtual long RegMeta::MergeEnd(void)
    #                                       ^-- keep everything left of here
    split_declaration = declaration.partition(parentheses_start)[0]

    if scope_resolution_operator in split_declaration:
        # "public: virtual long RegMeta::MergeEnd" becomes
        # ['public: virtual long RegMeta', 'MergeEnd']
        scopes = split_declaration.split(scope_resolution_operator)
        method_name = scopes[-1]
        # examples with a template argument list:
        # public: CChainedHash<struct MDTOKENHASH>::~CChainedHash<struct MDTOKENHASH>(void)
        # public: struct MDTOKENHASH * CChainedHash<struct MDTOKENHASH>::Add(void const *)
        # partition() leaves the text untouched when there is no "<"
        owner = scopes[0].partition(template_start)[0]
    else:
        owner = split_declaration

    tokens = owner.split()
    if not tokens:
        # nothing usable as a name; previously this raised IndexError
        return ([], method_name)

    # not exactly a function when "::" is present (it is the class), but
    # treating both cases alike simplifies the code
    function_name = tokens[-1]

    if function_name.isupper() and function_name.isalpha():
        return ([function_name], method_name)
    # split up the pascal case into substrings
    return (re.findall('[A-Z]{1,4}[^A-Z]*', function_name), method_name)
def _get_or_create_child(parent, name):
    """Return the direct child of *parent* called *name*, creating it if absent."""
    # Only direct children are inspected, so *parent* itself can never be
    # returned when a name part repeats (e.g. "Foo" under "Foo").
    for child in parent.children:
        if child.name == name:
            return child
    return Node(name, parent=parent)


def parse(declarations):
    """
    Arrange declarations into a folder-like anytree hierarchy.

    Each declaration is split into name parts by parse_ms_pdb_func_def();
    the parts form a chain of nested nodes below a node called "root" and
    the full declaration text is attached as a child of the last part.
    Declarations that share leading parts share the corresponding nodes.

    A declaration with a single name part (e.g. "Sleep") is filed under one
    top-level node; declarations that yield no name parts are skipped.

    :param declarations: iterable of declaration strings
    :return: the "root" Node holding the whole hierarchy
    """
    root = Node("root")  # create parent node
    for declaration in declarations:
        name_parts, _method_name = parse_ms_pdb_func_def(declaration)
        if not name_parts:
            # no folder to place this declaration in
            continue
        # walk/extend the folder chain one name part at a time
        folder = root
        for part in name_parts:
            folder = _get_or_create_child(folder, part)
        Node(declaration, parent=folder)
    return root
def _print(root):
    """
    Print the rendered tree, one node per line.

    :param root: node at which rendering starts
    """
    for row in RenderTree(root):
        # each row unpacks to (prefix, fill, node); fill is not needed here
        prefix, _fill, current = row
        print("%s%s" % (prefix, current.name))
def _save_layout(root):
    """
    Write the rendered tree to a text file and print where it was saved.

    The output path is the path returned by idc.get_idb_path() with its
    file extension swapped for ".txt".

    :param root: node at which rendering starts
    """
    # imported locally so the function does not rely on IDA having placed
    # these names into the script's global namespace
    import os
    import idc

    idb_path = idc.get_idb_path()
    # Replace only the trailing extension. str.replace() on the suffix would
    # also rewrite earlier occurrences in the path and, for a path without
    # an extension, insert ".txt" between every character.
    text_path = os.path.splitext(idb_path)[0] + ".txt"
    # The tree prefixes are non-ASCII box-drawing characters, so the encoding
    # is fixed instead of depending on the platform default.
    with open(text_path, "w", encoding="utf-8") as export_file:
        export_file.writelines(
            "%s%s\n" % (pre, node.name) for pre, fill, node in RenderTree(root)
        )
    print("Export written to %s" % text_path)
def get_function_names():
    """
    Collect a name for every function listed by idautils.Functions().

    The demangled form is used when idc.demangle_name() returns one.
    Otherwise the raw name is kept, unless it starts with "sub_" or
    contains "@@", in which case the function is skipped.

    :return: list of name strings
    """
    # idaapi was used without being imported; import it alongside the others
    import idaapi
    import idautils
    import idc

    func_names = []
    for func_item in idautils.Functions():
        # look the name up once and reuse it for both branches
        raw_name = idaapi.get_func_name(func_item)
        # NOTE(review): IDAPython's documentation suggests obtaining the mask
        # with idc.get_inf_attr(idc.INF_SHORT_DN); here the constant itself
        # is passed as the mask - confirm this is intended.
        demangled = idc.demangle_name(raw_name, idc.INF_SHORT_DN)
        if demangled:
            func_names.append(demangled)
        elif not (raw_name.startswith("sub_") or "@@" in raw_name):
            func_names.append(raw_name)
    return func_names
def test():
    """Run parse() over the sample data from test_data(); the tree is discarded."""
    parse(test_data())
def print_layout():
    """Build the layout from get_function_names() and print it."""
    _print(parse(get_function_names()))
def export_layout():
    """Build the layout from get_function_names() and save it via _save_layout()."""
    _save_layout(parse(get_function_names()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
More output