Skip to content

Instantly share code, notes, and snippets.

@pgoodman
Last active August 8, 2024 23:56
Show Gist options
  • Save pgoodman/f901583fc097c363b6c65b948af1cb05 to your computer and use it in GitHub Desktop.
Save pgoodman/f901583fc097c363b6c65b948af1cb05 to your computer and use it in GitHub Desktop.
Group functions by their type
# Copyright 2024, Peter Goodman. All rights reserved.
"""
This program approximately groups functions by their types, and then prints
out the grouped functions.
XREF: https://x.com/eatonphil/status/1821573274582823247
"""
import argparse
import collections
from typing import List, DefaultDict, Set, Optional
import multiplier as mx
def sep(out: List[str], prev_out_len: int, tok: str) -> None:
    """Append the separator `tok` to `out` if `out` has changed in length.

    Args:
        out: Accumulator of string pieces; mutated in place.
        prev_out_len: Length of `out` recorded before the current run of
            items started being appended.
        tok: Separator to append.
    """
    # Nothing has been appended since `prev_out_len` was recorded, so a
    # separator here would be a leading one; skip it.
    if len(out) != prev_out_len:
        out.append(tok)
def mangle(ty: mx.ast.Type, out: List[str],
           strip_trivial_qualifiers: bool = False,
           func: Optional[mx.ast.FunctionDecl] = None) -> None:
    """Append an approximate, comparable encoding of `ty` to `out`.

    The pieces appended to `out` are meant to be joined by the caller into a
    single string.

    Args:
        ty: The type to encode.
        out: Accumulator of string pieces; mutated in place.
        strip_trivial_qualifiers: If true, qualifiers found directly on `ty`
            are omitted, unless the unqualified type is a reference or a
            pointer to something other than a function.
        func: The function declaration that `ty` belongs to, if any. Only
            consulted when `ty` is a function type: if `func` is a C++ method
            with a `this` type, that type is encoded ahead of the parameters.

    Raises:
        Exception: If `ty` is not handled by any branch below and its class
            name starts with `Dependent`.
    """
    # TODO(pag): Look into throwing, `const`-qualified methods, etc.
    if isinstance(ty, mx.ast.FunctionType):
        out.append("Func(")
        out_len = len(out)

        # Try to inject the `this` pointer type.
        if isinstance(func, mx.ast.CXXMethodDecl):
            if this_ty := func.this_type:
                mangle(this_ty, out, False)

        if isinstance(ty, mx.ast.FunctionProtoType):
            for pty in ty.parameter_types:
                sep(out, out_len, ", ")
                mangle(pty, out, True)

            if ty.is_variadic:
                sep(out, out_len, ", ")
                out.append("...")

        # A function type without a prototype is encoded the same way as a
        # variadic parameter.
        else:
            sep(out, out_len, ", ")
            out.append("...")

        out.append("; ")
        mangle(ty.return_type, out, True)
        out.append(")")

    elif isinstance(ty, mx.ast.QualifiedType):
        bty = ty.unqualified_desugared_type

        # Try to double check if we think we should ignore trivial qualifiers.
        # The idea here is that if you have two functions, one taking an `int`
        # parameter, and the other taking a `const int` parameter, then their
        # prototypes are functionally identical. The `const` qualifier really
        # only impacts whether or not the bodies of those functions can assign
        # to those parameters.
        #
        # Similarly, we always want to strip off a `const` qualifier on
        # something like an `int` return type, because `const` on the return
        # type is mostly meaningless.
        if strip_trivial_qualifiers:
            if isinstance(bty, mx.ast.PointerType):
                if not isinstance(bty.pointee_type, mx.ast.FunctionType):
                    strip_trivial_qualifiers = False

            elif isinstance(bty, mx.ast.ReferenceType):
                strip_trivial_qualifiers = False

        if not strip_trivial_qualifiers:
            out_len = len(out)
            if ty.is_const_qualified:
                out.append("C")

            # "V" marks `volatile`. This must test the volatile qualifier, not
            # the restrict one, or volatile is never encoded and restrict
            # would be encoded twice (as both "V" and "R").
            if ty.is_volatile_qualified:
                out.append("V")

            if ty.is_restrict_qualified:
                out.append("R")

            if ty.has_address_space:
                out.append(f"-{ty.address_space.name}")

            # Separate the qualifier letters from the underlying type, but
            # only if at least one qualifier was actually emitted.
            sep(out, out_len, " ")

        mangle(bty, out, False)

    elif isinstance(ty, mx.ast.BuiltinType):
        out.append(str(ty.builtin_kind.name))

    # Typedefs and `using` types are looked through, so they encode the same
    # as the type they name.
    elif isinstance(ty, mx.ast.TypedefType):
        mangle(ty.declaration.underlying_type, out, strip_trivial_qualifiers)

    elif isinstance(ty, mx.ast.UsingType):
        mangle(ty.underlying_type, out, strip_trivial_qualifiers)

    # Tag types are identified by the kind and ID of their canonical
    # declaration.
    elif isinstance(ty, mx.ast.TagType):
        decl = ty.declaration.canonical_declaration
        out.append(f"{decl.kind.name}({decl.id})")

    elif isinstance(ty, mx.ast.AtomicType):
        out.append("Atomic(")
        mangle(ty.value_type, out, False)
        out.append(")")

    # NOTE(pag): Ignores all the derived classes.
    elif isinstance(ty, mx.ast.VectorType):
        out.append("Vector(")
        mangle(ty.element_type, out, False)
        out.append(", ")
        out.append(ty.vector_kind.name)
        out.append(")")

    # NOTE(pag): Ignores all the derived classes.
    elif isinstance(ty, mx.ast.ArrayType):
        out.append("Array(")
        mangle(ty.element_type, out, False)
        out.append(", ")
        out.append(ty.size_modifier.name)
        out.append(")")

    elif isinstance(ty, mx.ast.TemplateSpecializationType):
        if aliased_ty := ty.aliased_type:
            mangle(aliased_ty, out, strip_trivial_qualifiers)
        else:
            out.append("?")

    elif isinstance(ty, (mx.ast.PointerType, mx.ast.ReferenceType)):
        # Drop the last four characters of the class name (the `Type`
        # suffix) to get the constructor name used in the encoding.
        name = ty.__class__.__name__[:-4]
        out.append(f"{name}(")
        mangle(ty.pointee_type, out, False)
        out.append(")")

    # Parentheses and macro qualifiers are looked through.
    elif isinstance(ty, mx.ast.ParenType):
        mangle(ty.inner_type, out, strip_trivial_qualifiers)

    elif isinstance(ty, mx.ast.MacroQualifiedType):
        mangle(ty.underlying_type, out, strip_trivial_qualifiers)

    else:
        if ty.__class__.__name__.startswith("Dependent"):
            raise Exception("Ignoring dependent types")

        # TODO(pag): Add more.
        out.append(ty.kind.name)
def canon_type(ty: mx.ast.Type, out: List):
    """Append the kind of `ty` to `out` when `ty` is a function type.

    Any other type leaves `out` untouched.
    """
    if not isinstance(ty, mx.ast.FunctionType):
        return
    out.append(ty.kind)
# Command-line handling: the only option is the path to the index database.
parser = argparse.ArgumentParser(description="Multiplier Code Browser")
parser.add_argument('--db', type=str,
                    required=True,
                    help="Path to mx-index-produced database")
args = parser.parse_args()

index = mx.Index.in_memory_cache(mx.Index.from_database(args.db))

# Maps a mangled type string to the functions whose type mangles to it.
grouped_functions: DefaultDict[str, List[mx.ast.FunctionDecl]] = collections.defaultdict(list)

# IDs of canonical declarations that have already been processed.
seen: Set[int] = set()

# Iterate over all functions in the project, and group them by a form of their
# mangled type.
for func in mx.ast.FunctionDecl.IN(index):
    func = func.canonical_declaration

    # Don't repeat more of the same function.
    func_id = func.id
    if func_id in seen:
        continue
    seen.add(func_id)

    out: List[str] = []
    try:
        mangle(func.type, out, False, func)

    # Likely a dependent type, i.e. an unspecialized function template, or a
    # function inside of a template. We can't reliably compare dependent
    # function types.
    #
    # Catch `Exception` rather than using a bare `except:` so that
    # `KeyboardInterrupt` and `SystemExit` still stop the program.
    except Exception:
        continue

    type_str: str = "".join(out)
    grouped_functions[type_str].append(func)

name_config = mx.ast.QualifiedNameRenderOptions(fully_qualified=True)
flc = mx.frontend.FileLocationCache()
first = True

# Dump out the grouped functions.
for funcs in grouped_functions.values():

    # Skip groups with only one function
    if len(funcs) == 1:
        continue

    # Blank line between groups, but not before the first one.
    if not first:
        print()

    print("///////////////////////////////////////////////////////////////")
    first = False
    nested_first = True

    for func in funcs:
        func_file_toks = func.tokens.file_tokens

        # Blank line between functions of a group, but not before the first.
        if not nested_first:
            print()
        nested_first = False

        # Get the fully qualified name. This will let the human distinguish
        # methods.
        func_name = func.qualified_name(name_config).data

        # Print out the name and optionally the location of the function. The
        # function's name as printed may appear different than what shows up
        # in the code snippet below. This can happen if one function has an
        # `alias` attribute for another, or if the definition of the function
        # in the file is actually subject to macro substitution.
        first_tok = func_file_toks.front
        if file := mx.frontend.File.containing(first_tok):
            path = str(next(file.paths))
            if line_col := first_tok.location(flc):
                print(f"// \t{func_name} @ {path}:{line_col[0]}:{line_col[1]}")
            else:
                print(f"// \t{func_name} @ {path}")
        else:
            print(f"// \t{func_name}")

        # If we're dealing with the function definition, then try to chop the
        # printing off at the beginning of the function's body.
        max_index = func_file_toks.size
        if body := func.body:
            # Compare against `None` explicitly: an index of zero is a valid
            # position but is falsy.
            # NOTE(review): assumes `index_of` returns `None` when the token
            # is not found -- confirm against the Multiplier API.
            body_index = func_file_toks.index_of(body.tokens.file_tokens.front)
            if body_index is not None:
                max_index = body_index

        # Render out the tokens (as they appear in the file), except doubly
        # indented.
        token_datas: List[str] = ["\t\t"]
        for i, tok in enumerate(func_file_toks):
            if i >= max_index:
                break
            token_datas.append(tok.data)

        print("".join(token_datas).replace("\n", "\n\t\t"))
///////////////////////////////////////////////////////////////
// proc_name @ /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/libproc.h:99:1
int proc_name(int pid, void * buffer, uint32_t buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
// proc_pidpath @ /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/libproc.h:102:1
int proc_pidpath(int pid, void * buffer, uint32_t buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
///////////////////////////////////////////////////////////////
// g_lstat @ /Users/pag/Code/openssh-portable/openbsd-compat/glob.c:1014:1
static int
g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
// g_stat @ /Users/pag/Code/openssh-portable/openbsd-compat/glob.c:1026:1
static int
g_stat(Char *fn, struct stat *sb, glob_t *pglob)
///////////////////////////////////////////////////////////////
// glob0 @ /Users/pag/Code/openssh-portable/openbsd-compat/glob.c:474:1
static int
glob0(const Char *pattern, glob_t *pglob, struct glob_lim *limitp)
// globexp1 @ /Users/pag/Code/openssh-portable/openbsd-compat/glob.c:239:1
static int
globexp1(const Char *pattern, glob_t *pglob, struct glob_lim *limitp)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment