Skip to content

Instantly share code, notes, and snippets.

@blackbone
Last active April 6, 2026 16:32
Show Gist options
  • Select an option

  • Save blackbone/8f511a7bbb14f7ed74ed2ca738c43842 to your computer and use it in GitHub Desktop.

Select an option

Save blackbone/8f511a7bbb14f7ed74ed2ca738c43842 to your computer and use it in GitHub Desktop.
IL2CPP STATS
#!/usr/bin/env python3
import argparse
import os
import html
import json
import re
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from collections import Counter
from pathlib import Path
# File suffixes that are treated as source files when a directory is scanned.
SOURCE_SUFFIXES = {".c", ".cpp"}
# File names flagged as "generic" files: Generics, Generics__N, GenericMethods__N
# and anything starting with Il2CppGeneric, with a .c or .cpp extension.
GENERIC_FILE_RE = re.compile(
r"^(?:Generics(?:__\d+)?|GenericMethods__\d+|Il2CppGeneric.*)\.(?:c|cpp)$"
)
# Trailing part of a file name (optional "__N", optional "_CodeGen", extension)
# that assembly_name() removes to obtain the assembly name.
ASSEMBLY_RE = re.compile(r"(?:__\d+)?(?:_CodeGen)?\.(?:c|cpp)$")
# "struct X" / "class X" / "enum X"; group 1 is the declared identifier.
TYPE_DECL_RE = re.compile(r"\b(?:struct|class|enum)\s+([A-Za-z_][A-Za-z0-9_]*)")
# Identifier followed by "_t" and at least six upper-case hex digits;
# group 1 is the part before the "_t<hash>" suffix.
TYPE_TOKEN_RE = re.compile(r"\b([A-Za-z][A-Za-z0-9]*(?:_[1-9][0-9]*)?)_t[0-9A-F]{6,}\b")
# First identifier directly followed by "("; group 1 is that identifier.
METHOD_HEAD_RE = re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*)\s*\(")
# Identifier ending in "_gshared" or "_gshared_rgctx"; group 1 excludes the suffix.
GENERIC_METHOD_RE = re.compile(r"\b([A-Za-z_][A-Za-z0-9_]*)_gshared(?:_rgctx)?\b")
# Names ending in "_<positive integer>" (e.g. "List_1").
GENERIC_TYPE_NAME_RE = re.compile(r".+_[1-9][0-9]*$")
# Whole hashed symbol of the form Name_<N>_t<16+ hex digits> (search anywhere in text).
HASHED_GENERIC_INSTANCE_RE = re.compile(r"\b([A-Za-z][A-Za-z0-9_]*_[1-9][0-9]*_t[0-9A-F]{16,})\b")
# Same shape anchored to the full string; group 1 is the name, group 2 the hash.
HASHED_GENERIC_NAME_RE = re.compile(r"^([A-Za-z][A-Za-z0-9_]*_[1-9][0-9]*)_t([0-9A-F]{16,})$")
# "inline <return type> <name> (<params>)" on a single line;
# groups are return type, function name and the raw parameter text.
INLINE_SIGNATURE_RE = re.compile(r"^inline\s+(.+?)\s+([A-Za-z_][A-Za-z0-9_]*)\s*\((.*)\)\s*$")
# "struct Name" optionally followed by ": public Base"; groups are name and base.
STRUCT_START_RE = re.compile(r"^struct\s+([A-Za-z_][A-Za-z0-9_]*)\s*(?:\:\s+public\s+([A-Za-z_][A-Za-z0-9_]*))?\s*$")
# Field line "<type> ___name;"; group 1 is the type (optionally with * or &),
# group 2 the field name including its leading underscores.
FIELD_RE = re.compile(r"^\s*([A-Za-z_][A-Za-z0-9_]*(?:\s*[*&])?)\s+(___[A-Za-z0-9_]+|____[A-Za-z0-9_]+);")
# "m_Items[N]" field line, optionally prefixed by ALIGN_FIELD(n);
# group 1 is the element type, group 2 the literal "m_Items".
ARRAY_ITEM_FIELD_RE = re.compile(
r"^\s*(?:ALIGN_FIELD\s*\(\s*\d+\s*\)\s+)?([A-Za-z_][A-Za-z0-9_]*(?:\s*[*&])?)\s+(m_Items)(?:\[[0-9]+\])\s*;"
)
# Substrings whose per-line occurrences are counted by scan_file();
# "il2cpp_rgctx" additionally feeds the rgctx counter.
MARKERS = (
"il2cpp_rgctx",
"UniTask",
"List_1",
"Dictionary_2",
"Action_1",
"Action_2",
"Func_2",
"Func_3",
"ValueTuple",
"Enumerator",
"EqualityComparer_1",
"NativeArray_1",
)
# Identifiers followed by "(" that scan_file() must not count as methods.
METHOD_SKIP = {"if", "for", "while", "switch", "return", "sizeof", "catch"}
# Row limits passed to Counter.most_common() in build_construct_rows().
TOP_GENERIC_TYPES_PER_ASSEMBLY = 300
TOP_GENERIC_METHODS_PER_ASSEMBLY = 300
# Upper bound for executor workers: CPU count clamped to the range 1..12.
MAX_WORKERS = max(1, min(os.cpu_count() or 1, 12))
# Version number embedded in the signature returned by root_signature().
CACHE_VERSION = 2
def format_mb(size: int) -> str:
    """Format a byte count as mebibytes with two decimals and an ``MB`` suffix."""
    megabytes = size / 1048576
    return "{:.2f} MB".format(megabytes)
def ratio(part: int | float, total: int | float) -> float:
return 0.0 if total <= 0 else round(part * 100.0 / total, 3)
def normalize_method_name(name: str) -> str:
    """Strip the trailing ``_m<HASH>`` and ``_gshared[_rgctx]`` decorations from a symbol.

    The hash is removed both before and after the ``_gshared`` suffix so that
    ``Foo_m<HASH>_gshared`` normalises to ``Foo`` as well; previously the hash
    survived in that case because it was not at the end of the string when the
    hash pattern was applied.
    """
    hash_suffix = r"_m[0-9A-F]{6,}$"
    name = re.sub(hash_suffix, "", name)
    name = re.sub(r"_gshared(?:_rgctx)?$", "", name)
    # Second pass: the hash may only have become the suffix after the
    # "_gshared" marker was removed.
    return re.sub(hash_suffix, "", name)
def normalize_type_name(name: str) -> str:
    """Drop a trailing ``_t<HASH>`` (six or more upper-case hex digits) from a type symbol."""
    hash_suffix = re.compile(r"_t[0-9A-F]{6,}$")
    return hash_suffix.sub("", name)
def format_type_name(name: str) -> str:
    """Turn a generated type symbol into a short C#-like display name.

    The ``_t<HASH>`` suffix (same rule as ``normalize_type_name``) and a bare
    trailing ``_t`` are removed, well-known type names are replaced by their
    C# keywords, and ``U5BU5D`` becomes ``[]``.

    Replacements are plain substring replacements applied in order. Longer
    names are listed before the names they contain (``UInt32`` before
    ``Int32``, ``SByte`` before ``Byte``); otherwise ``UInt32`` would come out
    as ``Uint`` and ``SByte`` as ``Sbyte``.
    """
    name = re.sub(r"_t[0-9A-F]{6,}$", "", name)
    name = re.sub(r"_t$", "", name)
    replacements = (
        ("RuntimeObject", "object"),
        ("Object", "object"),
        ("String", "string"),
        ("Boolean", "bool"),
        ("UInt32", "uint"),
        ("Int32", "int"),
        ("UInt64", "ulong"),
        ("Int64", "long"),
        ("Single", "float"),
        ("Double", "double"),
        ("Char", "char"),
        ("SByte", "sbyte"),
        ("Byte", "byte"),
        ("uint32", "uint"),
        ("int32", "int"),
        ("uint64", "ulong"),
        ("int64", "long"),
        ("uint8", "byte"),
    )
    for old, new in replacements:
        name = name.replace(old, new)
    return name.replace("U5BU5D", "[]")
def unwrap_array_token(type_name: str) -> str:
    """Return the element token of an array type symbol.

    Pointer/reference markers and surrounding whitespace are removed, then a
    trailing ``U5BU5D`` (optionally followed by ``_t<HASH>``) is stripped.
    Tokens without that suffix are returned cleaned but otherwise unchanged.

    The cleaning is done inline; the previous ``globals()`` probe selected
    between two branches that performed exactly the same replacement.
    """
    clean = type_name.replace("*", "").replace("&", "").strip()
    return re.sub(r"U5BU5D(_t[0-9A-F]{6,})?$", "", clean)
def assembly_name(filename: str) -> str:
    """Derive the assembly name from a file name by removing the ``ASSEMBLY_RE`` suffix."""
    trimmed, _replacements = ASSEMBLY_RE.subn("", filename)
    return trimmed
def classify_assembly(name: str) -> str:
    """Map an assembly name to a coarse group label.

    Prefix rules are checked in order; the first match wins. Names that match
    nothing are labelled ``"assembly"``.
    """
    prefix_rules = (
        (("Il2Cpp",), "il2cpp-infra"),
        (("Generics", "GenericMethods"), "generic-shared"),
        (("UnityEngine.",), "unity-engine"),
        (("Unity.",), "unity-package"),
    )
    for prefixes, label in prefix_rules:
        if name.startswith(prefixes):
            return label
    if name == "mscorlib" or name.startswith("System"):
        return "dotnet"
    return "game" if name == "Assembly-CSharp" else "assembly"
def derive_namespace(symbol: str) -> str:
    """Guess a dotted namespace from an underscore-separated method symbol.

    The symbol is normalised, split on ``_`` (empty pieces dropped) and the
    last two pieces are discarded; the remaining pieces are joined with dots.
    Symbols with fewer than three pieces map to ``"(global)"``.
    """
    pieces = [piece for piece in normalize_method_name(symbol).split("_") if piece]
    if len(pieces) >= 3:
        joined = ".".join(pieces[:-2])
        if joined:
            return joined
    return "(global)"
def make_id(prefix: str, value: str) -> str:
    """Build a lower-case, dash-separated identifier ``<prefix>-<slug>``.

    Runs of non-alphanumeric characters in ``value`` collapse into a single
    dash; an empty slug falls back to ``item``.
    """
    slug = re.sub(r"[^a-zA-Z0-9]+", "-", value)
    slug = slug.strip("-").lower()
    if not slug:
        slug = "item"
    return prefix + "-" + slug
def top_counter(counter: Counter, limit: int):
    """Return the ``limit`` most common entries as ``{"name", "count"}`` dicts."""
    rows = []
    for name, count in counter.most_common(limit):
        rows.append({"name": name, "count": count})
    return rows
def detect_root(arg_root: str | None) -> Path:
if arg_root:
return Path(arg_root)
cwd = Path.cwd()
sibling = cwd / "il2cppOutput"
if sibling.is_dir():
return sibling
if list(cwd.glob("*.cpp")) or list(cwd.glob("*.c")):
return cwd
fallback = cwd / "Library/Bee/Android/Prj/IL2CPP/Il2CppBackup/il2cppOutput"
if fallback.is_dir():
return fallback
raise SystemExit("run inside parent of il2cppOutput, inside il2cppOutput, or pass the path")
def scan_file(path: Path):
    """Collect per-file statistics from one source file in a single pass.

    Every line is checked for marker substrings, type declarations, hashed
    type tokens, method heads and ``_gshared`` references. The result is a
    dict with file metadata, scalar totals and one ``Counter`` per category.
    """
    assembly = assembly_name(path.name)
    size = path.stat().st_size
    is_generic_file = bool(GENERIC_FILE_RE.match(path.name))

    line_count = type_decl_count = method_count = 0
    generic_method_count = rgctx_count = 0
    type_counter = Counter()
    method_counter = Counter()
    generic_method_counter = Counter()
    marker_counter = Counter()
    namespace_counter = Counter()
    generic_namespace_counter = Counter()

    decl_prefixes = ("struct ", "class ", "enum ")
    return_prefixes = ("void ", "bool ", "int ", "float ", "double ")

    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        for text in handle:
            line_count += 1
            body = text.lstrip()

            # Marker substrings are counted by raw occurrence.
            for marker in MARKERS:
                occurrences = text.count(marker)
                if not occurrences:
                    continue
                marker_counter[marker] += occurrences
                if marker == "il2cpp_rgctx":
                    rgctx_count += occurrences

            # Type declarations at the start of the (left-stripped) line.
            if body.startswith(decl_prefixes):
                declared = TYPE_DECL_RE.search(body)
                if declared:
                    type_counter[normalize_type_name(declared.group(1))] += 1
                    type_decl_count += 1

            # Hashed type tokens anywhere on the line.
            if "_t" in text:
                for hit in TYPE_TOKEN_RE.finditer(text):
                    type_counter[hit.group(1)] += 1

            # Lines that look like a method head.
            looks_like_method = (
                "IL2CPP_EXTERN_C" in text
                or "IL2CPP_METHOD_ATTR" in text
                or body.startswith(return_prefixes)
            ) and "(" in text
            if looks_like_method:
                head = METHOD_HEAD_RE.search(text)
                if head and head.group(1) not in METHOD_SKIP:
                    symbol = head.group(1)
                    method_name = normalize_method_name(symbol)
                    namespace = derive_namespace(symbol)
                    method_counter[method_name] += 1
                    namespace_counter[namespace] += 1
                    method_count += 1

            # Shared generic method references; these also feed the general
            # namespace counter.
            if "_gshared" in text:
                for hit in GENERIC_METHOD_RE.finditer(text):
                    symbol = hit.group(1)
                    method_name = normalize_method_name(symbol)
                    namespace = derive_namespace(symbol)
                    generic_method_counter[method_name] += 1
                    generic_namespace_counter[namespace] += 1
                    namespace_counter[namespace] += 1
                    generic_method_count += 1

    return {
        "name": path.name,
        "assembly": assembly,
        "group": classify_assembly(assembly),
        "size": size,
        "line_count": line_count,
        "is_generic_file": is_generic_file,
        "type_decl_count": type_decl_count,
        "method_count": method_count,
        "generic_method_count": generic_method_count,
        "rgctx_count": rgctx_count,
        "type_counter": type_counter,
        "method_counter": method_counter,
        "generic_method_counter": generic_method_counter,
        "marker_counter": marker_counter,
        "namespace_counter": namespace_counter,
        "generic_namespace_counter": generic_namespace_counter,
    }
def run_parallel(items, fn, process=True):
    """Apply ``fn`` to every item and return the results in input order.

    Zero or one item is handled inline. Otherwise a process pool (default) or
    a thread pool (``process=False``) with at most ``MAX_WORKERS`` workers is
    used.
    """
    item_count = len(items)
    if item_count <= 1:
        return list(map(fn, items))

    pool_type = ThreadPoolExecutor if not process else ProcessPoolExecutor
    with pool_type(max_workers=min(MAX_WORKERS, item_count)) as pool:
        return list(pool.map(fn, items))
def aggregate_assemblies(files):
    """Fold per-file scan results into one summary dict per assembly.

    Scalar totals are summed, counters are merged, and the contributing file
    dicts are kept under ``"files"``. The group label comes from the first
    file seen for each assembly. Returns a dict keyed by assembly name.
    """
    summed_fields = (
        "size",
        "line_count",
        "type_decl_count",
        "method_count",
        "generic_method_count",
        "rgctx_count",
    )
    counter_fields = (
        "type_counter",
        "method_counter",
        "generic_method_counter",
        "marker_counter",
        "namespace_counter",
        "generic_namespace_counter",
    )

    grouped = {}
    for entry in files:
        key = entry["assembly"]
        bucket = grouped.get(key)
        if bucket is None:
            bucket = {
                "name": key,
                "group": entry["group"],
                "size": 0,
                "line_count": 0,
                "file_count": 0,
                "generic_file_count": 0,
                "type_decl_count": 0,
                "method_count": 0,
                "generic_method_count": 0,
                "rgctx_count": 0,
                "type_counter": Counter(),
                "method_counter": Counter(),
                "generic_method_counter": Counter(),
                "marker_counter": Counter(),
                "namespace_counter": Counter(),
                "generic_namespace_counter": Counter(),
                "files": [],
            }
            grouped[key] = bucket

        for field in summed_fields:
            bucket[field] += entry[field]
        bucket["file_count"] += 1
        bucket["generic_file_count"] += int(entry["is_generic_file"])
        for field in counter_fields:
            bucket[field].update(entry[field])
        bucket["files"].append(entry)
    return grouped
def build_namespace_tree(files, total_size, generic_size, total_methods, total_generic_methods, total_rgctx):
    """Build flat namespace rows (with parent links) from per-file counters.

    Each file's size and rgctx count are split across its namespaces in
    proportion to the namespace hit counts; every ancestor of a namespace
    receives the same share. Method counts are credited to the leaf only,
    generic method counts to the leaf and all of its ancestors. Rows are
    sorted by level, then descending size, then full name.
    """
    global_name = "(global)"
    registry = {}

    def ensure(path: str):
        # Return the node for ``path``, creating it on first use.
        if path in registry:
            return registry[path]
        at_root = path in {"", "(global)"}
        if at_root or "." not in path:
            parent = None
        else:
            parent = path.rsplit(".", 1)[0]
        created = {
            "id": make_id("ns", path or "root"),
            "namespace": path or "(global)",
            "name": "(global)" if at_root else path.split(".")[-1],
            "full_name": path or "(global)",
            "parent_id": None if parent is None else make_id("ns", parent),
            "level": 0 if at_root else path.count("."),
            "size": 0,
            "method_count": 0,
            "generic_method_count": 0,
            "rgctx_count": 0,
            "assembly_counter": Counter(),
        }
        registry[path] = created
        return created

    def lineage(namespace: str):
        # Paths from the outermost ancestor down to ``namespace`` itself.
        parts = [global_name] if namespace == global_name else namespace.split(".")
        if parts[0] == global_name:
            return [global_name] * len(parts)
        return [".".join(parts[:depth]) for depth in range(1, len(parts) + 1)]

    for entry in files:
        weights = entry["namespace_counter"] or Counter({"(global)": 1})
        weight_sum = sum(weights.values()) or 1
        for namespace, weight in weights.items():
            chain = lineage(namespace)
            size_share = int(round(entry["size"] * weight / weight_sum))
            rgctx_share = int(round(entry["rgctx_count"] * weight / weight_sum))
            leaf_index = len(chain) - 1
            for index, path in enumerate(chain):
                node = ensure(path)
                node["size"] += size_share
                node["rgctx_count"] += rgctx_share
                node["assembly_counter"][entry["assembly"]] += weight
                if index == leaf_index:
                    node["method_count"] += weight
        for namespace, weight in entry["generic_namespace_counter"].items():
            for path in lineage(namespace):
                ensure(path)["generic_method_count"] += weight

    rows = []
    for node in registry.values():
        node["top_assemblies"] = top_counter(node["assembly_counter"], 10)
        node["size_pct_total"] = ratio(node["size"], total_size)
        node["size_pct_generic"] = ratio(node["size"], generic_size)
        node["method_pct_total"] = ratio(node["method_count"], total_methods)
        node["generic_method_pct_total"] = ratio(node["generic_method_count"], total_generic_methods)
        node["rgctx_pct_total"] = ratio(node["rgctx_count"], total_rgctx)
        rows.append(node)

    def sort_key(row):
        return (row["level"], -row["size"], row["full_name"])

    rows.sort(key=sort_key)
    return rows
def build_construct_rows(assemblies, total_size):
    """Produce per-assembly rows estimating size per marker / generic type / generic method.

    Within each category the assembly size is distributed over the listed
    items in proportion to their counts, so ``size_est`` is an estimate and
    not a measurement.
    """
    rows = []
    for assembly in assemblies.values():
        generic_types = Counter()
        for type_name, hits in assembly["type_counter"].items():
            if GENERIC_TYPE_NAME_RE.fullmatch(type_name):
                generic_types[type_name] = hits

        buckets = (
            ("marker", assembly["marker_counter"], None),
            ("generic_type", generic_types, TOP_GENERIC_TYPES_PER_ASSEMBLY),
            ("generic_method", assembly["generic_method_counter"], TOP_GENERIC_METHODS_PER_ASSEMBLY),
        )
        for category, counter, limit in buckets:
            ranked = counter.most_common(limit)
            denominator = sum(hits for _, hits in ranked) or 1
            for item_name, hits in ranked:
                estimate = int(round(assembly["size"] * hits / denominator))
                row = {
                    "id": make_id("construct", f"{assembly['name']}:{category}:{item_name}"),
                    "assembly": assembly["name"],
                    "category": category,
                    "name": item_name,
                    "count": hits,
                    "size_est": estimate,
                    "size_est_pct_total": ratio(estimate, total_size),
                    "size_est_pct_assembly": ratio(estimate, assembly["size"]),
                    "assembly_size": assembly["size"],
                    "group": assembly["group"],
                }
                rows.append(row)

    def sort_key(row):
        return (row["assembly"], row["category"], -row["size_est"], -row["count"], row["name"])

    rows.sort(key=sort_key)
    return rows
def split_params(text: str):
    """Split a parameter list on commas, trimming pieces and dropping empty ones."""
    trimmed = (piece.strip() for piece in text.split(","))
    return [piece for piece in trimmed if piece]
def extract_type_text(param: str) -> str:
    """Return the type portion of a single parameter declaration.

    The trailing ``RuntimeMethod ... method`` parameter and blank input yield
    an empty string. A recognised parameter name (``___<n>_<name>``,
    ``__this`` or ``method``) is cut off; anything else is returned trimmed.
    """
    param = param.strip()
    if not param or ("RuntimeMethod" in param and param.endswith(" method")):
        return ""
    found = re.match(r"(.+?)\s+(?:___\d+_[A-Za-z0-9_]+|__this|method)$", param)
    if found is None:
        return param.strip()
    return found.group(1).strip()
def pretty_cpp_type(type_text: str) -> str:
    """Prettify a C++ type without struct-based resolution (name formatting only)."""
    return pretty_cpp_type_resolved(type_text, structs=None, memo=None)
def pretty_cpp_type_resolved(type_text: str, structs=None, memo=None) -> str:
    """Prettify a C++ type, optionally resolving it through parsed structs.

    Qualifiers (``const``, ``struct``, ``class``, ``enum``, ``volatile``) and
    pointer/reference markers are removed and the last remaining token is
    treated as the type symbol.

    Args:
        type_text: Raw C++ type text; empty input yields ``""``.
        structs: Parsed struct table. When ``None`` the symbol is only
            formatted with ``format_type_name``.
        memo: Cache dict forwarded to ``resolve_non_generic_type``. It is
            used as-is even when empty, so entries accumulate across calls;
            the previous ``memo or {}`` replaced an empty caller-owned dict
            with a throwaway one.
    """
    if not type_text:
        return ""
    text = re.sub(r"\b(const|struct|class|enum|volatile)\b", "", type_text)
    text = text.replace("*", "").replace("&", "").strip()
    token = text.split()[-1] if text else ""
    if not token:
        return ""
    if structs is None:
        return format_type_name(token)
    if memo is None:
        memo = {}
    return resolve_non_generic_type(token, structs, memo, set())
def extract_hashed_symbol(text: str) -> str:
    """Return the first hashed generic instance symbol in ``text``, or ``""`` if none."""
    found = HASHED_GENERIC_INSTANCE_RE.search(text)
    if found is None:
        return ""
    return found.group(1)
def infer_generic_display(method_name: str, return_type: str, param_types: list[str]) -> str | None:
if method_name.startswith("Action_1_Invoke") and len(param_types) >= 1:
return f"Action<{param_types[0]}>"
if method_name.startswith("Action_2_Invoke") and len(param_types) >= 2:
return f"Action<{param_types[0]}, {param_types[1]}>"
if method_name.startswith("Func_2_Invoke") and len(param_types) >= 1 and return_type:
return f"Func<{param_types[0]}, {return_type}>"
if method_name.startswith("Func_3_Invoke") and len(param_types) >= 2 and return_type:
return f"Func<{param_types[0]}, {param_types[1]}, {return_type}>"
if method_name.startswith("Predicate_1_Invoke") and len(param_types) >= 1:
return f"Predicate<{param_types[0]}>"
if method_name.startswith("Comparison_1_Invoke") and len(param_types) >= 1:
return f"Comparison<{param_types[0]}>"
if method_name.startswith(("List_1_Add", "List_1_Contains", "List_1_IndexOf")) and len(param_types) >= 1:
return f"List<{param_types[0]}>"
if method_name.startswith(("List_1_get_Item", "List_1_set_Item")) and return_type:
return f"List<{return_type}>"
if method_name.startswith(("IList_1_get_Item", "ICollection_1_Contains", "IEnumerable_1_GetEnumerator")) and return_type:
return f"{method_name.split('_', 1)[0]}<{return_type}>"
if method_name.startswith(("Dictionary_2_Add", "Dictionary_2_set_Item")) and len(param_types) >= 2:
return f"Dictionary<{param_types[0]}, {param_types[1]}>"
if method_name.startswith(("Dictionary_2_get_Item", "Dictionary_2_TryGetValue", "IDictionary_2_get_Item")) and len(param_types) >= 1 and return_type:
return f"Dictionary<{param_types[0]}, {return_type}>"
if method_name.startswith("KeyValuePair_2__ctor") and len(param_types) >= 2:
return f"KeyValuePair<{param_types[0]}, {param_types[1]}>"
if method_name.startswith("Nullable_1_get_Value") and return_type:
return f"Nullable<{return_type}>"
return None
def render_method_signature(owner_display: str, member: str, return_type: str, param_types: list[str]) -> str:
    """Render a readable signature for a member of ``owner_display``.

    Constructors show just the owner, getters ``Owner.Prop -> T``, setters
    ``Owner.Prop = T`` (using the last parameter), and everything else
    ``Owner.Member(args) -> T``. The return arrow is omitted for ``void`` or
    an empty return type.
    """
    joined_args = ", ".join(param_types)
    if member == "ctor":
        return owner_display

    arrow = ""
    if return_type and return_type != "void":
        arrow = f" -> {return_type}"

    accessor = member[:4]
    if accessor == "get_":
        return f"{owner_display}.{member[4:]}{arrow}"
    if accessor == "set_":
        target = f"{owner_display}.{member[4:]}"
        if param_types:
            return f"{target} = {param_types[-1]}"
        return target

    # "Invoke" needs no special case: it renders exactly like any other call.
    return f"{owner_display}.{member}({joined_args}){arrow}"
def parse_struct_file(path: Path):
    """Parse struct definitions (name, base, fields) from one source file.

    A struct starts at a line matching ``STRUCT_START_RE`` and ends at the
    next line that is exactly ``};`` once stripped. Inside, lines matching
    ``FIELD_RE`` or ``ARRAY_ITEM_FIELD_RE`` are recorded as fields. A struct
    still open at end of file is discarded. Returns a dict keyed by struct
    name.
    """
    found = {}
    open_struct = None
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        for raw in handle:
            text = raw.rstrip("\n")
            trimmed = text.strip()
            if open_struct is None:
                header = STRUCT_START_RE.match(trimmed)
                if header:
                    open_struct = {
                        "name": header.group(1),
                        "base": header.group(2) or "",
                        "fields": [],
                    }
            elif trimmed == "};":
                found[open_struct["name"]] = open_struct
                open_struct = None
            else:
                # The plain field pattern takes precedence over the array
                # item pattern; both expose type and name as groups 1 and 2.
                member = FIELD_RE.match(text) or ARRAY_ITEM_FIELD_RE.match(text)
                if member:
                    open_struct["fields"].append(
                        {"type": member.group(1).strip(), "name": member.group(2)}
                    )
    return found
def build_type_graph(root: Path):
    """Parse every source file directly under ``root`` and merge the struct tables.

    Files are processed in sorted order; when two files define the same
    struct name, the later file wins.
    """
    sources = []
    for entry in sorted(root.iterdir()):
        if entry.is_file() and entry.suffix in SOURCE_SUFFIXES:
            sources.append(entry)

    graph = {}
    for partial in run_parallel(sources, parse_struct_file):
        graph.update(partial)
    return graph
def clean_field_type(type_name: str) -> str:
    """Remove every ``*`` and ``&`` from a type token and trim surrounding whitespace."""
    without_markers = type_name.translate(str.maketrans("", "", "*&"))
    return without_markers.strip()
def field_type(structs, struct_name: str, *field_names: str):
    """Return the cleaned type of the first field whose name is in ``field_names``.

    Fields are checked in declaration order. Returns ``""`` when the struct
    is unknown or none of the names is present.
    """
    record = structs.get(struct_name)
    if not record:
        return ""
    accepted = frozenset(field_names)
    for member in record["fields"]:
        if member["name"] in accepted:
            return clean_field_type(member["type"])
    return ""
def resolve_array_element_type(array_type: str, structs, memo, stack):
    """Resolve the display name of an array type's element.

    A parsed struct's ``m_Items`` field is preferred. Otherwise the element
    token is derived from the symbol text by cutting at ``U5BU5D``. A token
    that is not an array at all is simply formatted.
    """
    token = clean_field_type(array_type)
    if not token:
        return ""

    if structs.get(token):
        declared_item = field_type(structs, token, "m_Items")
        if declared_item:
            return resolve_non_generic_type(declared_item, structs, memo, stack)

    if "U5BU5D_t" in token:
        element_token = token.split("U5BU5D_t", 1)[0]
    elif token.endswith("U5BU5D"):
        element_token = token[:-6]
    else:
        element_token = unwrap_array_token(token)
        if element_token == token:
            return format_type_name(token)
    return resolve_non_generic_type(element_token, structs, memo, stack)
def first_hash_in_text(text: str) -> str:
    """Return the first hashed generic instance symbol in ``text``, or ``""``.

    Delegates to ``extract_hashed_symbol``, which performs the same
    ``HASHED_GENERIC_INSTANCE_RE`` search, so the lookup lives in one place.
    """
    return extract_hashed_symbol(text)
def resolve_non_generic_type(type_name: str, structs, memo, stack):
    """Resolve a type token to a display name.

    Array tokens are resolved through their element type, known structs not
    already on ``stack`` go through ``resolve_type_display``, and everything
    else is formatted directly.
    """
    token = clean_field_type(type_name)
    is_array = "U5BU5D_t" in token or token.endswith("U5BU5D")
    if is_array:
        element = resolve_array_element_type(token, structs, memo, stack)
        return f"{element}[]"
    if token not in stack and token in structs:
        return resolve_type_display(token, structs, memo, stack)
    return format_type_name(token)
def resolve_type_display(symbol: str, structs, memo, stack=None):
    """Resolve a hashed type symbol to a ``Base<Args>`` style display string.

    Generic arguments are recovered from the fields of the parsed struct
    (for example ``____items`` for lists or ``____entries`` for
    dictionaries). Results are cached in ``memo``. ``stack`` holds the
    symbols currently being resolved; a symbol already on it is returned as a
    plain formatted name so recursion terminates.
    """
    if not symbol:
        return ""
    symbol = clean_field_type(symbol)
    remembered = memo.get(symbol)
    if remembered:
        return remembered

    ancestors = set() if stack is None else stack
    if symbol in ancestors:
        return format_type_name(symbol)
    # Copy rather than mutate so sibling resolutions do not see this symbol.
    ancestors = {*ancestors, symbol}

    base = generic_type_base(normalize_type_name(symbol))
    struct = structs.get(symbol)

    def own(*names):
        return field_type(structs, symbol, *names)

    def plain(token):
        return resolve_non_generic_type(token, structs, memo, ancestors)

    def nested(token):
        return resolve_type_display(token, structs, memo, ancestors)

    if symbol.endswith("U5BU5D") or ("U5BU5D_t" in symbol):
        value = f"{resolve_array_element_type(symbol, structs, memo, ancestors)}[]"
    elif struct and base in {"List", "PrivateList", "LowLevelList", "ObservableList"}:
        element = resolve_array_element_type(own("____items"), structs, memo, ancestors)
        value = f"{base}<{element}>" if element else f"{base}<?>"
    elif struct and base in {"ReadOnlyCollection", "Collection"}:
        list_token = own("____list")
        inner_symbol = first_hash_in_text(list_token)
        shown = nested(inner_symbol) if inner_symbol else plain(list_token)
        if shown.startswith("List<"):
            # Unwrap the backing list to expose its element type.
            shown = shown.removeprefix("List<").removesuffix(">")
        value = f"{base}<{shown}>" if shown else f"{base}<?>"
    elif struct and base in {"KeyValuePair"}:
        key_token = own("___key", "____key")
        value_token = own("___value", "____value")
        left = plain(key_token)
        right = plain(value_token)
        value = f"KeyValuePair<{left}, {right}>"
    elif struct and base in {"Nullable"}:
        inner_token = own("___value", "____value")
        value = f"Nullable<{plain(inner_token)}>" if inner_token else "Nullable<?>"
    elif struct and base in {"Dictionary", "PrivateDictionary"}:
        entry_token = clean_field_type(own("____entries"))
        entries_struct = structs.get(entry_token)
        if entries_struct and entries_struct.get("base") == "RuntimeArray":
            entry_token = field_type(structs, entry_token, "m_Items")
        else:
            entry_token = unwrap_array_token(entry_token)
        key_token = field_type(structs, entry_token, "key", "___key")
        value_token = field_type(structs, entry_token, "value", "___value")
        if key_token or value_token:
            value = f"{base}<{plain(key_token)}, {plain(value_token)}>"
        else:
            value = f"{base}<?>"
    elif struct and base in {"Enumerator", "InternalEnumerator"}:
        list_token = own("____list")
        current_token = own("____current")
        if current_token:
            value = f"{base}<{plain(current_token)}>"
        elif list_token:
            inner_symbol = first_hash_in_text(list_token)
            shown = nested(inner_symbol) if inner_symbol else ""
            argument = ""
            if "<" in shown and shown.endswith(">"):
                argument = shown.split("<", 1)[1].rsplit(">", 1)[0]
            value = f"{base}<{argument}>" if argument else f"{base}<?>"
        else:
            value = f"{base}<?>"
    else:
        value = format_type_name(symbol)

    memo[symbol] = value
    return value
def extract_generic_args(display: str) -> list[str]:
    """Split the top-level generic arguments out of a ``Name<A, B<C>>`` string.

    Commas inside nested angle brackets do not split. Returns an empty list
    when the text has no ``<`` or does not end with ``>``.
    """
    if not display.endswith(">") or "<" not in display:
        return []

    body = display.split("<", 1)[1][:-1]
    pieces = []
    nesting = 0
    start = 0
    for position, char in enumerate(body):
        if char == "<":
            nesting += 1
        elif char == ">":
            nesting -= 1
        elif char == "," and nesting == 0:
            pieces.append(body[start:position].strip())
            start = position + 1

    # Only the final piece is dropped when empty; inner ones are kept.
    last = body[start:].strip()
    if last:
        pieces.append(last)
    return pieces
def make_type_node(symbol: str, structs):
    """Create an unresolved type-graph node for ``symbol``.

    The node carries the cleaned symbol, its generic base name, the parsed
    struct (or ``None``), a ``pending`` flag and an empty ``display``.
    """
    token = clean_field_type(symbol)
    node = {"symbol": token}
    node["base"] = generic_type_base(normalize_type_name(token))
    node["struct"] = structs.get(token)
    node["pending"] = True
    node["display"] = None
    return node
def ensure_type_node(symbol: str, nodes, structs):
    """Register ``symbol`` in ``nodes`` and recurse into its struct fields.

    A field is followed when its cleaned type is a known struct or contains
    ``U5BU5D_t``. Symbols already present in ``nodes`` are skipped, which
    also stops cycles.
    """
    token = clean_field_type(symbol)
    if not token or token in nodes:
        return
    nodes[token] = make_type_node(token, structs)

    struct = structs.get(token)
    if not struct:
        return
    for member in struct["fields"]:
        member_token = clean_field_type(member["type"])
        if member_token and (member_token in structs or "U5BU5D_t" in member_token):
            ensure_type_node(member_token, nodes, structs)
def build_type_nodes(symbols, structs):
    """Build the node table for ``symbols`` plus every type reached via ``ensure_type_node``."""
    table = {}
    for entry in symbols:
        ensure_type_node(entry, table, structs)
    return table
def render_token_display(token: str, nodes, resolved, structs):
    """Return the best display string currently available for a type token.

    Lookup order: the ``resolved`` table, then a node whose display is
    already set, then a textual fallback (``Element[]`` for array symbols,
    the formatted name otherwise).

    A node that exists but has no display yet now falls through to the
    textual fallback. Previously its ``None`` display was returned, which
    made the array fallback unreachable for known nodes and let callers
    build strings such as ``"None[]"``.
    """
    clean = clean_field_type(token)
    if not clean:
        return ""
    if clean in resolved:
        return resolved[clean]
    node = nodes.get(clean)
    if node is not None and node["display"]:
        return node["display"]
    if "U5BU5D_t" in clean:
        return f"{format_type_name(clean.split('U5BU5D_t', 1)[0])}[]"
    if clean.endswith("U5BU5D"):
        return f"{format_type_name(clean[:-6])}[]"
    return format_type_name(clean)
def candidate_display(node, nodes, resolved, structs):
    """Compute the current best display string for one type node.

    Generic arguments are read from the node's struct fields and rendered
    with whatever displays are known so far; unknown arguments appear as
    ``?``. It is called repeatedly while the graph converges.
    """
    symbol = node["symbol"]
    base = node["base"]
    struct = node["struct"]

    def show(token):
        return render_token_display(token, nodes, resolved, structs)

    def own(*names):
        return field_type(structs, symbol, *names)

    def from_backing_list():
        # Reuse the first generic argument of the wrapped ____list type.
        arguments = extract_generic_args(show(own("____list")) or "")
        return f"{base}<{arguments[0]}>" if arguments else f"{base}<?>"

    if symbol.endswith("U5BU5D") or "U5BU5D_t" in symbol:
        element_token = own("m_Items")
        if element_token:
            return f"{show(element_token)}[]"
        if "U5BU5D_t" in symbol:
            stem = symbol.split("U5BU5D_t", 1)[0]
        else:
            stem = symbol[:-6]
        return f"{format_type_name(stem)}[]"

    if not struct:
        return format_type_name(symbol)

    if base in {"List", "PrivateList", "LowLevelList", "ObservableList"}:
        shown = show(own("____items")) or ""
        if shown.endswith("[]"):
            shown = shown[:-2]
        return f"{base}<{shown or '?'}>"

    if base in {"ReadOnlyCollection", "Collection"}:
        return from_backing_list()

    if base in {"KeyValuePair"}:
        key_token = own("___key", "____key")
        value_token = own("___value", "____value")
        left = show(key_token)
        right = show(value_token)
        return f"KeyValuePair<{left or '?'}, {right or '?'}>"

    if base in {"Nullable"}:
        inner = show(own("___value", "____value"))
        return f"Nullable<{inner or '?'}>"

    if base in {"Dictionary", "PrivateDictionary"}:
        entries_token = own("____entries")
        entries_shown = show(entries_token) or ""
        entries_clean = clean_field_type(entries_token)
        entry_token = field_type(structs, entries_clean, "m_Items")
        if not entry_token and entries_shown.endswith("[]"):
            entry_token = entries_clean.split("U5BU5D_t", 1)[0]
        entry_clean = clean_field_type(entry_token)
        key_token = field_type(structs, entry_clean, "key", "___key")
        value_token = field_type(structs, entry_clean, "value", "___value")
        left = show(key_token)
        right = show(value_token)
        return f"{base}<{left or '?'}, {right or '?'}>"

    if base in {"Enumerator", "InternalEnumerator"}:
        current_token = own("____current")
        if current_token:
            return f"{base}<{show(current_token) or '?'}>"
        return from_backing_list()

    if base.startswith("ValueTuple"):
        shown_items = []
        for index in range(1, 9):
            item_token = own(f"___item{index}", f"____item{index}")
            if not item_token:
                break
            shown_items.append(show(item_token) or "?")
        if shown_items:
            return f"{base}<{', '.join(shown_items)}>"

    return format_type_name(symbol)
def resolve_type_nodes(nodes, structs, rounds=8):
    """Iteratively compute display strings for all nodes.

    Each round recomputes every node's candidate display from what is
    currently known. Iteration stops early once a full round changes
    nothing, or after ``rounds`` passes. Nodes still without a display fall
    back to their formatted symbol. Returns a ``symbol -> display`` dict.
    """
    resolved = {}
    for _ in range(rounds):
        dirty = False
        for symbol, node in nodes.items():
            shown = candidate_display(node, nodes, resolved, structs)
            if not shown or shown == node["display"]:
                continue
            node["display"] = shown
            # Still pending while any generic argument is unknown or empty.
            node["pending"] = "<?>" in shown or "<>" in shown
            resolved[symbol] = shown
            dirty = True
        if not dirty:
            break

    for symbol, node in nodes.items():
        if not node["display"]:
            node["display"] = format_type_name(symbol)
            node["pending"] = False
        resolved[symbol] = node["display"]
    return resolved
def resolve_generic_methods(root: Path, resolved_variants):
    """Aggregate inline generic method wrappers across all source files.

    Every source file directly under ``root`` is parsed by
    ``parse_method_file`` on a thread pool. Partial results are merged per
    ``(group_name, variation)`` key, summing hits and per-assembly counts.
    Rows are sorted by descending hits, then group name, then variation.

    The symbol-to-display lookup is built once and shared by all work items.
    It was previously rebuilt for every file inside the list comprehension,
    although the workers only read from it and it holds the same mapping as
    the owner lookup.
    """
    lookup = build_variant_display_lookup(resolved_variants)
    paths = [path for path in sorted(root.iterdir()) if path.is_file() and path.suffix in SOURCE_SUFFIXES]
    items = [(path, lookup, lookup) for path in paths]

    methods = {}
    for partial in run_parallel(items, parse_method_file, process=False):
        for key, row in partial.items():
            item = methods.setdefault(
                key,
                {
                    "group_name": row["group_name"],
                    "owner": row["owner"],
                    "variation": row["variation"],
                    "hits": 0,
                    "assemblies": Counter(),
                },
            )
            item["hits"] += row["hits"]
            item["assemblies"].update(row["assemblies"])

    rows = [
        {
            "group_name": item["group_name"],
            "owner": item["owner"],
            "variation": item["variation"],
            "hits": item["hits"],
            "assemblies_count": len(item["assemblies"]),
            "assemblies": top_counter(item["assemblies"], 10),
        }
        for item in methods.values()
    ]
    rows.sort(key=lambda row: (-row["hits"], row["group_name"], row["variation"]))
    return rows
def parse_method_file(item):
    """Collect inline generic method wrappers from one source file.

    ``item`` is a ``(path, symbol_to_display, type_lookup)`` tuple. Only
    lines starting with ``inline `` are considered. A wrapper is counted
    when its first parameter contains a hashed generic symbol with a known
    display. Returns a dict keyed by ``(group_name, variation)``.
    """
    path, symbol_to_display, type_lookup = item
    assembly = assembly_name(path.name)
    collected = {}
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        for raw in handle:
            text = raw.strip()
            if not text.startswith("inline "):
                continue
            signature = INLINE_SIGNATURE_RE.match(text)
            if not signature:
                continue
            return_type_raw, method_name, params_text = signature.groups()
            normalized = normalize_method_name(method_name)
            owner_base, member = split_generic_method_name(normalized)
            params = split_params(params_text)
            if not params:
                continue
            # The owning generic instance is taken from the first parameter.
            owner_symbol = extract_hashed_symbol(params[0])
            if not owner_symbol:
                continue
            owner_display = symbol_to_display.get(owner_symbol)
            if not owner_display:
                continue

            param_types = [
                resolve_cpp_type_display(type_text, type_lookup)
                for type_text in map(extract_type_text, params[1:])
                if type_text
            ]
            return_type = resolve_cpp_type_display(return_type_raw, type_lookup)
            variation = render_method_signature(owner_display, member, return_type, param_types)
            group_name = pretty_generic_method_base(normalized)

            key = (group_name, variation)
            record = collected.get(key)
            if record is None:
                record = {
                    "group_name": group_name,
                    "owner": owner_base,
                    "variation": variation,
                    "hits": 0,
                    "assemblies": Counter(),
                }
                collected[key] = record
            record["hits"] += 1
            record["assemblies"][assembly] += 1
    return collected
def root_signature(root: Path):
    """Summarise the source files directly under ``root``.

    Returns the cache version, the number of source files, their combined
    size and the newest modification time in nanoseconds (``0`` when there
    are no files).
    """
    stats = []
    for entry in root.iterdir():
        if entry.is_file() and entry.suffix in SOURCE_SUFFIXES:
            stats.append(entry.stat())

    return {
        "cache_version": CACHE_VERSION,
        "file_count": len(stats),
        "total_size": sum(info.st_size for info in stats),
        "latest_mtime_ns": max([0] + [info.st_mtime_ns for info in stats]),
    }
def resolve_generic_variants(root: Path, structs):
    """Find every hashed generic instance under ``root`` and give it a display name.

    Usage counts come from ``parse_variant_file``, signature-based evidence
    from ``parse_variant_evidence_file``, and struct-based displays from the
    type-node graph. Evidence overrides the struct-based display for the
    delegate-like bases and for displays that still contain ``<?>``. Rows
    are sorted by descending hits, then display.
    """
    paths = [path for path in sorted(root.iterdir()) if path.is_file() and path.suffix in SOURCE_SUFFIXES]

    usage = {}
    for partial_usage, _unused in run_parallel(paths, parse_variant_file):
        for symbol, seen in partial_usage.items():
            total = usage.get(symbol)
            if total is None:
                total = {"base": seen["base"], "hits": 0, "assemblies": Counter()}
                usage[symbol] = total
            total["hits"] += seen["hits"]
            total["assemblies"].update(seen["assemblies"])

    known_symbols = set(usage)
    evidence = {}
    jobs = [(path, known_symbols, structs) for path in paths]
    for partial_evidence in run_parallel(jobs, parse_variant_evidence_file, process=False):
        for symbol, votes in partial_evidence.items():
            evidence.setdefault(symbol, Counter()).update(votes)

    nodes = build_type_nodes(usage.keys(), structs)
    resolved = resolve_type_nodes(nodes, structs)

    evidence_first_bases = {"Action", "Func", "Predicate", "Comparison", "UnityAction"}
    variants = []
    for symbol, info in usage.items():
        winner = evidence.get(symbol, Counter()).most_common(1)
        display = resolved.get(symbol) or nodes.get(symbol, {}).get("display") or format_type_name(symbol)
        if winner and (info["base"] in evidence_first_bases or not display or "<?>" in display):
            display = winner[0][0]
        if not display:
            display = f"{info['base']}<?>"
        variants.append(
            {
                "symbol": symbol,
                "base": info["base"],
                "display": display,
                "hits": info["hits"],
                "assemblies_count": len(info["assemblies"]),
                "assemblies": top_counter(info["assemblies"], 10),
            }
        )

    def sort_key(row):
        return (-row["hits"], row["display"])

    variants.sort(key=sort_key)
    return variants
def parse_variant_file(path: Path):
    """Count the lines of one file that mention each hashed generic instance.

    A symbol is counted once per line, however often it occurs on that line.
    Returns ``(usage, {})``; the second element is always an empty dict.
    """
    assembly = assembly_name(path.name)
    usage = {}
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        for text in handle:
            for symbol in set(HASHED_GENERIC_INSTANCE_RE.findall(text)):
                record = usage.get(symbol)
                if record is None:
                    record = {
                        "base": generic_type_base(normalize_type_name(symbol)),
                        "hits": 0,
                        "assemblies": Counter(),
                    }
                    usage[symbol] = record
                record["hits"] += 1
                record["assemblies"][assembly] += 1
    return usage, {}
def parse_variant_evidence_file(item):
    """Derive display-name votes for generic symbols from inline signatures.

    Args:
        item: A ``(path, unresolved, structs)`` tuple: the file to read, the
            collection of symbols that votes are wanted for, and the type
            graph handed to ``pretty_cpp_type_resolved``.

    Returns:
        A dict mapping symbol to a ``Counter`` of candidate display strings.
    """
    path, unresolved, structs = item
    memo = {}  # shared across the file's calls to pretty_cpp_type_resolved
    evidence = {}
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        for raw in handle:
            candidate = raw.strip()
            signature = INLINE_SIGNATURE_RE.match(candidate) if candidate.startswith("inline ") else None
            if signature is None:
                continue
            return_type_raw, method_name, params_text = signature.groups()
            params = split_params(params_text)
            # Only the first parameter is inspected for the hashed symbol.
            symbol = extract_hashed_symbol(params[0]) if params else None
            if not symbol or symbol not in unresolved:
                continue
            type_texts = (extract_type_text(param) for param in params[1:])
            param_types = [
                pretty_cpp_type_resolved(type_text, structs, memo)
                for type_text in type_texts
                if type_text
            ]
            return_type = pretty_cpp_type_resolved(return_type_raw, structs, memo)
            display = infer_generic_display(method_name, return_type, param_types)
            if display:
                if symbol not in evidence:
                    evidence[symbol] = Counter()
                evidence[symbol][display] += 1
    return evidence
def build_variant_display_lookup(resolved_variants):
    """Map each variant row's ``symbol`` to its ``display`` string.

    When a symbol occurs more than once, the last row's display is kept.
    """
    return {entry["symbol"]: entry["display"] for entry in resolved_variants}
def resolve_cpp_type_display(type_text: str, type_lookup) -> str:
    """Return a readable name for the type named in a C++ type expression.

    The keywords ``const``/``struct``/``class``/``enum``/``volatile`` and any
    ``*``/``&`` characters are dropped; the last remaining whitespace-separated
    token is then looked up in *type_lookup*, falling back to
    ``format_type_name``. Returns ``""`` when nothing is left to look up.
    """
    if not type_text:
        return ""
    without_keywords = re.sub(r"\b(const|struct|class|enum|volatile)\b", "", type_text)
    words = without_keywords.replace("*", "").replace("&", "").split()
    if not words:
        return ""
    token = words[-1]
    return type_lookup[token] if token in type_lookup else format_type_name(token)
def generic_type_base(name: str) -> str:
    """Strip a trailing arity suffix such as ``_1`` or ``_12`` from *name*.

    Names without such a suffix (including ones ending in ``_0``) are
    returned unchanged.
    """
    suffix = re.search(r"_[1-9][0-9]*$", name)
    if suffix is None:
        return name
    return name[: suffix.start()] + name[suffix.end():]
def generic_method_base(name: str) -> str:
    """Remove every ``_T<word characters>`` run from *name*.

    NOTE(review): because the pattern is ``_T`` followed by any word
    characters, it also removes member names that merely start with ``T``
    (for example ``_TryGetValue``) together with everything after them —
    confirm that this is intended.
    """
    kept_pieces = re.split(r"_T[A-Za-z0-9_]+", name)
    return "".join(kept_pieces)
def generic_method_owner(name: str) -> str:
    """Return the owner type base for a name starting with ``<Type>_<arity>_``.

    The leading ``<alnum>_<arity>`` token is passed through
    ``generic_type_base``; an empty string is returned when *name* does not
    start with such a prefix followed by an underscore.
    """
    prefix = re.match(r"([A-Za-z0-9]+_[1-9][0-9]*)_", name)
    if prefix is None:
        return ""
    return generic_type_base(prefix.group(1))
def split_generic_method_name(name: str):
    """Split a generic method name into ``(owner, member)``.

    The name is first reduced with ``generic_method_base``. A
    ``<Type>_<arity>__<member>`` layout is tried before
    ``<Type>_<arity>_<member>``; if neither matches, the owner comes from
    ``generic_method_owner`` and the whole reduced name is the member.
    """
    base = generic_method_base(name)
    layouts = (
        r"^([A-Za-z0-9]+_[1-9][0-9]*)__([A-Za-z0-9_]+)$",
        r"^([A-Za-z0-9]+_[1-9][0-9]*)_([A-Za-z0-9_]+)$",
    )
    for layout in layouts:
        found = re.match(layout, base)
        if found:
            owner_token, member = found.groups()
            return generic_type_base(owner_token), member
    return generic_method_owner(base), base
def pretty_generic_method_base(name: str) -> str:
    """Return the group label used for a generic method name.

    Enumerator-related members and owners are pooled under
    ``"Iteration Overhead"``; otherwise the label is ``owner.member`` when
    both parts are non-empty, and the ``generic_method_base`` of *name* when
    either is missing.
    """
    owner, member = split_generic_method_name(name)
    iteration_members = {"MoveNext", "Dispose", "GetEnumerator"}
    iteration_owners = {"Enumerator", "InternalEnumerator"}
    if member in iteration_members or owner in iteration_owners:
        return "Iteration Overhead"
    if owner and member:
        return f"{owner}.{member}"
    return generic_method_base(name)
def build_generic_type_groups(constructs, total_size, resolved_variants):
    """Group ``generic_type`` constructs by base type and attach their variations.

    Args:
        constructs: Rows with ``category``, ``name``, ``assembly``, ``count``
            and ``size_est``; only rows whose category is ``"generic_type"``
            are used.
        total_size: Denominator passed to ``ratio`` for ``size_pct_total``.
        resolved_variants: Rows with ``base``, ``display``, ``hits``,
            ``assemblies_count`` and ``assemblies``.

    Returns:
        One dict per base type, ordered by descending ``size_est``, then
        descending ``count``, then name. Each group's size is apportioned to
        its variations in proportion to their hit counts.
    """
    # base -> display -> merged stats of all variants sharing that display.
    resolved_by_base = {}
    for variant in resolved_variants:
        per_display = resolved_by_base.setdefault(variant["base"], {})
        merged = per_display.get(variant["display"])
        if merged is None:
            merged = {"variation": variant["display"], "hits": 0, "assemblies_count": 0, "assemblies": []}
            per_display[variant["display"]] = merged
        merged["hits"] += variant["hits"]
        merged["assemblies_count"] = max(merged["assemblies_count"], variant["assemblies_count"])
        # The first non-empty assembly list is kept for the merged entry.
        if not merged["assemblies"]:
            merged["assemblies"] = variant["assemblies"]

    groups = {}
    for construct in constructs:
        if construct["category"] != "generic_type":
            continue
        base = generic_type_base(construct["name"])
        group = groups.get(base)
        if group is None:
            group = {
                "id": make_id("gtype", base),
                "name": base,
                "count": 0,
                "size_est": 0,
                "assemblies": set(),
                "variations": [],
            }
            groups[base] = group
        group["count"] += construct["count"]
        group["size_est"] += construct["size_est"]
        group["assemblies"].add(construct["assembly"])
        group["variations"].append(
            {"assembly": construct["assembly"], "name": construct["name"], "count": construct["count"]}
        )

    rows = []
    for group in groups.values():
        group_size = group["size_est"]
        resolved = sorted(
            resolved_by_base.get(group["name"], {}).values(),
            key=lambda entry: (-entry["hits"], entry["variation"]),
        )
        if resolved:
            total_hits = sum(entry["hits"] for entry in resolved)
            variations = [
                {
                    "variation": entry["variation"],
                    "hits": entry["hits"],
                    "assemblies_count": entry["assemblies_count"],
                    "assemblies": ", ".join(top["name"] for top in entry["assemblies"][:4]),
                    "size_est": int(round(group_size * entry["hits"] / total_hits)) if total_hits else 0,
                }
                for entry in resolved
            ]
        else:
            # Nothing resolved for this base: report one unknown variation per assembly.
            per_assembly = Counter()
            for seen in group["variations"]:
                per_assembly[seen["assembly"]] += seen["count"]
            total_hits = sum(per_assembly.values()) or 1
            variations = [
                {
                    "variation": f"{group['name']}<?>",
                    "hits": count,
                    "assemblies_count": 1,
                    "assemblies": assembly,
                    "size_est": int(round(group_size * count / total_hits)),
                }
                for assembly, count in per_assembly.most_common()
            ]
        top_variation = variations[0]["variation"] if variations else f"{group['name']}<?>"
        rows.append(
            {
                "id": group["id"],
                "name": group["name"],
                "top_variation": top_variation,
                "count": group["count"],
                "size_est": group_size,
                "size_pct_total": ratio(group_size, total_size),
                "assemblies_count": len(group["assemblies"]),
                "variation_count": len(variations),
                "variations": variations,
            }
        )
    return sorted(rows, key=lambda entry: (-entry["size_est"], -entry["count"], entry["name"]))
def build_generic_method_groups(constructs, total_size, resolved_methods):
    """Group ``generic_method`` constructs by pretty base name and attach variations.

    Args:
        constructs: Rows with ``category``, ``name``, ``assembly``, ``count``
            and ``size_est``; only rows whose category is ``"generic_method"``
            are used.
        total_size: Denominator passed to ``ratio`` for ``size_pct_total``.
        resolved_methods: Rows with ``group_name``, ``variation``, ``owner``,
            ``hits``, ``assemblies_count`` and ``assemblies``.

    Returns:
        One dict per method group, ordered by descending ``size_est``, then
        descending ``count``, then name.
    """
    # group name -> variation -> merged stats of rows sharing that variation.
    resolved_by_group = {}
    for method in resolved_methods:
        per_variation = resolved_by_group.setdefault(method["group_name"], {})
        merged = per_variation.get(method["variation"])
        if merged is None:
            merged = {
                "variation": method["variation"],
                "owner": method["owner"],
                "hits": 0,
                "assemblies_count": 0,
                "assemblies": [],
            }
            per_variation[method["variation"]] = merged
        merged["hits"] += method["hits"]
        merged["assemblies_count"] = max(merged["assemblies_count"], method["assemblies_count"])
        # The first non-empty assembly list is kept for the merged entry.
        if not merged["assemblies"]:
            merged["assemblies"] = method["assemblies"]

    groups = {}
    for construct in constructs:
        if construct["category"] != "generic_method":
            continue
        raw_name = construct["name"]
        base = pretty_generic_method_base(raw_name)
        group = groups.get(base)
        if group is None:
            # The group's owner comes from the first construct that lands in it.
            group = {
                "id": make_id("gmethod", base),
                "name": base,
                "owner": split_generic_method_name(raw_name)[0],
                "count": 0,
                "size_est": 0,
                "assemblies": set(),
                "variations": [],
            }
            groups[base] = group
        group["count"] += construct["count"]
        group["size_est"] += construct["size_est"]
        group["assemblies"].add(construct["assembly"])
        group["variations"].append(
            {
                "assembly": construct["assembly"],
                "name": raw_name,
                "owner": generic_method_owner(raw_name),
                "count": construct["count"],
                "size_est": construct["size_est"],
            }
        )

    rows = []
    for group in groups.values():
        group_size = group["size_est"]
        resolved = sorted(
            resolved_by_group.get(group["name"], {}).values(),
            key=lambda entry: (-entry["hits"], entry["variation"]),
        )
        if resolved:
            # Apportion the group's size to variations by their share of hits.
            total_hits = sum(entry["hits"] for entry in resolved)
            variations = [
                {
                    "variation": entry["variation"],
                    "owner": entry["owner"],
                    "hits": entry["hits"],
                    "assemblies_count": entry["assemblies_count"],
                    "assemblies": ", ".join(top["name"] for top in entry["assemblies"][:4]),
                    "size_est": int(round(group_size * entry["hits"] / total_hits)) if total_hits else 0,
                }
                for entry in resolved
            ]
        else:
            # Nothing resolved: fall back to the raw per-assembly construct rows.
            ranked = sorted(
                group["variations"],
                key=lambda entry: (-entry["size_est"], -entry["count"], entry["assembly"], entry["name"]),
            )
            variations = [
                {
                    "variation": entry["name"],
                    "owner": entry["owner"],
                    "hits": entry["count"],
                    "assemblies_count": 1,
                    "assemblies": entry["assembly"],
                    "size_est": entry["size_est"],
                }
                for entry in ranked
            ]
        rows.append(
            {
                "id": group["id"],
                "name": group["name"],
                "owner": group["owner"],
                "top_variation": variations[0]["variation"] if variations else group["name"],
                "count": group["count"],
                "size_est": group_size,
                "size_pct_total": ratio(group_size, total_size),
                "assemblies_count": len(group["assemblies"]),
                "variation_count": len(variations),
                "resolved_count": sum(1 for entry in variations if "<" in entry["variation"]),
                "variations": variations,
            }
        )
    return sorted(rows, key=lambda entry: (-entry["size_est"], -entry["count"], entry["name"]))
def build_report(files, resolved_variants, resolved_methods):
    """Assemble the complete report dictionary from the per-file scan results.

    Args:
        files: Per-file rows carrying ``size``, ``line_count``,
            ``is_generic_file``, ``type_decl_count``, ``method_count``,
            ``generic_method_count`` and ``rgctx_count``.
        resolved_variants: Passed to ``build_generic_type_groups`` and stored
            as ``resolved_generic_variants``.
        resolved_methods: Passed to ``build_generic_method_groups`` and stored
            as ``resolved_generic_methods``.

    Returns:
        A dict with the summary totals, per-assembly rows (largest first),
        the namespace tree, construct rows, generic groups and global top
        lists.
    """

    def sum_of(field):
        return sum(entry[field] for entry in files)

    def generic_type_counts(assembly):
        # Keep only the type names that carry an arity suffix.
        return Counter(
            {
                name: count
                for name, count in assembly["type_counter"].items()
                if GENERIC_TYPE_NAME_RE.fullmatch(name)
            }
        )

    total_size = sum_of("size")
    total_lines = sum_of("line_count")
    generic_size = sum(entry["size"] for entry in files if entry["is_generic_file"])
    total_types = sum_of("type_decl_count")
    total_methods = sum_of("method_count")
    total_generic_methods = sum_of("generic_method_count")
    total_rgctx = sum_of("rgctx_count")

    assemblies = aggregate_assemblies(files)
    global_markers = Counter()
    global_generic_methods = Counter()
    global_generic_types = Counter()
    for assembly in assemblies.values():
        global_markers.update(assembly["marker_counter"])
        global_generic_methods.update(assembly["generic_method_counter"])
        global_generic_types.update(generic_type_counts(assembly))

    constructs = build_construct_rows(assemblies, total_size)
    generic_type_groups = build_generic_type_groups(constructs, total_size, resolved_variants)
    generic_method_groups = build_generic_method_groups(constructs, total_size, resolved_methods)

    summary = {
        "source_files": len(files),
        "total_size": total_size,
        "total_lines": total_lines,
        "generic_size": generic_size,
        "generic_size_pct_total": ratio(generic_size, total_size),
        "total_types": total_types,
        "total_methods": total_methods,
        "total_generic_methods": total_generic_methods,
        "total_rgctx": total_rgctx,
    }

    assembly_rows = []
    for assembly in sorted(assemblies.values(), key=lambda row: row["size"], reverse=True):
        assembly_rows.append(
            {
                "id": make_id("assembly", assembly["name"]),
                "name": assembly["name"],
                "group": assembly["group"],
                "size": assembly["size"],
                "size_pct_total": ratio(assembly["size"], total_size),
                "size_pct_generic": ratio(assembly["size"], generic_size),
                "file_count": assembly["file_count"],
                "method_count": assembly["method_count"],
                "generic_method_count": assembly["generic_method_count"],
                "rgctx_count": assembly["rgctx_count"],
                "markers": dict(assembly["marker_counter"]),
                "top_generic_methods": top_counter(assembly["generic_method_counter"], 30),
                "top_generic_types": top_counter(generic_type_counts(assembly), 30),
            }
        )

    namespaces = build_namespace_tree(
        files,
        total_size,
        generic_size,
        total_methods,
        total_generic_methods,
        total_rgctx,
    )

    return {
        "summary": summary,
        "assemblies": assembly_rows,
        "namespaces": namespaces,
        "constructs": constructs,
        "resolved_generic_variants": resolved_variants,
        "resolved_generic_methods": resolved_methods,
        "generic_type_groups": generic_type_groups,
        "generic_method_groups": generic_method_groups,
        "global_markers": top_counter(global_markers, 50),
        "global_generic_methods": top_counter(global_generic_methods, 50),
        "global_generic_types": top_counter(global_generic_types, 50),
    }
def print_console_report(report):
    """Print the summary totals and the 20 largest constructs to stdout."""
    summary = report["summary"]
    headline = (
        f"source files: {summary['source_files']}",
        f"total source size: {format_mb(summary['total_size'])}",
        f"generic source size: {format_mb(summary['generic_size'])} ({summary['generic_size_pct_total']}%)",
        f"total methods: {summary['total_methods']}",
        f"generic methods: {summary['total_generic_methods']}",
        f"rgctx refs: {summary['total_rgctx']}",
    )
    for text in headline:
        print(text)
    print()
    print("top constructs:")
    largest = sorted(report["constructs"], key=lambda item: item["size_est"], reverse=True)[:20]
    for row in largest:
        print(
            f" {format_mb(row['size_est']):>10} {row['assembly']} "
            f"{row['category']} {row['name']} count={row['count']}"
        )
def table_html(table_id, rows, columns, row_builder=None):
    """Render *rows* as an HTML ``<table class="report-table">`` string.

    Args:
        table_id: Value of the table's ``id`` attribute.
        rows: Iterable of dict rows. Each row needs an ``"id"`` key unless
            *row_builder* is supplied.
        columns: Dicts with string ``key``, ``type`` and ``label`` entries;
            they define both the header cells and which row values are shown.
        row_builder: Optional callable returning the complete ``<tr>...</tr>``
            markup for a row. Its output is inserted as-is, without escaping.

    Returns:
        The table markup. Header text, data attributes and cell values are
        HTML-escaped.

    Raises:
        KeyError: If a row has no ``"id"`` and no *row_builder* is given.
    """
    head = "".join(
        f'<th data-key="{html.escape(col["key"])}" data-type="{html.escape(col["type"])}">{html.escape(col["label"])}</th>'
        for col in columns
    )
    body = []
    for row in rows:
        if row_builder:
            body.append(row_builder(row))
            continue
        attrs = {
            # Coerce to str like the other attributes: html.escape() only
            # accepts strings, so a numeric id used to raise AttributeError.
            "data-row-id": str(row["id"]),
            "data-name": str(row.get("name", "")),
            "data-assembly": str(row.get("assembly", "")),
            "data-category": str(row.get("category", row.get("group", ""))),
        }
        attr_text = " ".join(f'{key}="{html.escape(value)}"' for key, value in attrs.items())
        cells = "".join(f"<td>{html.escape(str(row.get(col['key'], '')))}</td>" for col in columns)
        body.append(f"<tr {attr_text}>{cells}</tr>")
    return f'<table id="{html.escape(table_id)}" class="report-table"><thead><tr>{head}</tr></thead><tbody>{"".join(body)}</tbody></table>'
def render_html(report):
    """Render *report* as a self-contained HTML page with an inline viewer.

    The page shows summary cards and embeds the whole report as JSON inside an
    inline ``<script>``; the script builds the assemblies / generic types /
    generic methods tabs, the sortable and filterable list, and the detail
    panel on the client.

    Args:
        report: The dict produced by ``build_report``; it must be JSON
            serialisable and contain a ``"summary"`` mapping.

    Returns:
        The complete HTML document as a string.

    Notes:
        * ``<``, ``>``, ``&``, U+2028 and U+2029 in the serialised JSON are
          written as ``\\uXXXX`` escapes so that report data (for example a
          ``</script>`` sequence inside a name) cannot terminate the script
          element. They only occur inside JSON strings, so the parsed value
          is unchanged.
        * Every value the detail renderers insert through ``innerHTML`` goes
          through ``esc()``; advice such as ``IEnumerable<T>`` was previously
          parsed as markup and lost its ``<T>``.
        * The generic-method detail table reads the ``variation``, ``hits``
          and ``assemblies`` keys that ``build_generic_method_groups`` emits
          for each variation.
    """
    summary = report["summary"]
    report_json = (
        json.dumps(report, ensure_ascii=False)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
        .replace("\u2028", "\\u2028")
        .replace("\u2029", "\\u2029")
    )
    cards = [
        ("source files", summary["source_files"]),
        ("total source size", format_mb(summary["total_size"])),
        ("generic source size", f"{format_mb(summary['generic_size'])} ({summary['generic_size_pct_total']}%)"),
        ("methods", summary["total_methods"]),
        ("generic methods", summary["total_generic_methods"]),
        ("rgctx refs", summary["total_rgctx"]),
    ]
    cards_html = "".join(
        f'<div class="card"><div class="card-title">{html.escape(str(label))}</div><div class="card-value">{html.escape(str(value))}</div></div>'
        for label, value in cards
    )
    # Template note: doubled braces are literal braces in the emitted CSS/JS;
    # only {cards_html} and {report_json} are substituted by Python.
    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>IL2CPP Report</title>
<style>
:root {{
--bg:#0b1020; --panel:#121933; --panel2:#182142; --panel3:#1f2a52; --text:#e8edff; --muted:#9fb0e3; --border:#2a3566;
}}
* {{ box-sizing:border-box; }}
body {{ margin:0; background:var(--bg); color:var(--text); font:14px/1.4 ui-sans-serif,system-ui,sans-serif; }}
.page {{ width:100%; padding:20px; display:grid; gap:20px; }}
.cards {{ display:grid; grid-template-columns:repeat(auto-fit,minmax(170px,1fr)); gap:12px; }}
.card,.panel {{ background:var(--panel); border:1px solid var(--border); border-radius:10px; padding:14px; min-width:0; }}
.card-title,.section-label {{ color:var(--muted); text-transform:uppercase; font-size:11px; letter-spacing:.06em; margin-bottom:8px; }}
.card-value {{ font-size:22px; font-weight:700; }}
.tabs {{ display:flex; gap:10px; flex-wrap:wrap; }}
.tab {{
background:var(--panel); color:var(--muted); border:1px solid var(--border);
border-radius:999px; padding:8px 12px; cursor:pointer;
}}
.tab.active {{ background:var(--panel3); color:var(--text); }}
.layout {{ display:grid; grid-template-columns:minmax(0,1fr) minmax(0,1fr); gap:20px; align-items:start; }}
.controls {{ display:flex; gap:10px; align-items:center; margin-bottom:10px; }}
input {{ background:var(--panel2); color:var(--text); border:1px solid var(--border); border-radius:8px; padding:8px 10px; width:100%; }}
.panel h2,.panel h3 {{ margin:0 0 12px; font-size:16px; }}
.list-wrap,.detail-wrap,.table-wrap {{ overflow:auto; max-height:72vh; }}
.report-table {{ width:100%; border-collapse:collapse; font-size:12px; table-layout:fixed; }}
.report-table th,.report-table td {{
padding:7px 8px; height:34px; border-bottom:1px solid var(--border); vertical-align:middle;
overflow:hidden; text-overflow:ellipsis; white-space:nowrap;
}}
.report-table th {{ text-align:left; color:var(--muted); position:sticky; top:0; background:var(--panel); cursor:pointer; }}
.report-table tbody tr:hover {{ background:rgba(255,255,255,.04); }}
.selected-row {{ background:rgba(159,176,227,.12) !important; }}
.muted {{ color:var(--muted); }}
.kv {{ display:grid; grid-template-columns:140px 1fr; gap:6px 10px; margin-bottom:14px; }}
.subgrid {{ display:grid; grid-template-columns:repeat(2,minmax(0,1fr)); gap:12px; }}
.subpanel {{ background:var(--panel2); border:1px solid var(--border); border-radius:8px; padding:10px; min-height:120px; }}
.subpanel h4 {{ margin:0 0 8px; color:var(--muted); text-transform:uppercase; font-size:12px; }}
.subpanel ol,.subpanel ul {{ margin:0; padding-left:18px; font-size:12px; }}
.chips {{ display:flex; flex-wrap:wrap; gap:6px; }}
.chip {{ background:var(--panel2); border:1px solid var(--border); border-radius:999px; padding:4px 8px; font-size:12px; }}
@media (max-width:1200px) {{ .layout {{ grid-template-columns:1fr; }} }}
</style>
</head>
<body>
<div class="page">
<div class="cards">{cards_html}</div>
<div class="tabs">
<button class="tab active" data-tab="assemblies">assemblies</button>
<button class="tab" data-tab="genericTypes">generic types</button>
<button class="tab" data-tab="genericMethods">generic methods</button>
</div>
<div class="layout">
<div class="panel">
<h2 id="list-title">Assemblies</h2>
<div class="controls">
<input id="list-filter" placeholder="filter by name">
</div>
<div class="list-wrap">
<table id="list-table" class="report-table">
<thead></thead>
<tbody></tbody>
</table>
</div>
</div>
<div class="panel">
<h2 id="detail-title">Details</h2>
<div class="detail-wrap" id="detail-panel">
<div class="muted">select an item on the left</div>
</div>
</div>
</div>
</div>
<script>
const report = {report_json};
const tabs = {{
assemblies: {{
title: 'Assemblies',
rows: report.assemblies,
columns: [
{{ key: 'name', label: 'assembly', type: 'text' }},
{{ key: 'size', label: 'size bytes', type: 'number' }},
{{ key: 'size_pct_total', label: '% total', type: 'number' }},
{{ key: 'method_count', label: 'methods', type: 'number' }},
{{ key: 'generic_method_count', label: 'generic methods', type: 'number' }},
{{ key: 'rgctx_count', label: 'rgctx', type: 'number' }},
],
}},
genericTypes: {{
title: 'Generic Types',
rows: report.generic_type_groups,
columns: [
{{ key: 'name', label: 'base type', type: 'text' }},
{{ key: 'top_variation', label: 'top variation', type: 'text' }},
{{ key: 'count', label: 'count', type: 'number' }},
{{ key: 'size_est', label: 'est. bytes', type: 'number' }},
{{ key: 'size_pct_total', label: '% total', type: 'number' }},
{{ key: 'assemblies_count', label: 'assemblies', type: 'number' }},
{{ key: 'variation_count', label: 'variations', type: 'number' }},
],
}},
genericMethods: {{
title: 'Generic Methods',
rows: report.generic_method_groups,
columns: [
{{ key: 'name', label: 'base method', type: 'text' }},
{{ key: 'top_variation', label: 'top signature', type: 'text' }},
{{ key: 'count', label: 'count', type: 'number' }},
{{ key: 'size_est', label: 'est. bytes', type: 'number' }},
{{ key: 'size_pct_total', label: '% total', type: 'number' }},
{{ key: 'assemblies_count', label: 'assemblies', type: 'number' }},
{{ key: 'variation_count', label: 'variations', type: 'number' }},
],
}},
}};
const state = {{
tab: 'assemblies',
selected: {{ assemblies: null, genericTypes: null, genericMethods: null }},
sort: {{
assemblies: {{ key: 'size', type: 'number', desc: true }},
genericTypes: {{ key: 'size_est', type: 'number', desc: true }},
genericMethods: {{ key: 'size_est', type: 'number', desc: true }},
}},
filter: {{ assemblies: '', genericTypes: '', genericMethods: '' }},
}};
function fmtNumber(v) {{ return new Intl.NumberFormat('en-US').format(v || 0); }}
function fmtPercent(v) {{ return `${{Number(v || 0).toFixed(3)}}%`; }}
function fmtBytes(v) {{
const mb = (v || 0) / 1024 / 1024;
return `${{mb.toFixed(2)}} MB (${{fmtNumber(v || 0)}} bytes)`;
}}
function esc(v) {{
return String(v ?? '')
.replaceAll('&', '&amp;')
.replaceAll('<', '&lt;')
.replaceAll('>', '&gt;')
.replaceAll('"', '&quot;');
}}
function renderCountList(items, title) {{
if (!items || !items.length) return `<div class="subpanel"><h4>${{esc(title)}}</h4><div class="muted">empty</div></div>`;
return `<div class="subpanel"><h4>${{esc(title)}}</h4><ol>${{items.map(item => `<li>${{esc(item.name)}} (${{fmtNumber(item.count)}})</li>`).join('')}}</ol></div>`;
}}
function renderVariantTable(items, columns) {{
if (!items.length) return '<div class="muted">empty</div>';
const head = columns.map(col => `<th>${{esc(col.label)}}</th>`).join('');
const rows = items.map(row => `<tr>${{columns.map(col => `<td title="${{esc(row[col.key] ?? '')}}">${{esc(row[col.key] ?? '')}}</td>`).join('')}}</tr>`).join('');
return `<div class="table-wrap"><table class="report-table"><thead><tr>${{head}}</tr></thead><tbody>${{rows}}</tbody></table></div>`;
}}
function renderChips(markers) {{
const entries = Object.entries(markers || {{}}).sort((a, b) => b[1] - a[1]).slice(0, 12);
if (!entries.length) return '<div class="muted">no markers</div>';
return `<div class="chips">${{entries.map(([name, count]) => `<span class="chip">${{esc(name)}}: ${{fmtNumber(count)}}</span>`).join('')}}</div>`;
}}
function addAdvice(list, text) {{
if (!list.includes(text)) list.push(text);
}}
function adviceForName(kind, name, assembly = '') {{
const out = [];
if (name === 'Iteration Overhead' || name.includes('GetEnumerator') || name.includes('MoveNext') || name.includes('Dispose') || name === 'Enumerator') {{
addAdvice(out, 'This is iteration overhead: too many foreach, iterator blocks, or IEnumerable<T>-based loops.');
addAdvice(out, 'Search for foreach in hot paths first, then replace List/array loops with for where safe.');
addAdvice(out, 'Search for LINQ: Where, Select, Any, First, ToList, OrderBy, GroupBy.');
addAdvice(out, 'Search for APIs typed as IEnumerable<T> and switch hot/shared paths to List<T> or arrays.');
addAdvice(out, 'Search for yield return and custom iterators that allocate enumerator state machines.');
}}
if (name.startsWith('List')) {{
addAdvice(out, `Reduce List usage${{assembly ? ` in ${{assembly}}` : ''}} where size is fixed or known.`);
addAdvice(out, 'Use arrays or pre-sized buffers in hot code and serialization paths.');
addAdvice(out, 'Check foreach-heavy loops and temporary list allocations.');
}}
if (name.startsWith('Dictionary')) {{
addAdvice(out, `Reduce Dictionary usage${{assembly ? ` in ${{assembly}}` : ''}} when key space is small.`);
addAdvice(out, 'Prefer arrays, enum-indexed tables, or prebuilt lookup maps when possible.');
}}
if (name.startsWith('Func') || name.startsWith('Action')) {{
addAdvice(out, `Reduce delegate shape count${{assembly ? ` in ${{assembly}}` : ''}}.`);
addAdvice(out, 'Replace hot-path lambdas and closures with cached delegates or explicit methods.');
}}
if (name.startsWith('SerializerBase') || name.startsWith('TypeMapper') || name.includes('ReadObject') || name.includes('WriteObject')) {{
addAdvice(out, 'Serialization generics look like a root cause here.');
addAdvice(out, 'Reduce distinct T combinations and generic serializer entry points.');
}}
if (name.includes('Where') || name.includes('Select') || name.includes('ToList')) {{
addAdvice(out, 'This looks LINQ-heavy.');
addAdvice(out, 'Replace hot/shared LINQ paths with explicit loops.');
}}
if (name.includes('GetEnumerator') || name.includes('MoveNext') || name.includes('Dispose') || name === 'Enumerator') {{
addAdvice(out, 'Use indexed loops for List and array where possible.');
}}
if (kind === 'assembly' && !out.length) {{
addAdvice(out, `Open top generic types and methods in ${{name}} and cut the biggest repeated shapes first.`);
}}
if (!out.length) {{
addAdvice(out, 'Reduce one-off generic combinations and consolidate repeated shapes.');
}}
return out;
}}
function rowsForCurrentTab() {{
const cfg = tabs[state.tab];
const q = state.filter[state.tab].trim().toLowerCase();
const rows = cfg.rows.filter(row => {{
if (!q) return true;
return cfg.columns.some(col => String(row[col.key] ?? '').toLowerCase().includes(q));
}});
const sort = state.sort[state.tab];
rows.sort((a, b) => {{
const av = a[sort.key];
const bv = b[sort.key];
const order = sort.desc ? -1 : 1;
if (sort.type === 'number') return ((Number(av) || 0) - (Number(bv) || 0)) * order;
return String(av ?? '').localeCompare(String(bv ?? '')) * order;
}});
return rows;
}}
function renderList() {{
const cfg = tabs[state.tab];
const rows = rowsForCurrentTab();
document.getElementById('list-title').textContent = cfg.title;
document.getElementById('detail-title').textContent = cfg.title + ' Summary';
document.getElementById('list-filter').value = state.filter[state.tab];
const thead = document.querySelector('#list-table thead');
const tbody = document.querySelector('#list-table tbody');
thead.innerHTML = `<tr>${{cfg.columns.map(col => `<th data-key="${{esc(col.key)}}" data-type="${{esc(col.type)}}">${{esc(col.label)}}</th>`).join('')}}</tr>`;
tbody.innerHTML = rows.length
? rows.map(row => {{
const cls = state.selected[state.tab] === row.id ? ' class="selected-row"' : '';
return `<tr data-row-id="${{esc(row.id)}}"${{cls}}>${{cfg.columns.map(col => `<td title="${{esc(row[col.key] ?? '')}}">${{esc(row[col.key] ?? '')}}</td>`).join('')}}</tr>`;
}}).join('')
: `<tr><td colspan="${{cfg.columns.length}}" class="muted">no rows</td></tr>`;
thead.querySelectorAll('th').forEach(th => {{
th.addEventListener('click', () => {{
const nextKey = th.dataset.key;
const sort = state.sort[state.tab];
if (sort.key === nextKey) sort.desc = !sort.desc;
else {{
sort.key = nextKey;
sort.type = th.dataset.type || 'text';
sort.desc = sort.type === 'number';
}}
renderList();
}});
}});
}}
function renderAssemblyDetail(item) {{
const assemblyConstructs = report.constructs
.filter(row => row.assembly === item.name)
.sort((a, b) => b.size_est - a.size_est || b.count - a.count)
.slice(0, 30)
.map(row => ({{
name: `${{row.category}} / ${{row.name}}`,
count: row.count,
}}));
const advice = adviceForName('assembly', item.name);
return `
<div class="kv">
<div class="muted">assembly</div><div>${{esc(item.name)}}</div>
<div class="muted">group</div><div>${{esc(item.group)}}</div>
<div class="muted">size</div><div>${{fmtBytes(item.size)}} / ${{fmtPercent(item.size_pct_total)}} total</div>
<div class="muted">% generic</div><div>${{fmtPercent(item.size_pct_generic)}}</div>
<div class="muted">methods</div><div>${{fmtNumber(item.method_count)}}</div>
<div class="muted">generic methods</div><div>${{fmtNumber(item.generic_method_count)}}</div>
<div class="muted">rgctx</div><div>${{fmtNumber(item.rgctx_count)}}</div>
</div>
<div class="subpanel"><h4>what to do</h4><ol>${{advice.map(text => `<li>${{esc(text)}}</li>`).join('')}}</ol></div>
<h3>markers</h3>
${{renderChips(item.markers)}}
<div class="subgrid">
${{renderCountList(item.top_generic_types, 'top generic types')}}
${{renderCountList(item.top_generic_methods, 'top generic methods')}}
</div>
<h3>top constructs</h3>
${{renderCountList(assemblyConstructs, 'largest constructs')}}
`;
}}
function renderGenericTypeDetail(item) {{
const advice = adviceForName('genericType', item.name);
const variations = item.variations.map(row => ({{
variation: row.variation,
hits: row.hits,
assemblies_count: row.assemblies_count,
assemblies: row.assemblies,
size_est: row.size_est,
pct_group: fmtPercent(item.size_est ? row.size_est * 100 / item.size_est : 0),
}}));
return `
<div class="kv">
<div class="muted">base type</div><div>${{esc(item.name)}}</div>
<div class="muted">est. size</div><div>${{fmtBytes(item.size_est)}} / ${{fmtPercent(item.size_pct_total)}} total</div>
<div class="muted">count</div><div>${{fmtNumber(item.count)}}</div>
<div class="muted">assemblies</div><div>${{fmtNumber(item.assemblies_count)}}</div>
<div class="muted">variations</div><div>${{fmtNumber(item.variation_count)}}</div>
</div>
<div class="subpanel"><h4>what to do</h4><ol>${{advice.map(text => `<li>${{esc(text)}}</li>`).join('')}}</ol></div>
<h3>variations</h3>
${{renderVariantTable(variations, [
{{ key: 'variation', label: 'variation' }},
{{ key: 'hits', label: 'hits' }},
{{ key: 'assemblies_count', label: 'assemblies' }},
{{ key: 'assemblies', label: 'top assemblies' }},
{{ key: 'size_est', label: 'est. bytes' }},
{{ key: 'pct_group', label: '% group' }},
])}}
`;
}}
function renderGenericMethodDetail(item) {{
const advice = adviceForName('genericMethod', item.name);
const variations = item.variations.map(row => ({{
assembly: row.assemblies,
owner: row.owner || '',
variation: row.variation,
count: row.hits,
size_est: row.size_est,
pct_group: fmtPercent(item.size_est ? row.size_est * 100 / item.size_est : 0),
}}));
return `
<div class="kv">
<div class="muted">base method</div><div>${{esc(item.name)}}</div>
<div class="muted">owner type</div><div>${{esc(item.owner || 'n/a')}}</div>
<div class="muted">cluster</div><div>${{esc(item.name === 'Iteration Overhead' ? 'foreach / iterator / IEnumerable<T>' : 'generic method group')}}</div>
<div class="muted">est. size</div><div>${{fmtBytes(item.size_est)}} / ${{fmtPercent(item.size_pct_total)}} total</div>
<div class="muted">count</div><div>${{fmtNumber(item.count)}}</div>
<div class="muted">assemblies</div><div>${{fmtNumber(item.assemblies_count)}}</div>
<div class="muted">variations</div><div>${{fmtNumber(item.variation_count)}}</div>
</div>
<div class="subpanel"><h4>what to do</h4><ol>${{advice.map(text => `<li>${{esc(text)}}</li>`).join('')}}</ol></div>
<h3>variations</h3>
${{renderVariantTable(variations, [
{{ key: 'assembly', label: 'assembly' }},
{{ key: 'owner', label: 'owner type' }},
{{ key: 'variation', label: 'variation' }},
{{ key: 'count', label: 'count' }},
{{ key: 'size_est', label: 'est. bytes' }},
{{ key: 'pct_group', label: '% group' }},
])}}
`;
}}
function renderDetail() {{
const panel = document.getElementById('detail-panel');
const selectedId = state.selected[state.tab];
if (!selectedId) {{
panel.innerHTML = '<div class="muted">select an item on the left</div>';
return;
}}
const item = tabs[state.tab].rows.find(row => row.id === selectedId);
if (!item) {{
panel.innerHTML = '<div class="muted">selected item is missing</div>';
return;
}}
if (state.tab === 'assemblies') panel.innerHTML = renderAssemblyDetail(item);
if (state.tab === 'genericTypes') panel.innerHTML = renderGenericTypeDetail(item);
if (state.tab === 'genericMethods') panel.innerHTML = renderGenericMethodDetail(item);
}}
document.querySelector('.tabs').addEventListener('click', event => {{
const tab = event.target.closest('.tab');
if (!tab) return;
state.tab = tab.dataset.tab;
document.querySelectorAll('.tab').forEach(node => node.classList.toggle('active', node === tab));
renderList();
renderDetail();
}});
document.getElementById('list-filter').addEventListener('input', event => {{
state.filter[state.tab] = event.target.value;
renderList();
}});
document.querySelector('#list-table tbody').addEventListener('click', event => {{
const row = event.target.closest('tr[data-row-id]');
if (!row) return;
state.selected[state.tab] = row.dataset.rowId;
renderList();
renderDetail();
}});
renderList();
renderDetail();
</script>
</body>
</html>"""
def main():
    """Command-line entry point: scan the sources and write the JSON and HTML reports.

    Unless ``--json-out`` / ``--html-out`` are given, the reports are written
    next to the ``il2cppOutput`` directory (or inside the root when it has a
    different name). The console summary is printed unless ``--no-console``
    is set.

    Raises:
        SystemExit: If the detected root is not a directory.
    """
    parser = argparse.ArgumentParser(description="Generate IL2CPP construct report.")
    parser.add_argument("root", nargs="?", default=None, help="path to il2cppOutput")
    parser.add_argument("--json-out", default=None, help="json output path")
    parser.add_argument("--html-out", default=None, help="html output path")
    parser.add_argument("--no-console", action="store_true")
    options = parser.parse_args()

    root = detect_root(options.root)
    if not root.is_dir():
        raise SystemExit(f"missing dir: {root}")

    if root.name == "il2cppOutput":
        out_dir = root.parent
    else:
        out_dir = root
    json_out = Path(options.json_out or out_dir / "il2cpp-report.json")
    html_out = Path(options.html_out or out_dir / "il2cpp-report.html")

    source_paths = [
        candidate
        for candidate in sorted(root.iterdir())
        if candidate.is_file() and candidate.suffix in SOURCE_SUFFIXES
    ]
    files = run_parallel(source_paths, scan_file)
    structs = build_type_graph(root)
    resolved_variants = resolve_generic_variants(root, structs)
    resolved_methods = resolve_generic_methods(root, resolved_variants)
    report = build_report(files, resolved_variants, resolved_methods)

    json_out.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
    html_out.write_text(render_html(report), encoding="utf-8")

    if options.no_console:
        return
    print_console_report(report)
    print()
    print(f"json: {json_out}")
    print(f"html: {html_out}")
# Run the report generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment