Last active
February 24, 2024 07:59
-
-
Save UserUnknownFactor/9e15d74c27823f3ec4cf10f732c2db5a to your computer and use it in GitHub Desktop.
Converts a TypeTree generated by the UnityPy reflection generator to the KaitaiStruct .ksy format for parsing analysis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from sys import argv | |
| import json, re | |
| from ctypes import c_uint32 | |
| from tqdm import tqdm | |
| # For usage with KaitaiStruct visualisers | |
| # (like https://userunknownfactor.github.io/ for example) | |
# Name of the class whose TypeTree is exported; the main loop compares it
# case-insensitively against the keys of the input JSON.
extract_class = "ClassName"
# Parsed input JSON (class name -> list of TypeTree nodes); filled when the input is loaded.
ASSEMBLY_TREES: dict = dict()
# Indentation depth pd() uses when it appends to `result`.
depth = 0
# Accumulated .ksy text of the main class.
result = ''
# Header of the generated .ksy file. `{class_name}` is substituted with
# str.format(), so no other braces may appear in this template.
# The nesting below is significant YAML structure: the keys under `meta:` and
# the continuation key of the `seq` item must be indented.
prefix = """meta:
  id: assembly_typetrees
  endian: le
  encoding: UTF-8
  title: Assembly TypeTrees Types Definitions
  file-extension: dat
seq:
  - id: test_sequence
    type: {class_name}
#instances:
#  unity_version:
#    value: 2020
types:"""
# Bit tested in a node's meta flag to decide whether the stream is aligned after it.
kAlignBytes = 0x4000
# Keys used to read a TypeTree node dict; set_old_fields() switches them to
# the "m_"-prefixed spelling.
name_field = "name"
type_field = "type"
align_field = "meta_flag"
level_field = "level"
# Becomes True once the key names above were switched.
fields_changed = False
def set_old_fields():
    """Switch the node-key globals to their "m_"-prefixed spelling.

    Does nothing when the switch has already happened (tracked by the
    module-level ``fields_changed`` flag).
    """
    global name_field, type_field, align_field, level_field, fields_changed
    if not fields_changed:
        name_field, type_field = "m_Name", "m_Type"
        align_field, level_field = "m_MetaFlag", "m_Level"
        fields_changed = True
# Predefined helper types that are emitted under `types:` ahead of the main
# class; the parse_* functions add more entries while they run.
# Each value is a ready-made YAML fragment ending in a newline. Type names sit
# one column in and every nesting level adds one column, which is the same
# step pd() produces for the main class -- sibling keys under `types:` must
# share a column. Keys of a list item are aligned two columns right of "- ".
appendixes = {
    # Length-prefixed UTF-8 string followed by padding to a 4-byte boundary.
    "aligned_string": (
        " aligned_string:\n"
        "  seq:\n"
        "   - id: len\n"
        "     type: u4\n"
        "   - id: string\n"
        "     type: str\n"
        "     encoding: utf-8\n"
        "     size: len\n"
        "   - size: (4 - _io.pos) % 4\n"
    ),
    # Raw bytes up to the end of the stream.
    "unity_py_binary_blob": (
        " unity_py_binary_blob:\n"
        "  seq:\n"
        "   - id: data\n"
        "     type: u1\n"
        "     repeat: eos\n"
    ),
    # Object reference: 4-byte file id followed by 8-byte path id.
    "pptr": (
        " pptr:\n"
        "  seq:\n"
        "   - id: file_id\n"
        "     type: u4\n"
        "   - id: path_id\n"
        "     type: u8\n"
    ),
}
# Input path: first command-line argument, or the default file name.
# "utf-8-sig" makes a leading BOM in the JSON file harmless.
_input_path = argv[1] if len(argv) > 1 else "assembly_typetrees.json"
with open(_input_path, "r", encoding="utf-8-sig") as f:
    ASSEMBLY_TREES = json.load(f)
# (class name, node list) pairs iterated by the main loop.
tree_items = ASSEMBLY_TREES.items()
def sanitize(text:str):
    """Return *text* rewritten as a lower-case, underscore-separated name.

    Steps, in order: insert "_" between a lower-case letter and the
    upper-case letter that follows it; drop "<" directly before a word
    character, ">" directly after one, and every backtick; lower-case the
    result; turn "." and "/" into "_"; remove every "[]".
    """
    snake = re.sub(r'([a-z])([A-Z])', r'\1_\2', text)
    unbracketed = re.sub(r'<(?=\w)|(?<=\w)>|`', r'', snake)
    return unbracketed.lower().translate(str.maketrans('./', '__')).replace('[]', '')
def pd(text:str, dont_sanitize:bool=False, only_return:bool=False, use_depth:int=None):
    """Indent every line of *text* and either store or return the outcome.

    Args:
        text: One or more lines; split with ``str.splitlines``.
        dont_sanitize: When false, each line is passed through ``sanitize``.
        only_return: When true the processed text is returned; otherwise it
            is appended to the global ``result`` and ``''`` is returned.
        use_depth: Number of indent units to prepend. Defaults to 0 when
            *only_return* is set, else to the global ``depth``.

    Returns:
        The processed text (each line newline-terminated) or ``''``.
    """
    global depth, result
    if use_depth is None:
        use_depth = 0 if only_return else depth
    indent = ' ' * use_depth
    processed = ''.join(
        f"{indent}{line if dont_sanitize else sanitize(line)}\n"
        for line in text.splitlines()
    )
    if only_return:
        return processed
    result += processed
    return ''
def align_stream(ttype=None, only_return=False):
    """Emit an anonymous padding field when alignment is requested.

    The field is emitted when *ttype* is ``None`` or when its align flag has
    the ``kAlignBytes`` bit set. Output goes through ``pd`` (unsanitised), so
    the return value is the emitted text when *only_return* is true and
    ``''`` in every other case.
    """
    needs_padding = ttype is None or (ttype[align_field] & kAlignBytes) != 0
    if not needs_padding:
        return ''
    # pd() itself returns '' when it writes to the global buffer.
    return pd("- size: (4 - _io.pos) % 4", True, only_return)
def get_subtree_at(nodes: list, item_n: c_uint32) -> list:
    """Return the node at ``item_n`` together with its descendants.

    Descendants are the nodes that directly follow it and whose level is
    strictly greater than its own. ``item_n.value`` is advanced in place so
    that it ends up on the last node of the returned run.
    """
    start = item_n.value
    root_level = nodes[start][level_field]
    stop = start + 1
    while stop < len(nodes) and nodes[stop][level_field] > root_level:
        stop += 1
    # Move the caller's cursor past the children just collected.
    item_n.value += stop - start - 1
    return nodes[start:stop]
def is_subtree_same_level(nodes:list) -> bool:
    """Tell whether every node after the first one has the same level.

    The first node (the root of the list) is ignored. A list with fewer than
    two nodes has nothing to compare and yields ``True`` instead of raising
    ``IndexError``.
    """
    rest = nodes[1:]
    if not rest:
        return True
    expected_level = rest[0][level_field]
    return all(node[level_field] == expected_level for node in rest)
# TypeTree primitive type names (lower-case) -> Kaitai Struct built-in types.
# Keys MUST be lower-case because the lookup lower-cases its argument; the
# former "Type*" spelling could therefore never match.
# NOTE(review): "bool" maps to the bit-sized type b1; confirm this is the
# intended width for booleans in the parsed data.
_KSY_BASE_TYPES = {
    "sint8": "s1",
    "uint8": "u1", "char": "u1",
    "short": "s2", "sint16": "s2",
    "uint16": "u2", "unsigned short": "u2",
    "int": "s4", "sint32": "s4",
    "uint32": "u4", "unsigned int": "u4", "type*": "u4",
    "long long": "s8", "sint64": "s8",
    "uint64": "u8", "unsigned long long": "u8", "filesize": "u8",
    "float": "f4",
    "double": "f8",
    "bool": "b1",
}


def match_base_type(_type: str):
    """Map a primitive TypeTree type name to its Kaitai Struct type.

    The comparison is case-insensitive. Returns ``None`` when *_type* is not
    one of the known primitives.
    """
    return _KSY_BASE_TYPES.get(_type.lower())
def replace_type(i:c_uint32, tlist:list, all_trees:dict, parent:str, return_instead:bool=False):
    """Emit the .ksy declaration for the node at ``tlist[i.value]``.

    Primitive types are declared directly; everything else is dispatched to
    the matching ``parse_*`` helper. Helpers advance ``i`` past the nodes
    they consume, so the caller only has to step by one afterwards.

    Args:
        i: Cursor into *tlist*; modified in place.
        tlist: Flat list of TypeTree node dicts.
        all_trees: Passed through to the helpers unchanged.
        parent: Sanitised name of the enclosing field, or ``''``.
        return_instead: When true the text is returned rather than appended
            to the global output buffer.

    Returns:
        The generated text when it is being returned, otherwise ``''``.
        Exception: for a class with children the text is returned exactly
        when the node's level is greater than 1, whatever *return_instead*
        says (this mirrors how ``parse_complex_class`` chooses its target).
    """
    _ttype: dict = tlist[i.value]
    if name_field not in _ttype:
        # The node uses the "m_"-prefixed key spelling.
        set_old_fields()
    _name: str = sanitize(_ttype[name_field])
    _type: str = _ttype[type_field]
    _level: int = _ttype[level_field]

    base_type = match_base_type(_type)
    if base_type is not None:
        # Disambiguate with the parent name unless all nodes after the first
        # share one level or there is no parent.
        suffix = '' if is_subtree_same_level(tlist) or not parent else f"_of_{parent}"
        # "type" must line up with "id", i.e. two columns right of the dash.
        ret = pd(f"- id: {_name}{suffix}\n  type: {base_type}", only_return=return_instead)
        ret += align_stream(_ttype, only_return=return_instead)
        return ret if return_instead else ''

    if _type.startswith("PPtr<"):
        add = f"_of_{parent}" if parent else ''
        ret = pd(f"- id: pptr_{_name}{add}\n  type: pptr", only_return=return_instead)
        # Skip the two following nodes (presumably the file-id and path-id
        # children covered by the predefined "pptr" type -- TODO confirm).
        i.value += 2
    elif _type == "string":
        ret = parse_string(i, tlist, _ttype, _name, return_instead)
    elif _type == "String[]":
        ret = parse_str_array(i, tlist, _ttype, _name, return_instead)
    elif _type == "map":  # map == MultiDict
        ret = parse_map(i, tlist, _name, return_instead)
    elif _type == "TypelessData":
        ret = parse_typeless(i, tlist, _ttype, _name, return_instead)
    elif i.value < len(tlist) - 1 and tlist[i.value + 1][type_field] == "Array":
        ret = parse_array(i, tlist, _name, _type, all_trees, return_instead)
    else:  # Class
        clz = get_subtree_at(tlist, i)
        if len(clz) != 1:
            ret = parse_complex_class(_ttype, _name, clz, all_trees)
            return ret if _level > 1 else ''
        ret = parse_class(_type, clz, all_trees, return_instead)
    return ret if return_instead else ''
def parse_class(cls_type, clz, all_trees, return_instead):
    """Declare a field whose class type has no child nodes.

    When *cls_type* ends with "[]" the field becomes a repeated field
    preceded by a ``u4`` element count.

    Args:
        cls_type: Raw type name of the node.
        clz: Sub-tree of the node; only ``clz[0]`` is read.
        all_trees: Unused here; kept for a uniform helper signature.
        return_instead: Return the text instead of appending it to the
            global output buffer.

    Returns:
        The generated text when *return_instead* is true, else ``''``.
    """
    is_array = cls_type.endswith("[]")
    cls_name = sanitize(clz[0][name_field])
    # Continuation keys of a list item are aligned with "id", two columns
    # right of the dash; pd() indents every line separately.
    lines = []
    if is_array:
        lines += [f"- id: {cls_name}_len", "  type: u4"]
    lines += [f"- id: {cls_name}", f"  type: {sanitize(clz[0][type_field])}"]
    if is_array:
        lines += [
            "  repeat: expr",
            f"  repeat-expr: {cls_name}_len",
            f"  if: {cls_name}_len > 0",
        ]
    ret = pd("\n".join(lines), only_return=return_instead)
    return ret if return_instead else ''
def parse_complex_class(ttype, name, clz, all_trees):
    """Register a class that has children as its own type and reference it.

    The children ``clz[1:]`` are converted with ``replace_type`` and stored
    once per sanitised type name in ``appendixes``. A field referencing that
    type is then emitted, except when *name* is ``"data"``.

    Args:
        ttype: Node dict of the class; consulted for the alignment flag.
        name: Sanitised field name.
        clz: The class node followed by all its descendants.
        all_trees: Passed through to ``replace_type``.

    Returns:
        The reference declaration when the class level is greater than 1;
        otherwise the declaration is appended to the global output buffer
        and ``''`` is returned. Always ``''`` for the name ``"data"``.
    """
    _type = sanitize(clz[0][type_field])
    deep_level = clz[0][level_field] > 1

    members = ''
    j = c_uint32(1)  # index 0 is the class node itself
    while j.value < len(clz):
        members += replace_type(j, clz, all_trees, parent=name, return_instead=True)
        j.value += 1
    # Alignment of the class is part of its type definition.
    members += align_stream(ttype, only_return=True)

    if _type not in appendixes:
        appendixes[_type] = make_appendix(_type, members)
    if name == "data":
        return ''
    # "type" must line up with "id", two columns right of the dash.
    return pd(f"- id: class_{name}\n  type: {_type}", only_return=deep_level)
def parse_array(i, tlist, name, var_type, all_trees, return_instead):
    """Declare an array field: a ``u4`` length followed by repeated elements.

    Args:
        i: Cursor on the container node; advanced past the whole sub-tree.
        tlist: Flat list of TypeTree node dicts.
        name: Field name (callers in this file pass it already sanitised).
        var_type: Raw type name of the container node.
        all_trees: Passed through to ``replace_type``.
        return_instead: Return the text instead of appending it to the
            global output buffer.

    Returns:
        The generated text when *return_instead* is true, else ``''``.

    Side effects:
        May add an entry for the sanitised *var_type* to ``appendixes``
        (never for "list1" or "vector").
    """
    # The alignment flag is read from the node right after the container.
    align = (tlist[i.value + 1][align_field] & kAlignBytes) != 0
    _array = get_subtree_at(tlist, i)
    # One id for the declaration and for every expression referring to it.
    array_id = sanitize(name)
    ret = pd(f"- id: {array_id}_array_len\n  type: u4", only_return=return_instead)

    # Nodes from index 3 on describe the element; they are always collected
    # as text (never written directly) because they feed the appendix below.
    j = c_uint32(3)
    while j.value < len(_array):
        ret += replace_type(j, _array, all_trees, parent=name, return_instead=True)
        j.value += 1

    if len(_array) == 7 and _array[3][type_field] == "string":
        _type = "aligned_string"
    else:
        # NOTE(review): primitive element types are not mapped through
        # match_base_type() here -- confirm whether that is intended.
        _type = sanitize(_array[3][type_field])

    # Continuation keys are aligned with "id", two columns right of the dash.
    val = pd(
        f"- id: {array_id}\n"
        f"  type: {_type}\n"
        f"  repeat: expr\n"
        f"  repeat-expr: {array_id}_array_len\n"
        f"  if: {array_id}_array_len > 0",
        only_return=return_instead, dont_sanitize=True)
    if return_instead:
        ret += val
    if align:
        ret += align_stream(only_return=return_instead)

    _type = sanitize(var_type)
    if ret and _type not in appendixes and _type not in ["list1", "vector"]:
        appendixes[_type] = make_appendix(_type, ret)
    return ret if return_instead else ''
def make_appendix(var_type, declaration):
    """Wrap *declaration* into a named type definition with a ``seq`` section.

    The declaration lines are indented by two units, the header lines by
    one; nothing is sanitised and the text is returned, not stored.
    """
    seq_body = pd(declaration, dont_sanitize=True, only_return=True, use_depth=2)
    definition = f"{var_type}:\n seq:\n{seq_body}"
    return pd(definition, dont_sanitize=True, only_return=True, use_depth=1)
def parse_typeless(i, tlist, ttype, name, return_instead):
    """Declare a typeless blob: a ``u4`` byte count followed by that many bytes.

    Args:
        i: Cursor on the node; advanced past its whole sub-tree.
        tlist: Flat list of TypeTree node dicts.
        ttype: Node dict; consulted for the alignment flag.
        name: Field name.
        return_instead: Return the text instead of appending it to the
            global output buffer.

    Returns:
        The generated text, including the alignment padding, when
        *return_instead* is true; else ``''``.
    """
    get_subtree_at(tlist, i)  # only needed to move the cursor
    blob_id = sanitize(name)
    # Continuation keys are aligned with "id", two columns right of the dash.
    ret = pd(
        f"- id: {blob_id}_typeless_len\n"
        f"  type: u4\n"
        f"- id: {blob_id}\n"
        f"  type: u1\n"
        f"  repeat: expr\n"
        f"  repeat-expr: {blob_id}_typeless_len\n"
        f"  if: {blob_id}_typeless_len > 0",
        only_return=return_instead)
    # The padding has to be part of the returned text; it used to be dropped
    # whenever return_instead was set.
    ret += align_stream(ttype, only_return=return_instead)
    return ret if return_instead else ''
def parse_map(i, tlist, name, return_instead):
    """Declare a map field: a ``u4`` pair count followed by key/value pairs.

    The pair type ``key_value_pair_of_<name>`` is registered in
    ``appendixes`` under the sanitised field name.

    Args:
        i: Cursor on the map node; advanced past the whole sub-tree.
        tlist: Flat list of TypeTree node dicts.
        name: Field name.
        return_instead: Return the text instead of appending it to the
            global output buffer.

    Returns:
        The generated text when *return_instead* is true, else ``''``.
    """
    # The alignment flag is read from the node right after the map node.
    align = (tlist[i.value + 1][align_field] & kAlignBytes) != 0
    map_ = get_subtree_at(tlist, i)
    # Index 4 of the sub-tree is taken as the key; the value follows the
    # key's own sub-tree.
    first = get_subtree_at(map_, c_uint32(4))
    second = get_subtree_at(map_, c_uint32(4 + len(first)))
    _name = sanitize(name)

    ret = pd("# (key,value) types defined at the end", only_return=return_instead)
    # Continuation keys are aligned with "id", two columns right of the dash.
    ret += pd(f"- id: {_name}_map_len\n  type: u4", only_return=return_instead)

    # NOTE(review): `i` already points at the LAST node of the map here, so
    # the level test looks at that node, and `_name` is sanitised, so it can
    # never equal "List`1". Both checks are kept as they were -- confirm the
    # intent before tightening them.
    if tlist[i.value][level_field] > 1 and _name not in appendixes and _name != "List`1":
        # NOTE(review): key/value type names are only sanitised by pd(); they
        # are not mapped through match_base_type() -- confirm.
        # One column per nesting level, same step pd() uses for the main class.
        appendixes[_name] = pd(
            f" key_value_pair_of_{_name}:\n"
            f"  seq:\n"
            f"   - id: key\n"
            f"     type: {first[0][type_field]}\n"
            f"   - id: value\n"
            f"     type: {second[0][type_field]}",
            only_return=True)

    ret += pd(
        f"- id: {_name}\n"
        f"  type: key_value_pair_of_{_name}\n"
        f"  repeat: expr\n"
        f"  repeat-expr: {_name}_map_len\n"
        f"  if: {_name}_map_len > 0",
        only_return=return_instead)
    if align:
        ret += align_stream(only_return=return_instead)
    return ret if return_instead else ''
def parse_str_array(i, tlist, ttype, name, return_instead):
    """Declare an array of strings: a ``u4`` count plus repeated ``aligned_string``.

    Args:
        i: Cursor on the node; advanced past its whole sub-tree.
        tlist: Flat list of TypeTree node dicts.
        ttype: Node dict; consulted for the alignment flag.
        name: Field name.
        return_instead: Return the text instead of appending it to the
            global output buffer.

    Returns:
        The generated text when *return_instead* is true, else ``''``.
    """
    get_subtree_at(tlist, i)  # only needed to move the cursor
    # Continuation keys are aligned with "id", two columns right of the dash.
    ret = pd(
        f"- id: {name}_len\n"
        f"  type: u4\n"
        f"- id: {name}\n"
        f"  type: aligned_string\n"
        f"  repeat: expr\n"
        f"  repeat-expr: {name}_len\n"
        f"  if: {name}_len > 0",
        only_return=return_instead)
    ret += align_stream(ttype, only_return=return_instead)
    return ret if return_instead else ''
def parse_string(i, tlist, ttype, name, return_instead):
    """Declare a string field using the predefined ``aligned_string`` type.

    Args:
        i: Cursor on the node; advanced past its whole sub-tree.
        tlist: Flat list of TypeTree node dicts.
        ttype: Node dict; consulted for the alignment flag.
        name: Field name.
        return_instead: Return the text instead of appending it to the
            global output buffer.

    Returns:
        The generated text when *return_instead* is true, else ``''``.
    """
    get_subtree_at(tlist, i)  # only needed to move the cursor
    # "type" must line up with "id", two columns right of the dash.
    ret = pd(f"- id: {name}\n  type: aligned_string", only_return=return_instead)
    ret += align_stream(ttype, only_return=return_instead)
    return ret if return_instead else ''
def process_class_typetree(ttree:list, i:c_uint32, tree_items:dict):
    """Convert every node of *ttree* from position ``i`` to the end.

    ``replace_type`` moves the cursor past whatever it consumed; this loop
    then steps on to the next unprocessed node.
    """
    while True:
        if i.value >= len(ttree):
            break
        replace_type(i, ttree, tree_items, parent='')
        i.value += 1
print("Converting TypeTrees to KSY format...")
class_found = False
for class_name, ttree in tqdm(tree_items):
    # Case-insensitive match; with an empty `extract_class` the first class wins.
    if extract_class and class_name.lower() != extract_class.lower():
        continue
    class_found = True
    extract_class = class_name  # keep the spelling used in the input
    depth += 1
    pd(sanitize(extract_class) + ":")
    depth += 1
    pd("seq:")
    depth += 1
    process_class_typetree(ttree, c_uint32(1), tree_items)  # starts from 1 to go directly to the 1st level
    depth -= 3

if not class_found:
    # Without this guard a .ksy referencing an undefined type would be written.
    raise SystemExit(f'Class "{extract_class}" was not found in the input TypeTrees; nothing written.')

# Build the helper-type section in its own variable so `appendixes` stays a
# dict and an empty dict cannot end up in a str concatenation.
if len(appendixes) > 0:
    inner_classes = "\n # Inner Classes\n" + ''.join(appendixes.values()) + "\n # Main Class\n"
else:
    inner_classes = "\n"  # still terminate the "types:" line of the prefix

cn = sanitize(extract_class)
with open(f"{cn}.ksy", "w", encoding="utf-8-sig") as f:
    f.write(prefix.format(class_name=cn) + inner_classes + result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment