Skip to content

Instantly share code, notes, and snippets.

@UserUnknownFactor
Last active February 24, 2024 07:59
Show Gist options
  • Select an option

  • Save UserUnknownFactor/9e15d74c27823f3ec4cf10f732c2db5a to your computer and use it in GitHub Desktop.

Select an option

Save UserUnknownFactor/9e15d74c27823f3ec4cf10f732c2db5a to your computer and use it in GitHub Desktop.
Converts TypeTrees generated by the UnityPy reflection generator to the KaitaiStruct .ksy format for parsing analysis
from sys import argv
import json, re
from ctypes import c_uint32
from tqdm import tqdm
# For usage with KaitaiStruct visualisers
# (like https://userunknownfactor.github.io/ for example)
# Name of the class to convert; the main loop compares it case-insensitively
# against the keys of the loaded TypeTree dump.
extract_class = "ClassName"
ASSEMBLY_TREES: dict = {}
# Current nesting depth; pd() uses it as the default indentation level.
depth = 0
# Text of the main class definition, accumulated by pd().
result = ''
# Header of the generated .ksy file.  The keys below ``meta:`` and ``seq:``
# must be nested under them, otherwise the header is not a valid KSY document.
# ``{class_name}`` is filled in with the sanitized name of the converted class.
prefix = """meta:
  id: assembly_typetrees
  endian: le
  encoding: UTF-8
  title: Assembly TypeTrees Types Definitions
  file-extension: dat
seq:
  - id: test_sequence
    type: {class_name}
#instances:
#  unity_version:
#    value: 2020
types:"""
# Bit tested against a node's align field to decide whether padding is emitted.
kAlignBytes = 0x4000
# Keys used to read a TypeTree node dict; set_old_fields() switches them to
# the ``m_``-prefixed spelling.
name_field = "name"
type_field = "type"
align_field = "meta_flag"
level_field = "level"
fields_changed = False
def set_old_fields():
    """Switch the node-key globals to their ``m_``-prefixed names.

    The switch happens at most once; later calls are no-ops because
    ``fields_changed`` is already set.
    """
    global name_field, type_field, align_field, level_field, fields_changed
    if not fields_changed:
        name_field, type_field, align_field, level_field = (
            "m_Name",
            "m_Type",
            "m_MetaFlag",
            "m_Level",
        )
        fields_changed = True
# Helper type definitions that are always written to the ``types:`` section.
# Further entries are added while the TypeTree is converted.
# Layout matches make_appendix(): type name at one space, ``seq:`` at two,
# sequence entries at three, with entry keys aligned under ``id``.
appendixes = {
    "aligned_string": (
        " aligned_string:\n"
        "  seq:\n"
        "   - id: len\n"
        "     type: u4\n"
        "   - id: string\n"
        "     type: str\n"
        "     encoding: utf-8\n"
        "     size: len\n"
        "   - size: (4 - _io.pos) % 4\n"
    ),
    "unity_py_binary_blob": (
        " unity_py_binary_blob:\n"
        "  seq:\n"
        "   - id: data\n"
        "     type: u1\n"
        "     repeat: eos\n"
    ),
    "pptr": (
        " pptr:\n"
        "  seq:\n"
        "   - id: file_id\n"
        "     type: u4\n"
        "   - id: path_id\n"
        "     type: u8\n"
    ),
}
# Load the TypeTree dump: the path comes from the first command-line argument,
# falling back to "assembly_typetrees.json" when none is given.
source_path = "assembly_typetrees.json"
if len(argv) > 1:
    source_path = argv[1]
with open(source_path, "r", encoding="utf-8-sig") as f:
    raw_json = f.read()
    ASSEMBLY_TREES = json.loads(raw_json)
tree_items = ASSEMBLY_TREES.items()
def sanitize(text:str):
    """Normalise *text* into the lower-case, underscore-separated form used in the output.

    Steps, in order: insert ``_`` between a lower-case letter and the
    upper-case letter that follows it; drop ``<`` directly before a word
    character, ``>`` directly after one, and every backtick; lower-case the
    result; turn ``.`` and ``/`` into ``_``; remove ``[]``.
    """
    snake = re.sub(r'([a-z])([A-Z])', r'\1_\2', text)
    stripped = re.sub(r'<(?=\w)|(?<=\w)>|`', r'', snake)
    cleaned = stripped.lower()
    for old, new in (('.', '_'), ('/', '_'), ('[]', '')):
        cleaned = cleaned.replace(old, new)
    return cleaned
def pd(text:str, dont_sanitize:bool=False, only_return:bool=False, use_depth:int=None):
    """Indent *text* line by line and either store it or hand it back.

    Each line is prefixed with one space per indentation level and terminated
    with a newline.  Lines are passed through ``sanitize`` unless
    *dont_sanitize* is set.

    The indentation level is *use_depth* when given; otherwise it is ``0`` in
    return mode and the global ``depth`` in store mode.

    Returns the processed text when *only_return* is true.  Otherwise the
    text is appended to the global ``result`` and ``''`` is returned.
    """
    global depth, result
    if use_depth is None:
        use_depth = 0 if only_return else depth
    indent = ' ' * use_depth
    processed = ''.join(
        indent + (line if dont_sanitize else sanitize(line)) + '\n'
        for line in text.splitlines()
    )
    if only_return:
        return processed
    result += processed
    return ''
def align_stream(ttype=None, only_return=False):
    """Emit a 4-byte padding entry when the node asks for alignment.

    Padding is emitted when *ttype* is ``None`` (unconditional) or when the
    ``kAlignBytes`` bit is set in the node's align field.  The entry is
    returned when *only_return* is true; otherwise ``pd`` stores it and
    ``''`` is returned.  Returns ``''`` when no padding is needed.
    """
    needs_padding = ttype is None or (ttype[align_field] & kAlignBytes) != 0
    if not needs_padding:
        return ''
    padding = pd("- size: (4 - _io.pos) % 4", True, only_return)
    return padding if only_return else ''
def get_subtree_at(nodes: list, item_n: c_uint32) -> list:
    """Return the node at ``item_n`` together with all of its descendants.

    Descendants are the nodes that directly follow the start node and have a
    strictly greater level.  ``item_n`` is advanced in place so that it
    points at the last node of the returned subtree.
    """
    start = item_n.value
    root_level = nodes[start][level_field]
    end = start + 1
    # Walk forward until a node at the same or a shallower level shows up.
    while end < len(nodes) and nodes[end][level_field] > root_level:
        end += 1
    item_n.value = end - 1
    return nodes[start:end]
def is_subtree_same_level(nodes:list) -> bool:
    """Tell whether every node after the first one sits at the same level.

    The level of ``nodes[1]`` is the reference, so *nodes* needs at least
    two entries.
    """
    reference = nodes[1][level_field]
    for node in nodes[1:]:
        if node[level_field] != reference:
            return False
    return True
# TypeTree primitive names (lower-cased) mapped to KSY type names.
# Keys must be lower case because the lookup lower-cases its argument.
_BASE_TYPE_MAP = {
    "sint8": "s1",
    "uint8": "u1",
    "char": "u1",
    "short": "s2",
    "sint16": "s2",
    "uint16": "u2",
    "unsigned short": "u2",
    "int": "s4",
    "sint32": "s4",
    "uint32": "u4",
    "unsigned int": "u4",
    "type*": "u4",
    "long long": "s8",
    "sint64": "s8",
    "uint64": "u8",
    "unsigned long long": "u8",
    "filesize": "u8",
    "float": "f4",
    "double": "f8",
    # NOTE(review): "b1" is a 1-bit field in Kaitai Struct; confirm that this
    # is what a TypeTree bool should map to.
    "bool": "b1",
}


def match_base_type(_type: str):
    """Return the KSY primitive type for a TypeTree type name, or ``None``.

    The comparison is case-insensitive.  ``None`` means *_type* is not a
    known primitive and needs structural handling by the caller.
    """
    return _BASE_TYPE_MAP.get(_type.lower())
def replace_type(i:c_uint32, tlist:list, all_trees:dict, parent:str, return_instead:bool=False):
    """Convert the node at index ``i`` of *tlist* into KSY sequence entries.

    Args:
        i: Index of the node to convert.  It is advanced in place past any
            child nodes consumed while converting.
        tlist: Flat list of TypeTree node dicts.
        all_trees: Passed through to the array/class helpers.
        parent: Sanitized name of the enclosing field, or ``''``.
        return_instead: When true the generated text is returned; otherwise
            it is appended to the global ``result`` through ``pd``.

    Returns:
        The generated text in return mode, ``''`` otherwise.  A class with
        child nodes is the exception: its text is returned whenever the
        node's level is greater than 1, regardless of *return_instead*.
    """
    _ttype: dict = tlist[i.value]
    if name_field not in _ttype:
        # The dump uses the ``m_``-prefixed key names.
        set_old_fields()
    _name: str = sanitize(_ttype[name_field])
    _type: str = _ttype[type_field]
    _level: int = _ttype[level_field]

    _t = match_base_type(_type)
    if _t is not None:
        # Primitive field.  The ``_of_<parent>`` suffix is added only when
        # there is a parent and the list is not flat.
        suffix = "" if is_subtree_same_level(tlist) or not parent else "_of_{parent}"
        # ``type`` is indented two columns so it lines up with ``id``.
        template = "- id: {typ_id}" + suffix + "\n  type: {typ_name}"
        ret = pd(template.format(typ_id=_name, typ_name=_t, parent=parent), only_return=return_instead)
        ret += align_stream(_ttype, only_return=return_instead)
        return ret if return_instead else ''

    if _type.startswith("PPtr<"):
        add = f"_of_{parent}" if len(parent) else ''
        ret = pd(f"- id: pptr_{_name}{add}\n  type: pptr", only_return=return_instead)
        # Skip the pointer's child nodes the same way the other branches do,
        # instead of assuming there are exactly two of them.
        get_subtree_at(tlist, i)
    elif _type == "string":
        ret = parse_string(i, tlist, _ttype, _name, return_instead)
    elif _type == "String[]":
        ret = parse_str_array(i, tlist, _ttype, _name, return_instead)
    elif _type == "map":  # map == MultiDict
        ret = parse_map(i, tlist, _name, return_instead)
    elif _type == "TypelessData":
        ret = parse_typeless(i, tlist, _ttype, _name, return_instead)
    elif i.value < len(tlist) - 1 and tlist[i.value + 1][type_field] == "Array":
        ret = parse_array(i, tlist, _name, _type, all_trees, return_instead)
    else:  # Class
        clz = get_subtree_at(tlist, i)
        if len(clz) != 1:
            ret = parse_complex_class(_ttype, _name, clz, all_trees)
            # parse_complex_class stores the text itself for level-1 nodes.
            return ret if _level > 1 else ''
        ret = parse_class(_type, clz, all_trees, return_instead)
    return ret if return_instead else ''
def parse_class(cls_type, clz, all_trees, return_instead):
    """Emit a field that refers to a class node without children.

    Args:
        cls_type: Raw type name; a trailing ``[]`` marks an array, which gets
            a ``u4`` length field and ``repeat`` keys.
        clz: Subtree whose first node supplies the field name and type.
        all_trees: Unused here; kept for a uniform helper signature.
        return_instead: Return the text instead of storing it via ``pd``.

    Returns:
        The generated text in return mode, ``''`` otherwise.
    """
    is_array = cls_type.endswith("[]")
    cls_name = sanitize(clz[0][name_field])
    ret = ''
    if is_array:
        ret += pd(f"- id: {cls_name}_len\n  type: u4", only_return=return_instead)
    # Keys after ``id`` are indented two columns so they line up with ``id``.
    ret += pd(f"- id: {cls_name}\n  type: {sanitize(clz[0][type_field])}", only_return=return_instead)
    if is_array:
        ret += pd(f"  repeat: expr\n  repeat-expr: {cls_name}_len\n  if: {cls_name}_len > 0", only_return=return_instead)
    return ret if return_instead else ''
def parse_complex_class(ttype, name, clz, all_trees):
    """Register a class with child nodes as its own KSY type and reference it.

    The children of ``clz[0]`` are converted into the body of a new entry in
    ``appendixes`` (keyed by the sanitized type name) unless that type is
    already registered.

    Args:
        ttype: Node dict of the class; decides whether trailing padding is
            added to the type body.
        name: Sanitized field name.
        clz: The class node followed by all of its descendants.
        all_trees: Passed through to ``replace_type``.

    Returns:
        ``''`` when *name* is ``"data"`` (no reference is emitted —
        presumably because the enclosing array emits the entry itself;
        verify against callers).  Otherwise the reference entry is returned
        when the class node's level is greater than 1, or stored via ``pd``
        (returning ``''``) for a level-1 node.
    """
    deep_level = clz[0][level_field] > 1
    body = ''
    j = c_uint32(1)  # index 0 is the class node itself
    while j.value < len(clz):
        body += replace_type(j, clz, all_trees, parent=name, return_instead=True)
        j.value += 1
    body += align_stream(ttype, only_return=True)
    _type = sanitize(clz[0][type_field])
    if _type not in appendixes:
        appendixes[_type] = make_appendix(_type, body)
    if name == "data":
        return ''
    # ``type`` is indented two columns so it lines up with ``id``.
    return pd(f"- id: class_{name}\n  type: {_type}", only_return=deep_level)
def parse_array(i, tlist, name, var_type, all_trees, return_instead):
    """Emit a length-prefixed repeated field for an array node.

    Args:
        i: Index of the array's owner node; advanced past the whole subtree.
        tlist: Flat list of TypeTree node dicts.
        name: Sanitized field name.
        var_type: Raw type name of the owner node; its sanitized form is the
            key under which the collected body is registered in
            ``appendixes`` (skipped for ``list1`` and ``vector``).
        all_trees: Passed through to ``replace_type``.
        return_instead: Return the text instead of storing it via ``pd``.

    Returns:
        The generated text in return mode, ``''`` otherwise.
    """
    # The alignment flag is read from the node that follows the owner.
    align = (tlist[i.value + 1][align_field] & kAlignBytes) != 0
    _array = get_subtree_at(tlist, i)
    len_id = f"{sanitize(name)}_array_len"
    ret = pd(f"- id: {len_id}\n  type: u4", only_return=return_instead)

    # Index 3 is the first element node; convert it and everything below it.
    # NOTE(review): these element entries are always appended to ``ret``, so
    # they are part of the returned text and of the registered appendix.
    j = c_uint32(3)
    while j.value < len(_array):
        ret += replace_type(j, _array, all_trees, parent=name, return_instead=True)
        j.value += 1

    element_type = _array[3][type_field]
    if len(_array) == 7 and element_type == "string":
        _type = "aligned_string"
    else:
        # Primitive element types need their KSY name (e.g. "int" -> "s4");
        # everything else is referenced by its sanitized type name.
        _type = match_base_type(element_type) or sanitize(element_type)

    # Keys after ``id`` are indented two columns so they line up with ``id``;
    # the same length id is used in all three places.
    val = pd(
        f"- id: {name}\n  type: {_type}\n  repeat: expr\n  repeat-expr: {len_id}\n  if: {len_id} > 0",
        only_return=return_instead,
        dont_sanitize=True,
    )
    if return_instead:
        ret += val
    if align:
        ret += align_stream(only_return=return_instead)

    _type = sanitize(var_type)
    if ret and _type not in appendixes and _type not in ["list1", "vector"]:
        appendixes[_type] = make_appendix(_type, ret)
    return ret if return_instead else ''
def make_appendix(var_type, declaration):
    """Wrap *declaration* into a named type definition for the ``types:`` section.

    The result is ``<var_type>:`` followed by a ``seq:`` key and the
    declaration lines, indented by one, two and three spaces respectively.
    Nothing is sanitized and nothing is written to ``result``.
    """
    body = pd(declaration, dont_sanitize=True, only_return=True, use_depth=2)
    definition = f"{var_type}:\n seq:\n{body}"
    return pd(definition, dont_sanitize=True, only_return=True, use_depth=1)
def parse_typeless(i, tlist, ttype, name, return_instead):
    """Emit a length-prefixed byte array for a ``TypelessData`` node.

    Args:
        i: Index of the node; advanced past its child nodes.
        tlist: Flat list of TypeTree node dicts.
        ttype: Node dict; decides whether trailing padding is emitted.
        name: Sanitized field name.
        return_instead: Return the text instead of storing it via ``pd``.

    Returns:
        The generated text in return mode, ``''`` otherwise.
    """
    get_subtree_at(tlist, i)  # only used to skip the child nodes
    len_id = f"{sanitize(name)}_typeless_len"
    # Keys after ``id`` are indented two columns so they line up with ``id``.
    ret = pd(
        f"- id: {len_id}\n  type: u4\n- id: {name}\n  type: u1\n  repeat: expr\n  repeat-expr: {len_id}\n  if: {len_id} > 0",
        only_return=return_instead,
    )
    # Keep the padding entry: its text used to be dropped in return mode.
    ret += align_stream(ttype, only_return=return_instead)
    return ret if return_instead else ''
def _map_entry_type(raw_type):
    """Translate a TypeTree type name into the KSY type of a map key or value."""
    base = match_base_type(raw_type)
    if base is not None:
        return base
    if raw_type == "string":
        return "aligned_string"
    if raw_type.startswith("PPtr<"):
        return "pptr"
    # NOTE(review): any other type is referenced by name and must be defined
    # elsewhere in the ``types:`` section.
    return raw_type


def parse_map(i, tlist, name, return_instead):
    """Emit a length-prefixed list of key/value pairs for a ``map`` node.

    A ``key_value_pair_of_<name>`` type is registered in ``appendixes`` under
    that same id, so it cannot clash with a class type of the same name and
    is always defined when it is referenced.

    Args:
        i: Index of the map node; advanced past the whole subtree.
        tlist: Flat list of TypeTree node dicts.
        name: Field name of the map.
        return_instead: Return the text instead of storing it via ``pd``.

    Returns:
        The generated text in return mode, ``''`` otherwise.
    """
    # The alignment flag is read from the node that follows the map node.
    align = (tlist[i.value + 1][align_field] & kAlignBytes) != 0
    map_ = get_subtree_at(tlist, i)
    # Index 4 is the key subtree; the value subtree follows it directly.
    first = get_subtree_at(map_, c_uint32(4))
    second = get_subtree_at(map_, c_uint32(4 + len(first)))

    _name = sanitize(name)
    pair_type = f"key_value_pair_of_{_name}"
    ret = pd(f"# (key,value) types defined at the end", only_return=return_instead)
    ret += pd(f"- id: {_name}_map_len\n  type: u4", only_return=return_instead)

    if pair_type not in appendixes:
        key_type = _map_entry_type(first[0][type_field])
        value_type = _map_entry_type(second[0][type_field])
        # Same layout as make_appendix(): name at one space, ``seq:`` at two,
        # entries at three, entry keys aligned under ``id``.
        appendixes[pair_type] = pd(
            f" {pair_type}:\n  seq:\n   - id: key\n     type: {key_type}\n   - id: value\n     type: {value_type}",
            only_return=True,
        )

    ret += pd(
        f"- id: {name}\n  type: {pair_type}\n  repeat: expr\n  repeat-expr: {_name}_map_len\n  if: {_name}_map_len > 0",
        only_return=return_instead,
    )
    if align:
        ret += align_stream(only_return=return_instead)
    return ret if return_instead else ''
def parse_str_array(i, tlist, ttype, name, return_instead):
    """Emit a length-prefixed list of ``aligned_string`` values.

    Args:
        i: Index of the node; advanced past its child nodes.
        tlist: Flat list of TypeTree node dicts.
        ttype: Node dict; decides whether trailing padding is emitted.
        name: Sanitized field name.
        return_instead: Return the text instead of storing it via ``pd``.

    Returns:
        The generated text in return mode, ``''`` otherwise.
    """
    get_subtree_at(tlist, i)  # only used to skip the child nodes
    # Keys after ``id`` are indented two columns so they line up with ``id``.
    ret = pd(
        f"- id: {name}_len\n  type: u4\n- id: {name}\n  type: aligned_string\n  repeat: expr\n  repeat-expr: {name}_len\n  if: {name}_len > 0",
        only_return=return_instead,
    )
    ret += align_stream(ttype, only_return=return_instead)
    return ret if return_instead else ''
def parse_string(i, tlist, ttype, name, return_instead):
    """Emit an ``aligned_string`` field for a ``string`` node.

    Args:
        i: Index of the node; advanced past its child nodes.
        tlist: Flat list of TypeTree node dicts.
        ttype: Node dict; decides whether trailing padding is emitted.
        name: Sanitized field name.
        return_instead: Return the text instead of storing it via ``pd``.

    Returns:
        The generated text in return mode, ``''`` otherwise.
    """
    get_subtree_at(tlist, i)  # only used to skip the child nodes
    # ``type`` is indented two columns so it lines up with ``id``.
    ret = pd(f"- id: {name}\n  type: aligned_string", only_return=return_instead)
    ret += align_stream(ttype, only_return=return_instead)
    return ret if return_instead else ''
def process_class_typetree(ttree:list, i:c_uint32, tree_items:dict):
    """Convert every node of *ttree* from index ``i`` onwards.

    ``replace_type`` moves ``i`` past the child nodes it consumes, so each
    pass of the loop starts at the next unconverted node.  The output is
    stored through ``pd``; nothing is returned.
    """
    node_count = len(ttree)
    while i.value < node_count:
        replace_type(i, ttree, tree_items, parent='')
        i.value += 1
print("Converting TypeTrees to KSY format...")
class_found = False
for class_name, ttree in tqdm(tree_items):
    # An empty ``extract_class`` accepts the first class of the dump.
    if extract_class and class_name.lower() != extract_class.lower():
        continue
    extract_class = class_name
    class_found = True
    depth += 1
    pd(sanitize(extract_class) + ":")
    depth += 1
    pd("seq:")
    depth += 1
    process_class_typetree(ttree, c_uint32(1), tree_items)  # starts from 1 to go directly to the 1st level
    depth -= 3

if not class_found:
    # Without a match there is no main class to write; stop instead of
    # producing a .ksy file that references an undefined type.
    raise SystemExit(f'Class "{extract_class}" was not found in the TypeTree dump')

# Build the helper-type section in its own variable so ``appendixes`` stays a
# dict and an empty dict no longer breaks the string concatenation below.
inner_types = ""
if len(appendixes) > 0:
    inner_types = "\n # Inner Classes\n" + ''.join(appendixes.values()) + "\n # Main Class\n"

cn = sanitize(extract_class)
with open(f"{cn}.ksy", "w", encoding="utf-8-sig") as f:
    f.write(prefix.format(class_name=cn) + inner_types + result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment