Skip to content

Instantly share code, notes, and snippets.

@UserUnknownFactor
Created October 23, 2020 08:17
Show Gist options
  • Select an option

  • Save UserUnknownFactor/1340400ca71c494fa057fdf6e0d33c38 to your computer and use it in GitHub Desktop.

Select an option

Save UserUnknownFactor/1340400ca71c494fa057fdf6e0d33c38 to your computer and use it in GitHub Desktop.
Unity game translation tools
import os, sys
from glob import glob
from UnityPy import AssetsManager
from collections import Counter
import zipfile
from string import Template
from filetranslate.service_fn import write_csv_list
# ruamel.yaml is an optional dependency; USE_YAML records whether it is available.
USE_YAML = False
try:
    import ruamel.yaml
    USE_YAML = True
except ImportError:
    # Only a missing package is tolerated; any other failure should surface.
    pass

# Unity object types handed to AssetsManager.process and accepted by export_obj.
# Other candidates that were tried: 'Texture2D', 'TextAsset', 'Sprite'.
TYPES = ['MonoBehaviour']

# YAML handle, or None when ruamel.yaml is not installed (previously this line
# raised NameError in that case because the module name was never bound).
yaml = ruamel.yaml.YAML() if USE_YAML else None

ROOT = os.path.abspath(os.getcwd())  # base directory
DST = os.path.join(ROOT, "extracted")  # destination folder
ASSETS = os.path.join(ROOT, "*.unity3d")  # source folder or file

# Rows collected by export_obj and written to assets_offsets.csv by main().
asset_offsets = []
def main():
    """Export the objects of every file matched by ASSETS and record offsets.

    Each matched file is opened with AssetsManager and every object of a type
    listed in TYPES is passed to export_obj. Files ending in ".zip" are
    treated as archives whose members are each parsed as an asset file.
    The rows accumulated in asset_offsets (header first) are finally written
    to "assets_offsets.csv".
    """
    os.makedirs(DST, exist_ok=True)

    # Header row; the data rows are appended by export_obj in the same order.
    asset_offsets.append([
        "name",
        "data.reader.byte_base_offset",
        "data.reader.byte_header_size",
        "data.reader.byte_start_offset",
        "byte_start_offset_size",
        "data.reader.byte_start",
        "data.reader.byte_size_offset",
        "byte_size_offset_size",
        "data.reader.byte_size"
    ])

    for file_name in glob(ASSETS):
        extension = os.path.splitext(file_name)[1]
        src = os.path.realpath(os.path.join(ROOT, file_name))
        if extension == ".zip":
            # Context managers make sure the archive and each member stream
            # are closed even if parsing raises.
            with zipfile.ZipFile(src, 'r') as archive:
                for zf in archive.namelist():
                    with archive.open(zf) as member:
                        am = AssetsManager(member)
                        print("Parsing file:", zf)
                        am.process(export_obj, TYPES)
        else:
            am = AssetsManager(src)
            print("Parsing file:", src)
            am.process(export_obj, TYPES)

    write_csv_list("assets_offsets.csv", asset_offsets)
def _export_texture2d(data, fp: str, asset: str) -> bool:
    """Dump a Texture2D as raw image data, a text description and a PNG.

    NOTE: this helper is currently NOT called. In the original code the same
    statements sat after an unconditional ``return`` in the Texture2D branch
    of export_obj and were therefore unreachable; they are kept here so the
    export can be re-enabled deliberately.

    Args:
        data: the object returned by ``obj.read()`` for a Texture2D.
        fp: output path without extension.
        asset: asset name, used only in the error message.

    Returns:
        False when saving the PNG raised EOFError, True otherwise.
    """
    objfmt = "Texture2D"
    if data.m_TextureFormat.name is not None:
        objfmt = data.m_TextureFormat.name
    extension = ".png"
    with open(f"{fp}.data", "wb") as f:
        f.write(data.image_data)
    with open("Texture2D_template.txt", "r") as f:
        templ = Template(f.read())
    with open(f"{fp}{extension}.txt", "w") as f:
        f.write(templ.substitute(m_Name=data.name, m_Width=data.m_Width, m_Height=data.m_Height, m_CompleteImageSize=data.m_CompleteImageSize))
    if not os.path.exists(f"{fp}{extension}"):
        try:
            data.image.save(f"{fp}{extension}")
        except EOFError as e:
            print(repr(e), "in file:", asset, "object type:", objfmt)
            return False
    return True


def export_obj(obj, asset: str, local_path: str) -> list:
    """Write one Unity object to disk and record its stream offsets.

    Used as the callback given to ``AssetsManager.process``. Output goes under
    ``DST/local_path`` and is named ``<object name>-<asset>-<path_id>``.

    Args:
        obj: object supplied by AssetsManager (exposes type, path_id, read()).
        asset: name of the asset file the object belongs to.
        local_path: sub-directory of DST to write into.

    Returns:
        ``[]`` when the object type is not listed in TYPES, otherwise
        ``[obj.path_id]``. (How the caller uses this list is not visible
        here -- TODO confirm against AssetsManager.process.)
    """
    objfmt = str(obj.type)
    if objfmt not in TYPES:
        return []

    data = obj.read()
    name = data.name or "unnamed asset"
    fname, extension = os.path.splitext(name)
    name = "%s-%s-%d" % (fname, asset, obj.path_id)
    fp = os.path.join(DST, local_path, name)
    # The object name may itself contain directories, hence dirname(fp).
    os.makedirs(os.path.dirname(fp), exist_ok=True)

    asset_offsets.append([
        name,                             # asset name
        data.reader.byte_base_offset,     # base offset of the stream in the unpacked file
        data.reader.byte_header_offset,   # included inside byte_start but written without it to disk
        *data.reader.byte_start_offset,   # offset of byte_start inside the stream
        data.reader.byte_start,           # byte_start
        *data.reader.byte_size_offset,    # offset of byte_size inside the stream
        data.reader.byte_size             # byte_size
    ])

    if objfmt == 'TextAsset':
        extension = '.txt'
        # Binary mode must not be given an encoding (open() raises ValueError);
        # text payloads are encoded explicitly instead.
        with open(f"{fp}{extension}", 'wb') as f:
            script = data.script
            if script:
                if isinstance(script, str):
                    script = script.encode("utf-8")
                f.write(script)
    elif objfmt == "Sprite":
        extension = ".png"
        # Save next to the other exports (was written to the current working
        # directory under the bare object name).
        data.image.save(f"{fp}{extension}")
    elif objfmt == 'MonoBehaviour':
        extension = ".dat"
        script = data.script.read()
        if not script:
            return [obj.path_id]
        # Group dumps by script namespace and class (the original referenced
        # an undefined name ``fpath`` here).
        fp = os.path.join(DST, local_path, script.namespace, script.class_name, name)
        os.makedirs(os.path.dirname(fp), exist_ok=True)
        with open(f"{fp}{extension}", "wb") as f:
            f.write(data.get_raw_data())
    elif objfmt == "Texture2D":
        # Texture export is switched off; see _export_texture2d to re-enable.
        return [obj.path_id]

    return [obj.path_id]
# Run the exporter only when this file is executed as a script.
if __name__ == "__main__":
    main()
import csv, os, sys, argparse, kaitaistruct
import glob
from unity_strings_in_dat import *
from filetranslate.service_fn import read_csv_list, write_csv_list
from pkg_resources import parse_version
import kaitaistruct
from kaitaistruct import KaitaiStruct, KaitaiStream, BytesIO
# Padding characters used when a translated string must keep its byte length.
ZERO_WIDTH = '\u200C'  # ZERO WIDTH NON-JOINER; alternative tried: '\u200B'
NON_ASCII_SPACE = '\u2007'
SPACE_CHAR = ' '

EXLUDED_DIRS = {
    ".git",
    ".vscode",
    ".backup",
    "__pycache__",
    "[Originals]",
    "translation_out",
}

# CSV that collects the strings of all files matched by wildcard specs.
COMBINED_FN = 'combined_new_strings.csv'

# (glob pattern or file path, [pre_len, num_strings, post_str_len]); the list
# is unpacked into UnityStrDat.set_offsets() for every matching file.
FILENAME_SPECS = [
    ("assets_mono\\1\\*\\*.dat", [0x10, 1, 0]),
    ("assets_mono\\2\\unnamed asset-sharedassets0.assets-2.dat", [0x44, 0x100, 0]),
]

# Refuse to run against a kaitaistruct runtime older than VERSION_REQUIRED.
VERSION_REQUIRED = '0.10'
if parse_version(VERSION_REQUIRED) > parse_version(kaitaistruct.__version__):
    raise Exception(
        "Incompatible KaitaiStruct Python API: %s or later is required, but you have %s"
        % (VERSION_REQUIRED, kaitaistruct.__version__)
    )
class UnityStrDat(KaitaiStruct):
    """Reader/writer for a .dat dump: raw header, length-prefixed strings, raw footer.

    Layout handled by ``_read`` / ``_write``:
      * ``pre_len`` opaque header bytes;
      * a u4le string count, present only when ``num_strings`` was given as 0;
      * the strings: u4le byte length, that many UTF-8 bytes, alignment to a
        multiple of 4, then ``post_str_len`` opaque bytes if non-zero;
      * whatever remains in the stream, kept verbatim as the footer.

    NOTE(review): this relies on a KaitaiStruct base whose constructor accepts
    ``(_io, _parent, _root, _init)`` and on stream helpers such as
    ``write_u4le`` / ``seek_alignment`` / ``write_alignment`` -- presumably a
    read-write build of the runtime; confirm before upgrading kaitaistruct.
    """

    def __init__(self, _io, _parent=None, _root=None, _init=False):
        super().__init__(_io, _parent, _root, _init)
        if self._init:
            # Parse immediately using the default layout.
            self.set_offsets(0x2c, 1, 0, False)
            self._read()

    def set_offsets(self, pre_len, num_strings, post_str_len, cut_to_size=False):
        """Configure the layout; must be called before ``_read``.

        Args:
            pre_len: size of the opaque header in bytes.
            num_strings: fixed number of strings, or 0 to read the count
                from the stream.
            post_str_len: opaque bytes following every string (0 for none).
            cut_to_size: when True, shorter replacement strings are padded
                with spaces to their original byte size on write.
        """
        self._num_strings_fixed = num_strings
        self._string_post_fix_len = post_str_len
        self._header_len = pre_len
        self._cut_to_size = cut_to_size
        self._init = True

    def _read(self):
        """Parse header, strings and footer from the stream."""
        self.header = UnityStrDat.HeaderRaw(self._io, self, self._root, self._init)
        if self._num_strings_fixed == 0:
            self.num_strings = self._io.read_u4le()
        else:
            self.num_strings = self._num_strings_fixed

        self.strings_array = []
        for _ in range(self.num_strings):
            pos = self._io.pos()
            temp = UnityStrDat.StringStruct(self._io, self, self._root, self._init)
            if getattr(temp, 'string', None) is None:
                # Not a valid string: rewind so these bytes become part of
                # the footer, and stop reading strings.
                self._io.seek(pos)
                break
            self.strings_array.append(temp)

        self.footer = UnityStrDat.FooterRaw(self._io, self, self._root, self._init)
        self._init = False

    def _write(self):
        """Serialize header, strings and footer back to the stream."""
        self.header._write()
        if self._num_strings_fixed == 0:
            self._io.write_u4le(len(self.strings_array))
        for entry in self.strings_array:
            entry._write()
        self.footer._write()

    class HeaderRaw(KaitaiStruct):
        """Opaque header of ``_parent._header_len`` bytes."""

        def __init__(self, _io, _parent=None, _root=None, _init=False):
            super().__init__(_io, _parent, _root, _init)
            if self._init:
                self._read()

        def _read(self):
            self.header_raw_bytes = self._io.read_bytes(self._parent._header_len)
            self._init = False

        def _write(self):
            """Write the header back unchanged.

            Raises:
                Exception: if no header is stored or its size differs from
                    the configured header length.
            """
            data = self.header_raw_bytes
            expected = self._parent._header_len
            # Measure only after the None check (len(None) used to raise
            # TypeError) and accept a legitimately empty header.
            actual = len(data) if data is not None else 0
            if data is None or actual != expected:
                raise Exception("No header provided or wrong size (%d, instead of %d)" % (actual, expected))
            if actual > 0:
                self._io.write_bytes(data)

    class FooterRaw(KaitaiStruct):
        """Everything after the last string, preserved verbatim."""

        def __init__(self, _io, _parent=None, _root=None, _init=False):
            super().__init__(_io, _parent, _root, _init)
            if self._init:
                self._read()

        def _read(self):
            self.footer_raw_bytes = self._io.read_bytes_full()
            self._init = False

        def _write(self):
            data = self.footer_raw_bytes
            if len(data) > 0:
                self._io.write_bytes(data)

    class StringStruct(KaitaiStruct):
        """One length-prefixed UTF-8 string plus optional opaque trailing bytes.

        After ``_read`` the ``string`` attribute exists only if a plausible
        string was decoded; the parent uses its absence as the stop signal.
        """

        def __init__(self, _io, _parent=None, _root=None, _init=False):
            super().__init__(_io, _parent, _root, _init)
            self._encoding = u"utf-8"
            if self._init:
                self._read()

        def _stream_name(self):
            """Name of the underlying file for diagnostics, if it has one."""
            return getattr(getattr(self._io, "_io", None), "name", "<stream>")

        def _read(self):
            try:
                self.s_size = self._io.read_u4le()
            except EOFError:
                self._init = False
                return
            if self.s_size > 500:  # ignore obviously invalid strings
                print("Invalid size:", self._stream_name())
                return
            try:
                self.string = self._io.read_bytes(self.s_size).decode(self._encoding)
                self._io.seek_alignment(4)
            except Exception:
                # Short read or undecodable bytes; Exception (not a bare
                # except) so KeyboardInterrupt/SystemExit still propagate.
                print("Invalid utf-8 in the stream:", self._stream_name())
                return
            if self._parent._string_post_fix_len > 0:
                self.string_post_fix = self._io.read_bytes(self._parent._string_post_fix_len)

        def _write(self):
            """Write the (possibly replaced) string with its length prefix."""
            data = self.string.encode(self._encoding)
            # The layout settings live on the parent UnityStrDat (the same
            # object _read consults), so use _parent rather than _root.
            if self._parent._cut_to_size and len(data) < self.s_size:
                data += b'\x20' * (self.s_size - len(data))
            # Longer strings are deliberately not truncated; the stored size
            # simply follows the new data.
            self.s_size = len(data)
            self._io.write_u4le(self.s_size)
            self._io.write_bytes(data)
            self._io.write_alignment(4, 0)
            post_fix = getattr(self, 'string_post_fix', None)
            if self._parent._string_post_fix_len > 0 and post_fix is not None:
                self._io.write_bytes(post_fix)
def _marginal_width(text, encoding):
    """Return how many bytes *text* adds when appended (ignores any BOM)."""
    return len((text * 2).encode(encoding)) - len(text.encode(encoding))


def fill_or_cut_string(string, original_len, filler=ZERO_WIDTH, encoding='utf-8'):
    """Fill or cut a string to its original byte length.

    Characters are dropped from the end while the encoded string is longer
    than *original_len*. A shorter string is padded with as many *filler*
    strings as fit, and the remaining bytes are made up with SPACE_CHAR
    placed before the fillers.

    The previous implementation ignored *filler* and assumed a 2-byte filler,
    but the default U+200C is 3 bytes in UTF-8, so most deficits overshot the
    target and tripped the final assert. Inputs that used to succeed produce
    the same output as before.

    Args:
        string: text to adjust.
        original_len: required length in bytes once encoded.
        filler: padding string; empty or None pads with spaces only.
        encoding: codec used to measure the string.

    Returns:
        The adjusted string, whose encoded length equals *original_len*.

    Raises:
        ValueError: if *original_len* is negative or the exact length
            cannot be reached with the given filler and encoding.
    """
    if original_len < 0:
        raise ValueError("original_len must be non-negative, got %d" % original_len)
    filler = filler or ''

    n = len(string.encode(encoding))
    # Stop on an empty string too, so a BOM-only encoding cannot loop forever.
    while n > original_len and string:
        string = string[:-1]
        n = len(string.encode(encoding))

    deficit = original_len - n
    if deficit > 0:
        filler_width = _marginal_width(filler, encoding) if filler else 0
        n_fillers = deficit // filler_width if filler_width else 0
        remainder = deficit - n_fillers * filler_width
        space_width = _marginal_width(SPACE_CHAR, encoding)
        n_spaces = remainder // space_width if space_width else 0
        string += SPACE_CHAR * n_spaces + filler * n_fillers

    final_len = len(string.encode(encoding))
    if final_len != original_len:
        raise ValueError(
            "cannot fit string into %d bytes (got %d)" % (original_len, final_len)
        )
    return string
def _pack_translations():
    """Write translated copies of the .dat files into "translation_out".

    For each entry of FILENAME_SPECS the translations come from COMBINED_FN
    (wildcard patterns) or from "<file>_strings.csv" (single files). A file
    is written only if at least one of its strings matched, and never
    overwrites an existing output file.
    """
    combined = None
    for pattern, offsets in FILENAME_SPECS:
        specific = None
        if '*' in pattern:
            if combined is None:
                combined = read_csv_list(COMBINED_FN)
        else:
            specific = read_csv_list(os.path.splitext(pattern)[0] + '_strings.csv')
        translations = specific if specific is not None else combined
        # With a per-file CSV the first matching row wins; with the combined
        # CSV every row is tried (behavior kept from the original).
        stop_at_first = specific is not None

        for file_name in glob.glob(pattern):
            new_name = os.path.join("translation_out", file_name)
            if os.path.exists(new_name):
                continue
            try:
                a = UnityStrDat.from_file(file_name, _init=False)
                a.set_offsets(*offsets)
                a._read()
                a.close()
            except Exception as e:
                print(repr(e))
                continue
            if a.strings_array is None:
                continue

            path_out = os.path.dirname(new_name)
            if path_out:
                os.makedirs(path_out, exist_ok=True)

            is_changed = False
            for entry in a.strings_array:
                for line in translations:
                    if entry.string == line[0]:
                        entry.string = line[1]
                        is_changed = True
                        if stop_at_first:
                            break
            if is_changed:
                print("Packing", new_name, "...")
                a.to_file(new_name)
                a.close()


def _collect_new_strings(file_name, offsets, rows, known):
    """Append the not-yet-known strings of one .dat file to *rows*.

    *known* is the set of source strings already present in *rows*; it is
    updated alongside so membership tests stay O(1) instead of rebuilding a
    list for every string. Errors are printed and the file is skipped.
    """
    try:
        with UnityStrDat.from_file(file_name, _init=False) as a:
            a.set_offsets(*offsets)
            a._read()
            for entry in a.strings_array:
                text = entry.string
                if text and text not in known:
                    known.add(text)
                    rows.append([text, ''])
        print("Unpacking", file_name, "...")
    except Exception as e:
        print(repr(e))


def _unpack_strings():
    """Extract strings from the .dat files into CSV files for translation.

    Wildcard specs feed COMBINED_FN; single-file specs feed their own
    "<file>_strings.csv". Existing rows are kept and new strings are
    appended with an empty translation column.
    """
    combined = None
    combined_known = None
    is_found = False
    for pattern, offsets in FILENAME_SPECS:
        if '*' in pattern:
            if combined is None:
                combined = list(read_csv_list(COMBINED_FN))
                combined_known = {row[0] for row in combined if row}
            file_names = glob.glob(pattern)
            if not file_names:
                continue
            for file_name in file_names:
                _collect_new_strings(file_name, offsets, combined, combined_known)
            is_found = True
        else:
            sp_filename = os.path.splitext(pattern)[0] + '_strings.csv'
            rows = list(read_csv_list(sp_filename))
            known = {row[0] for row in rows if row}
            _collect_new_strings(pattern, offsets, rows, known)
            write_csv_list(sp_filename, rows)

    if is_found:
        write_csv_list(COMBINED_FN, combined)
        open('combined_new.dat', 'w').close()  # for auto-translation in filetranslate


def main():
    """Command-line entry point: ``-pack`` or ``-unpack`` (the default).

    With no arguments at all the usage text is printed and nothing is done.
    """
    parser = argparse.ArgumentParser(description='Unity file raw tool')
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument('-pack', help="pack", action="store_true")
    mode.add_argument("-unpack", help="unpack", action="store_true")
    if len(sys.argv) < 2:
        print("Unity string translation tool v1")
        parser.print_help(sys.stderr)
        return
    app_args = parser.parse_args()

    if app_args.pack:
        _pack_translations()
    else:
        _unpack_strings()
# Run the pack/unpack tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
import os, sys, struct
from glob import glob
from functools import partial
from filetranslate.service_fn import read_csv_dict, write_csv_list
from UnityPy import AssetsManager
from UnityPy.math import Vector2, Rectangle
from PIL import Image
from tqdm import tqdm
# Base directory: the directory the script is started from.
ROOT = os.path.abspath(os.getcwd())

# Object types passed to AssetsManager.process for modification.
TYPES = [
    "MonoBehaviour",
    "Texture2D",
    "TextAsset",
    "Sprite",
]

# Source bundle (a glob pattern is accepted as well).
# Unused alternative: DST = os.path.join(ROOT, "output")  # destination folder
ASSETS = os.path.join(ROOT, "data.unity3d")

# Output location assigned to AssetsManager.out_path.
OUT_PATH = "out"

# Glob patterns locating the replacement files for each object type.
IN_TEXTS = "translation_out\\assets\\*.txt"
IN_IMAGES = "images\\**\\*.png"
IN_SPRITES = "images\\**\\*.png"
IN_MONO_BEHAVIOURS = "translation_out\\assets_mono\\**\\*.dat"
def base_name(path):
    """Return the file name of *path* without directory and last extension."""
    file_part = os.path.basename(path)
    stem, _ext = os.path.splitext(file_part)
    return stem
def main():
    """Re-import translated texts, images and MonoBehaviour dumps into ASSETS.

    Replacement files are located with the IN_* glob patterns. Every bundle
    matched by ASSETS is processed with ``obj_modify`` and saved with lz4
    packing, using OUT_PATH as the AssetsManager output path.
    """
    texts = glob(IN_TEXTS)
    images = glob(IN_IMAGES, recursive=True)
    # The pattern contains "**", which only spans directories when
    # recursive=True (as already done for images).
    sprites = glob(IN_SPRITES, recursive=True)
    mbehavs = glob(IN_MONO_BEHAVIOURS, recursive=True)

    def obj_modify(obj, asset, **kwargs):
        """Replace one object's content if a matching input file exists.

        Returns ``[]`` when no replacement file was found and
        ``[obj.path_id]`` otherwise, including when an image is skipped
        because its dimensions differ from the object's.
        """
        objfmt = obj.type
        data = obj.read()
        # TextAsset/MonoBehaviour files are matched on "<asset>-<path_id>.".
        name = f"{asset}-{obj.path_id}."

        if objfmt == "Sprite":
            fname = next((path for path in sprites if data.name == base_name(path)), None)
            if not fname:
                return []
            with open(fname, "rb") as img:
                _img = Image.open(img)
                if _img.height != int(data.m_Rect.height) or _img.width != int(data.m_Rect.width):
                    return [obj.path_id]
            if data.name not in ["thisone"]:  # replace to bounded rectangles and ignore mesh
                data.m_RD.settingsRaw.value = 2
                data.m_RD.textureRect = data.m_Rect
                data.m_RD.textureRectOffset = Vector2(0, 0)
                data.m_RD.uvTransform.X = 0
                data.m_RD.uvTransform.Y = 0
                data.m_RD.uvTransform.Z = 0
                data.m_RD.uvTransform.W = 0
                data.save()
            else:
                with open(fname + ".bin", "rb") as dat:
                    obj.set_raw_data(dat.read())
        elif objfmt == "Texture2D":
            fname = next((path for path in images if data.name == base_name(path)), None)
            if not fname:
                return []
            with open(fname, "rb") as img:
                _img = Image.open(img)
                # it's not the same image even if names are the same
                if _img.height != data.m_Height or _img.width != data.m_Width:
                    return [obj.path_id]
                # PIL loads pixel data lazily, so assign and save while the
                # file is still open.
                data.image = _img
                data.save()
        elif objfmt == "TextAsset":
            fname = next((path for path in texts if name in path), None)
            if not fname:
                return []
            with open(fname, "r", encoding="utf-8") as txt:
                data.text = txt.read()
            data.save()
        elif objfmt == "MonoBehaviour":
            fname = next((path for path in mbehavs if name in path), None)
            if not fname:
                return []
            with open(fname, "rb") as dat:
                obj.set_raw_data(dat.read())
        return [obj.path_id]

    for file_name in glob(ASSETS):
        print(f"Processing {file_name}...")
        am = AssetsManager(os.path.realpath(os.path.join(ROOT, file_name)))
        am.out_path = OUT_PATH
        am.progress_function = tqdm
        # obj_modify already closes over the file lists; the former
        # partial(..., flies=mbehavs) only passed a misspelled, unused kwarg.
        am.process(obj_modify, TYPES)
        print(f"Writing results to {os.path.join(am.out_path, os.path.basename(file_name))}...")
        am.save(pack="lz4")  # alternative: "none"
# Run the re-import tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment