Last active
December 21, 2023 13:02
-
-
Save UserUnknownFactor/5d8045b26b6efa8a49ad1ba5f46ecf30 to your computer and use it in GitHub Desktop.
Python tool to dump all strings from Unreal .uexp file of unknown format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
# Python tool to dump all strings from Unreal .uexp files of unknown format
import argparse
import csv
import glob
import hashlib
import math
import os
import re
import struct
import sys
import zlib
from multiprocessing import Pool
# Flag kept from the original script; nothing in the visible code reads it.
DUMP_ALL_STRINGS = False

# Every .uexp / .uasset file below the current working directory, searched
# recursively.  The patterns are assembled with os.path.join so the native
# path separator is used; on Windows this yields exactly '.\\**\\*.uexp'.
DATA_NAMES = (
    glob.glob(os.path.join('.', '**', '*.uexp'), recursive=True)
    + glob.glob(os.path.join('.', '**', '*.uasset'), recursive=True)
)

# Characters used by the "stringdata" CSV dialect registered below.
ESCAPE_CHAR = '¶'
DELIMITER_CHAR = '→'

# Encoding passed to open() by the CSV helpers in this file.
CSV_ENCODING = "utf-8-sig"

# Not referenced by the visible code; presumably the text encoding of the
# game files and the name of an optional tag table -- TODO confirm.
GAME_FILE_ENC = 'utf-8'
REPLACEMENT_TAGS_CSV = 'replacement_tags.csv'

# Quoting is disabled (QUOTE_NONE); special characters inside a field are
# prefixed with ESCAPE_CHAR instead of being wrapped in quotes.
csv.register_dialect(
    "stringdata",
    delimiter=DELIMITER_CHAR,
    quotechar='\uFFFF',
    doublequote=False,
    quoting=csv.QUOTE_NONE,
    escapechar=ESCAPE_CHAR,
)
def tag_hash(string, str_enc="utf-8", hash_len=7):
    """Build a short, deterministic tag for *string*.

    The string is encoded with *str_enc* and hashed with SHA-1.  Each of the
    first *hash_len* digest bytes is reduced modulo 36 and mapped to a
    lowercase letter (0-25) or a digit (26-35).  One punctuation character
    is appended to the result:

    * ``:`` when the string starts with tabs (optionally after ``//``) or
      with at least four spaces,
    * ``!`` when the string ends with a period (trailing whitespace allowed),
    * ``,`` otherwise.

    An empty string produces an empty tag.
    """
    if len(string) == 0:
        return ''

    alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
    digest = hashlib.sha1(string.encode(str_enc)).digest()
    tag = ''.join([alphabet[digest[k] % len(alphabet)] for k in range(hash_len)])

    # Indentation check first, then the end-of-line check.
    if re.search(r'\A(?:\/\/)?(?:\t+|\A {4,})', string):
        suffix = ':'
    elif re.search(r'\.\s*$', string):
        suffix = '!'
    else:
        suffix = ','
    return tag + suffix
def read_csv_dict(fn):
    """Read the "stringdata" CSV file *fn* into a dict.

    Every row is handed to ``dict()`` as a key/value pair, so each row must
    contain exactly two fields.  Returns an empty dict when *fn* is not an
    existing file.
    """
    if not os.path.isfile(fn):
        return {}
    with open(fn, 'r', newline='', encoding=CSV_ENCODING) as src:
        rows = csv.reader(src, 'stringdata')
        return dict(rows)
def read_csv_list(fn):
    """Read the "stringdata" CSV file *fn* into a list of rows.

    Each row is a list of field strings.  Returns an empty list when *fn*
    is not an existing file.
    """
    if not os.path.isfile(fn):
        return []
    with open(fn, 'r', newline='', encoding=CSV_ENCODING) as src:
        rows = csv.reader(src, 'stringdata')
        return list(rows)
def write_csv_list(fn, lst, mode='w'):
    """Write the rows in *lst* to *fn* using the "stringdata" CSV dialect.

    *mode* is passed to ``open()`` (``'w'`` overwrites, ``'a'`` appends).
    Nothing is written, and no file is created, when *lst* is empty.
    """
    if not len(lst):
        return
    with open(fn, mode, newline='', encoding=CSV_ENCODING) as dst:
        csv.writer(dst, 'stringdata').writerows(lst)
def read_fstring(arr, pos=0) -> str:
    """Decode a length-prefixed string stored in *arr* at offset *pos*.

    Layout read by this function: a little-endian signed 32-bit length
    followed by the character data.  The length counts the terminating NUL.
    A negative length marks UTF-16 data (the count is then in 2-byte units);
    a positive length marks single-byte data, decoded as UTF-8.

    Args:
        arr: bytes-like buffer containing the serialized string.
        pos: offset of the 4-byte length prefix inside *arr*.

    Returns:
        The decoded text without its terminator; ``""`` for a zero length.

    Raises:
        ValueError: the length prefix exceeds 32767 characters.
        struct.error: fewer than four bytes are available at *pos*.
        UnicodeDecodeError: the payload is not valid in the indicated encoding.
    """
    # Explicit '<' so the prefix is read little-endian on every host.
    length = struct.unpack_from('<i', arr, pos)[0]
    is_wide = length < 0
    if is_wide:
        length = -length
    if length > 32767:
        raise ValueError(f"Impossible length of a string: {length}")
    if length == 0:
        return ""

    start = pos + 4
    if is_wide:
        # (length - 1) code units of two bytes each; the NUL unit is dropped.
        # "utf-16-le" avoids BOM sniffing and native-endian decoding.
        payload = arr[start:start + (length - 1) * 2]
        return payload.decode("utf-16-le")

    # Single-byte data: one byte per character, minus the NUL terminator.
    payload = arr[start:start + length - 1]
    return payload.decode("utf-8")
def _extract_keyed_strings(raw):
    """Return ``[text, '', key]`` rows for each 32-char hex key found in *raw*."""
    key_re = re.compile(rb"([0-9A-F]{32})")
    # Raw literal: the escapes are interpreted by the regex engine, which
    # avoids the invalid "\s" escape of a plain bytes literal.
    back_str_re = re.compile(rb"[\x29][\x01]([\x34\x1F])([\s\S]+?)(\0{1,2})$")

    rows = []
    # Splitting on a capturing group yields data, key, data, key, ..., data;
    # walk the keys together with the chunks on either side of them.
    parts = key_re.split(raw)
    for before, key, after in zip(parts[0::2], parts[1::2], parts[2::2]):
        if before.endswith(b'\x1f'):
            # Text sits *before* the key: marker bytes, text, NUL terminator,
            # then the 0x1F byte that was just matched.
            found = back_str_re.search(before[:-1])
            if not found:
                continue
            # A '4' marker selects UTF-16; the data is little-endian, so the
            # explicit codec is used instead of BOM/native-order guessing.
            encoding = 'utf-16-le' if found.group(1) == b'4' else 'utf-8'
            try:
                text = found.group(2).decode(encoding)
            except UnicodeDecodeError:
                print(f"Can't extract string with Key: {key}")
                continue
            rows.append([text, '', key.decode('ascii')])
        elif int.from_bytes(before[-4:], "little") == 33:
            # The 4 bytes before the key equal 33 -- presumably the key's own
            # length prefix (32 chars + NUL).  The text follows the key as a
            # length-prefixed string after one skipped byte.
            try:
                text = read_fstring(after, 1)
            except Exception:
                # Best effort: report the key and keep scanning the file.
                print(f"Can't extract string with Key: {key}")
                continue
            rows.append([text, '', key.decode('ascii')])
    return rows


def unpack(name):
    """Dump the keyed strings of the binary file *name* to a CSV file.

    The output is written next to the input as ``<name without extension>
    _strings.csv`` with rows ``[text, '', key]``.  The file is skipped when
    that CSV already exists, and no CSV is created when nothing was found.
    A one-line status is printed for every processed file.
    """
    str_fn = os.path.splitext(name)[0] + '_strings.csv'
    if os.path.isfile(str_fn):
        return

    with open(name, mode="rb") as src:
        rows = _extract_keyed_strings(src.read())

    status = "NONE"
    if rows:
        write_csv_list(str_fn, rows)
        status = f"OK; ({len(rows)} strings)"
    print(f"{name}... {status}")
def repack(name):
    """Placeholder for writing strings back into *name*; does nothing yet."""
    return None
def main():
    """Command-line entry point.

    ``-e`` extracts strings from every file in ``DATA_NAMES``; ``-r`` runs
    the (not yet implemented) replacement step.  When both flags are given,
    extraction wins.  Without any argument the banner and usage are printed
    and nothing is processed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", help="Extract Unreal strings", action="store_true")
    parser.add_argument("-r", help="Replace Unreal strings (not implemented)", action="store_true")

    # sys.argv always contains the script name, so "no arguments" means
    # fewer than two entries.
    if len(sys.argv) < 2:
        print('Unreal String Dump Tool v1.0')
        parser.print_help(sys.stderr)
        return

    app_args = parser.parse_args()

    # Decide the mode once so the printed message always matches the work done.
    replacing = app_args.r and not app_args.e
    if replacing:
        print("Replacing strings...\n")
        worker = repack
    else:
        print("Extracting strings...\n")
        worker = unpack

    # One task per file, spread over a pool of worker processes.
    with Pool() as pool:
        pool.map(worker, DATA_NAMES)


# The guard keeps worker processes that re-import this module from
# starting the tool again.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment