Skip to content

Instantly share code, notes, and snippets.

@UserUnknownFactor
Last active December 21, 2023 13:02
Show Gist options
  • Save UserUnknownFactor/5d8045b26b6efa8a49ad1ba5f46ecf30 to your computer and use it in GitHub Desktop.
Save UserUnknownFactor/5d8045b26b6efa8a49ad1ba5f46ecf30 to your computer and use it in GitHub Desktop.
Python tool to dump all strings from an Unreal .uexp file of unknown format
# coding: utf-8
# Python tool to dump all strings from Unreal .uexp files of unknown format
import argparse, sys, os, glob, re, struct, csv, hashlib, math, zlib
from multiprocessing import Pool
# Not referenced by any code visible in this file.
DUMP_ALL_STRINGS = False

# Every .uexp and .uasset file below the current directory (recursive search),
# .uexp files first.  The pattern is assembled with os.path.join so that it
# uses the host's path separator: on Windows this yields exactly
# '.\\**\\*.uexp', while a hard-coded backslash would match nothing on POSIX.
DATA_NAMES = [
    path
    for extension in ('uexp', 'uasset')
    for path in glob.glob(os.path.join('.', '**', '*.' + extension), recursive=True)
]

# Characters used by the "stringdata" CSV dialect registered below.
ESCAPE_CHAR = '¶'
DELIMITER_CHAR = '→'
# Encoding used when reading and writing the CSV files.
CSV_ENCODING = "utf-8-sig"
# Not referenced by any code visible in this file.
GAME_FILE_ENC = 'utf-8'
REPLACEMENT_TAGS_CSV = 'replacement_tags.csv'

# Quoting is disabled entirely (QUOTE_NONE); special characters inside a field
# are escaped with ESCAPE_CHAR instead.
csv.register_dialect(
    "stringdata",
    delimiter=DELIMITER_CHAR,
    quotechar='\uFFFF',
    doublequote=False,
    quoting=csv.QUOTE_NONE,
    escapechar=ESCAPE_CHAR,
)
def tag_hash(string, str_enc="utf-8", hash_len=7):
    """Generate a short English-looking tag for MTL from any kind of string.

    The tag consists of ``hash_len`` characters from ``a-z0-9``, derived from
    the leading bytes of the SHA-1 digest of ``string``, followed by a single
    punctuation character chosen from the shape of the text:

    * ``:`` if the text begins with tabs (optionally preceded by ``//``) or
      begins with four or more spaces;
    * ``!`` if the text ends with a period, ignoring trailing whitespace;
    * ``,`` otherwise.

    Args:
        string: Text to hash.
        str_enc: Encoding used to turn ``string`` into bytes before hashing.
        hash_len: Number of hash characters in the tag (at most 20, the size
            of a SHA-1 digest).

    Returns:
        The tag, or ``''`` when ``string`` is empty.

    Raises:
        ValueError: If ``hash_len`` is larger than the digest size.
    """
    if not string:
        return ''

    digest = hashlib.sha1(string.encode(str_enc)).digest()
    if hash_len > len(digest):
        raise ValueError(
            f"hash_len must not exceed {len(digest)}, got {hash_len}"
        )

    # Values 0-25 map to lowercase letters, 26-35 map to digits.
    alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
    tag = ''.join(alphabet[digest[i] % len(alphabet)] for i in range(hash_len))

    # Indentation and end-of-line checks; indentation takes priority.
    if re.search(r'\A(?:\/\/)?(?:\t+|\A {4,})', string):
        endchar = ':'
    elif re.search(r'\.\s*$', string):
        endchar = '!'
    else:
        endchar = ','
    return tag + endchar
def read_csv_dict(fn):
    """Load a ``stringdata`` CSV file as a dictionary.

    The first field of each row becomes the key and the second field the
    value; when a key occurs more than once the last row wins.  Rows with
    fewer than two fields (for example blank lines) are skipped and fields
    beyond the second are ignored, so a stray empty line or an extra column
    no longer aborts the whole load with ``ValueError``.

    Args:
        fn: Path of the CSV file.

    Returns:
        The mapping read from the file, or an empty dict if ``fn`` is not an
        existing file.
    """
    if not os.path.isfile(fn):
        return {}
    with open(fn, 'r', newline='', encoding=CSV_ENCODING) as f:
        return {
            row[0]: row[1]
            for row in csv.reader(f, 'stringdata')
            if len(row) >= 2
        }
def read_csv_list(fn):
    """Return all rows of a ``stringdata`` CSV file as a list of row lists.

    An empty list is returned when ``fn`` is not an existing file.
    """
    if not os.path.isfile(fn):
        return list()
    with open(fn, 'r', newline='', encoding=CSV_ENCODING) as handle:
        rows = csv.reader(handle, 'stringdata')
        return list(rows)
def write_csv_list(fn, lst, mode='w'):
    """Write the rows in ``lst`` to ``fn`` using the ``stringdata`` dialect.

    Nothing is written (and no file is created) when ``lst`` is empty.
    ``mode`` is passed to ``open`` unchanged, e.g. ``'a'`` to append.
    """
    if len(lst) < 1:
        return
    with open(fn, mode, newline='', encoding=CSV_ENCODING) as out:
        csv.writer(out, 'stringdata').writerows(lst)
def read_fstring(arr, pos=0) -> str:
    """Decode one length-prefixed Unreal string from a byte buffer.

    The string starts with a little-endian signed 32-bit length that counts
    the terminator as well.  A positive length is a number of bytes of 8-bit
    text (decoded here as UTF-8); a negative length is the negated number of
    16-bit code units of little-endian UTF-16 text.

    Args:
        arr: Bytes-like buffer containing the string.
        pos: Offset of the length field inside ``arr``.

    Returns:
        The decoded text without its terminator; ``""`` for a zero length.

    Raises:
        ValueError: If the stored length exceeds 32767, or (as
            ``UnicodeDecodeError``) if the payload is not valid text.
        struct.error: If fewer than four bytes are available at ``pos``.
    """
    # Explicit '<' keeps the result independent of the host's byte order.
    (length,) = struct.unpack_from('<i', arr, pos)
    is_wide = length < 0
    if is_wide:
        length = -length
    if length > 32767:
        raise ValueError(f"Impossible length of a string: {length}")
    if length == 0:
        return ""

    start = pos + 4
    if is_wide:
        # Two bytes per code unit, minus the two-byte terminator.  The
        # explicit "-le" codec avoids BOM sniffing and native byte order.
        payload = arr[start:start + (length - 1) * 2]
        return payload.decode("utf-16-le")
    # One byte per unit, minus the one-byte terminator.
    payload = arr[start:start + length - 1]
    return payload.decode("utf-8")
def unpack(name):
    """Extract keyed strings from one data file into ``<name>_strings.csv``.

    The file is split on 32-character uppercase hexadecimal keys.  For each
    key, one of two layouts is recognised:

    * the data before the key ends with byte 0x1F: the text is searched for
      backwards in that data as ``0x29 0x01 <type> <text> <1-2 NUL bytes>``,
      where type ``0x34`` means UTF-16 and ``0x1F`` means UTF-8;
    * the last four bytes before the key, read as a little-endian integer,
      equal 33: the text is a length-prefixed string located one byte after
      the key.

    Each extracted row is ``[text, '', key]``.  The file is skipped entirely
    if its CSV already exists, and no CSV is created when nothing is found.
    A one-line status is printed for every processed file.

    Args:
        name: Path of the ``.uexp`` / ``.uasset`` file to scan.
    """
    str_fn = os.path.splitext(name)[0] + '_strings.csv'
    if os.path.isfile(str_fn):
        return  # already extracted; never overwrite an existing dump

    # Raw bytes literals: the escapes are interpreted by ``re`` itself, which
    # avoids the invalid "\s" / "\S" escapes of a non-raw literal.
    key_regexp = re.compile(rb"([0-9A-F]{32})")
    back_str_regexp = re.compile(rb"[\x29][\x01]([\x34\x1F])([\s\S]+?)(\0{1,2})$")

    with open(name, mode="rb") as torg:
        original_bytes = torg.read()

    # Splitting on a capturing group keeps the keys in the result, so the
    # list alternates: data, key, data, key, ..., data.
    split_bytes = key_regexp.split(original_bytes)

    data_array = []
    for i, item in enumerate(split_bytes):
        if not key_regexp.match(item):
            continue
        before = split_bytes[i - 1]

        if int.from_bytes(before[-1:], "little") == 31:
            found = back_str_regexp.search(before[:-1])
            if found is None:
                continue
            # Explicit little-endian codec: no BOM sniffing, no dependence on
            # the host's native byte order.
            encoding = 'utf-16-le' if found.group(1) == b'4' else 'utf-8'
            try:
                text = found.group(2).decode(encoding)
            except UnicodeDecodeError:
                print(f"Can't extract string with Key: {item}")
                continue
        elif int.from_bytes(before[-4:], "little") == 33:
            try:
                text = read_fstring(split_bytes[i + 1], 1)
            except Exception:
                # Best effort: a malformed string must not abort the file,
                # but Ctrl+C / SystemExit are no longer swallowed.
                print(f"Can't extract string with Key: {item}")
                continue
        else:
            continue

        data_array.append([text, '', item.decode('ascii')])

    status = "NONE"
    if data_array:
        write_csv_list(str_fn, data_array)
        status = f"OK; ({len(data_array)} strings)"
    print(f"{name}... {status}")
def repack(name):
    """Placeholder for writing replaced strings back into ``name``.

    Replacement is not implemented (the ``-r`` option's help text says the
    same), so this deliberately does nothing and returns ``None``.  It is kept
    as a silent no-op so that it can still be mapped over every data file.
    """
    return None
def main():
    """Command-line entry point.

    ``-e`` extracts strings from every file in ``DATA_NAMES``; ``-r`` runs the
    (not implemented) replacement step instead.  When both are given,
    extraction wins.  Without any argument the banner and usage help are
    printed and nothing is processed.  Files are handled in parallel by a
    process pool.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", help="Extract Unreal strings", action="store_true")
    parser.add_argument("-r", help="Replace Unreal strings (not implemented)", action="store_true")

    # sys.argv always holds the script name, so "no arguments" is length 1.
    if len(sys.argv) < 2:
        print('Unreal String Dump Tool v1.0')
        parser.print_help(sys.stderr)
        return

    app_args = parser.parse_args()

    # Decide the mode once so the banner always matches what is executed.
    if app_args.r and not app_args.e:
        print("Replacing strings...\n")
        worker = repack
    else:
        print("Extracting strings...\n")
        worker = unpack

    with Pool() as p:
        p.map(worker, DATA_NAMES)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment