UserUnknownFactor · December 21, 2023 13:02
diff --git a/dump_unreal_strings.py b/dump_unreal_strings.py
 # coding: utf-8
 # Python tool to dump all strings from Unreal .uexp files of unknown format
 import argparse, sys, os, glob, re, struct, csv, hashlib, math, zlib
 from multiprocessing import Pool

 # Not read by any code visible in this file; presumably a switch for dumping
 # every string rather than only keyed ones -- confirm before relying on it.
 DUMP_ALL_STRINGS = False
 # Input files: every .uexp / .uasset below the current directory (recursive).
 # os.path.join keeps the patterns portable; on Windows it yields the same
 # '.\\**\\*.ext' patterns that were previously hard-coded.
 DATA_NAMES = (
     glob.glob(os.path.join('.', '**', '*.uexp'), recursive=True)
     + glob.glob(os.path.join('.', '**', '*.uasset'), recursive=True)
 )

 # CSV dialect settings: fields are separated by DELIMITER_CHAR and special
 # characters are escaped with ESCAPE_CHAR instead of being quoted.
 ESCAPE_CHAR = '¶'
 DELIMITER_CHAR = '→'
 CSV_ENCODING = "utf-8-sig"
 GAME_FILE_ENC = 'utf-8'
 REPLACEMENT_TAGS_CSV = 'replacement_tags.csv'
 csv.register_dialect(
     "stringdata",
     delimiter=DELIMITER_CHAR,
     quotechar='\uFFFF',
     doublequote=False,
     quoting=csv.QUOTE_NONE,
     escapechar=ESCAPE_CHAR,
 )

 def tag_hash(string, str_enc="utf-8", hash_len=7):
     """Generate a short alphanumeric tag (for MTL) from any kind of string.

     An empty ``string`` yields ``''``. Otherwise each of the first
     ``hash_len`` bytes of the SHA-1 digest of ``string`` (encoded with
     ``str_enc``) is mapped to one character of ``a-z0-9`` and one marker
     character is appended:

     * ``':'`` if the string starts with tabs (optionally after ``//``) or
       starts with four or more spaces;
     * ``'!'`` if it ends with a period followed only by whitespace;
     * ``','`` otherwise.
     """
     if len(string) == 0:
         return ''

     digest = hashlib.sha1(string.encode(str_enc)).digest()
     # 26 lowercase letters followed by 10 digits: index 0-25 -> a-z, 26-35 -> 0-9.
     alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
     tag = ''.join(alphabet[digest[idx] % len(alphabet)] for idx in range(hash_len))

     # Indentation check first, then the end-of-sentence check.
     if re.search(r'\A(?:\/\/)?(?:\t+|\A {4,})', string):
         return tag + ':'
     if re.search(r'\.\s*$', string):
         return tag + '!'
     return tag + ','

 def read_csv_dict(fn):
     """Load a 'stringdata' CSV file into a dict keyed by the first column.

     Args:
         fn: Path of the CSV file.

     Returns:
         A dict mapping each row's first field to its second field, or an
         empty dict when ``fn`` is not an existing file. If a key occurs more
         than once, the last row wins.

     Rows with fewer than two fields (for example blank lines) are skipped and
     fields after the second are ignored, so files with extra columns load
     instead of raising ``ValueError``.
     """
     if not os.path.isfile(fn):
         return {}
     with open(fn, 'r', newline='', encoding=CSV_ENCODING) as f:
         return {
             row[0]: row[1]
             for row in csv.reader(f, 'stringdata')
             if len(row) >= 2
         }

 def read_csv_list(fn):
     """Return every row of the 'stringdata' CSV file ``fn`` as a list of lists.

     A path that is not an existing file yields an empty list.
     """
     if not os.path.isfile(fn):
         return []
     with open(fn, 'r', newline='', encoding=CSV_ENCODING) as handle:
         rows = []
         rows.extend(csv.reader(handle, 'stringdata'))
         return rows

 def write_csv_list(fn, lst, mode='w'):
     """Write the rows of ``lst`` to ``fn`` using the 'stringdata' dialect.

     When ``lst`` is empty the file is not opened at all. ``mode`` is handed
     to ``open``; the default ``'w'`` replaces any existing content.
     """
     if len(lst) > 0:
         with open(fn, mode, newline='', encoding=CSV_ENCODING) as out:
             csv.writer(out, 'stringdata').writerows(lst)

 def read_fstring(arr, pos=0) -> str:
     """Decode one length-prefixed string from ``arr`` starting at ``pos``.

     Layout read here: a signed 32-bit little-endian length followed by the
     characters and a NUL terminator that the length includes. A negative
     length marks two-byte (UTF-16LE) characters; a positive length marks
     single bytes, decoded as UTF-8.

     Args:
         arr: ``bytes`` (or ``bytearray``) holding the serialized string.
         pos: Offset of the length prefix inside ``arr``.

     Returns:
         The decoded text without its terminator; ``""`` when the length is 0.

     Raises:
         ValueError: If the declared length exceeds 32767.
         struct.error: If fewer than 4 bytes are available at ``pos``.
         UnicodeDecodeError: If the payload is not valid in its encoding.
     """
     # '<i' pins little-endian; a bare 'i' would follow the host byte order.
     length = struct.unpack('<i', arr[pos:pos + 4])[0]
     is_wide = length < 0
     if is_wide:
         length = -length
     if length > 32767:
         raise ValueError(f"Impossible length of a string: {length}")
     if length == 0:
         return ""

     start = pos + 4
     if is_wide:
         # length counts 2-byte units, so drop the 2-byte terminator.
         # "utf-16-le" never interprets leading bytes as a byte-order mark.
         return arr[start:start + length * 2 - 2].decode("utf-16-le")
     # length counts bytes here, so only the 1-byte terminator is dropped.
     return arr[start:start + length - 1].decode("utf-8")

 def unpack(name):
     """Extract keyed strings from one data file into a CSV next to it.

     The file content is split on 32-character keys made of ``0-9A-F``. For
     each key, the bytes just before it decide how the text is located:

     * last byte equal to 31: the text is taken from the end of the preceding
       chunk using a pattern anchored at the chunk's end;
     * otherwise, last (up to) four bytes equal to 33 as a little-endian
       integer: the text is a length-prefixed string starting one byte into
       the chunk that follows the key.

     Rows of ``[text, '', key]`` are written to
     ``<name without extension>_strings.csv``. Nothing is done if that CSV
     already exists, and no CSV is created when no string was found.

     Args:
         name: Path of the file to scan.
     """
     base_name = os.path.splitext(name)[0]
     str_fn = base_name + '_strings.csv'
     if os.path.isfile(str_fn):
         return

     # Raw byte patterns: the regex engine itself interprets \x29, \s and \0,
     # so the literals contain no invalid string escapes.
     key_regexp = re.compile(rb"([0-9A-F]{32})")
     back_str_regexp = re.compile(rb"[\x29][\x01]([\x34\x1F])([\s\S]+?)(\0{1,2})$")

     with open(name, mode="rb") as src:
         chunks = key_regexp.split(src.read())

     rows = []
     # split() with a capturing group alternates chunk, key, chunk, ...:
     # keys sit at the odd indices and always have a chunk on both sides.
     for i in range(1, len(chunks), 2):
         key = chunks[i]
         before = chunks[i - 1]
         text = None
         try:
             if int.from_bytes(before[-1:], "little") == 31:
                 found = back_str_regexp.search(before[:-1])
                 if found:
                     # Marker byte '4' selects UTF-16, the other marker UTF-8.
                     encoding = 'utf-16' if found.group(1) == b'4' else 'utf-8'
                     text = found.group(2).decode(encoding)
             elif int.from_bytes(before[-4:], "little") == 33:
                 text = read_fstring(chunks[i + 1], 1)
         except Exception:
             # Best effort: report the key and continue with the next one,
             # without swallowing KeyboardInterrupt / SystemExit.
             print(f"Can't extract string with Key: {key}")
             continue
         if text is not None:
             rows.append([text, '', key.decode('ascii')])

     if rows:
         write_csv_list(str_fn, rows)
         print(f"{name}... OK; ({len(rows)} strings)")

 def repack(name):
     """Placeholder for writing strings back into ``name``; does nothing yet."""
     return None

 def main():
     """Command-line entry point: pick a mode and run it over DATA_NAMES.

     With no command-line arguments, a banner is printed and the usage help
     is written to stderr. Otherwise ``-e`` selects extraction and ``-r``
     selects replacement; extraction wins when both are given. The chosen
     worker is mapped over DATA_NAMES in a process pool.
     """
     parser = argparse.ArgumentParser()
     parser.add_argument("-e", help="Extract Unreal strings", action="store_true")
     parser.add_argument("-r", help="Replace Unreal strings (not implemented)", action="store_true")
     # sys.argv always contains the script name, so "no arguments" is length 1.
     if len(sys.argv) < 2:
         print('Unreal String Dump Tool v1.0')
         parser.print_help(sys.stderr)
         return

     app_args = parser.parse_args()
     # Replace only when -r is given without -e, so the announced mode and the
     # worker that actually runs always agree.
     replacing = app_args.r and not app_args.e
     if replacing:
         print("Replacing strings...\n")
     else:
         print("Extracting strings...\n")
     with Pool() as pool:
         pool.map(repack if replacing else unpack, DATA_NAMES)

 if __name__ == '__main__':
     main()
	# coding: utf-8
	# Python tool to dump all strings from Unreal .uexp files of unknown format
	import argparse, sys, os, glob, re, struct, csv, hashlib, math, zlib
	from multiprocessing import Pool

	# Not read by any code visible in this file; presumably a switch for dumping
	# every string rather than only keyed ones -- confirm before relying on it.
	DUMP_ALL_STRINGS = False
	# Input files: every .uexp / .uasset below the current directory (recursive).
	# os.path.join builds the recursive '**' patterns portably.
	DATA_NAMES = (
	    glob.glob(os.path.join('.', '**', '*.uexp'), recursive=True)
	    + glob.glob(os.path.join('.', '**', '*.uasset'), recursive=True)
	)

	# CSV dialect settings: fields are separated by DELIMITER_CHAR and special
	# characters are escaped with ESCAPE_CHAR instead of being quoted.
	ESCAPE_CHAR = '¶'
	DELIMITER_CHAR = '→'
	CSV_ENCODING = "utf-8-sig"
	GAME_FILE_ENC = 'utf-8'
	REPLACEMENT_TAGS_CSV = 'replacement_tags.csv'
	csv.register_dialect(
	    "stringdata",
	    delimiter=DELIMITER_CHAR,
	    quotechar='\uFFFF',
	    doublequote=False,
	    quoting=csv.QUOTE_NONE,
	    escapechar=ESCAPE_CHAR,
	)

	def tag_hash(string, str_enc="utf-8", hash_len=7):
	    """Generate a short alphanumeric tag (for MTL) from any kind of string.

	    An empty ``string`` yields ``''``. Otherwise each of the first
	    ``hash_len`` bytes of the SHA-1 digest of ``string`` (encoded with
	    ``str_enc``) is mapped to one character of ``a-z0-9`` and one marker
	    character is appended:

	    * ``':'`` if the string starts with tabs (optionally after ``//``) or
	      starts with four or more spaces;
	    * ``'!'`` if it ends with a period followed only by whitespace;
	    * ``','`` otherwise.
	    """
	    if len(string) == 0:
	        return ''

	    digest = hashlib.sha1(string.encode(str_enc)).digest()
	    # 26 lowercase letters followed by 10 digits: index 0-25 -> a-z, 26-35 -> 0-9.
	    alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
	    tag = ''.join(alphabet[digest[idx] % len(alphabet)] for idx in range(hash_len))

	    # Indentation check first ('|' is a real alternation: tabs OR 4+ spaces),
	    # then the end-of-sentence check.
	    if re.search(r'\A(?:\/\/)?(?:\t+|\A {4,})', string):
	        return tag + ':'
	    if re.search(r'\.\s*$', string):
	        return tag + '!'
	    return tag + ','

	def read_csv_dict(fn):
	    """Load a 'stringdata' CSV file into a dict keyed by the first column.

	    Args:
	        fn: Path of the CSV file.

	    Returns:
	        A dict mapping each row's first field to its second field, or an
	        empty dict when ``fn`` is not an existing file. If a key occurs
	        more than once, the last row wins.

	    Rows with fewer than two fields (for example blank lines) are skipped
	    and fields after the second are ignored, so files with extra columns
	    load instead of raising ``ValueError``.
	    """
	    if not os.path.isfile(fn):
	        return {}
	    with open(fn, 'r', newline='', encoding=CSV_ENCODING) as f:
	        return {
	            row[0]: row[1]
	            for row in csv.reader(f, 'stringdata')
	            if len(row) >= 2
	        }

	def read_csv_list(fn):
	    """Return every row of the 'stringdata' CSV file ``fn`` as a list of lists.

	    A path that is not an existing file yields an empty list.
	    """
	    if not os.path.isfile(fn):
	        return []
	    with open(fn, 'r', newline='', encoding=CSV_ENCODING) as handle:
	        return list(csv.reader(handle, 'stringdata'))

	def write_csv_list(fn, lst, mode='w'):
	    """Write the rows of ``lst`` to ``fn`` using the 'stringdata' dialect.

	    When ``lst`` is empty the file is not opened at all. ``mode`` is handed
	    to ``open``; the default ``'w'`` replaces any existing content.
	    """
	    if len(lst) == 0:
	        return
	    with open(fn, mode, newline='', encoding=CSV_ENCODING) as out:
	        csv.writer(out, 'stringdata').writerows(lst)

	def read_fstring(arr, pos=0) -> str:
	    """Decode one length-prefixed string from ``arr`` starting at ``pos``.

	    Layout read here: a signed 32-bit little-endian length followed by the
	    characters and a NUL terminator that the length includes. A negative
	    length marks two-byte (UTF-16LE) characters; a positive length marks
	    single bytes, decoded as UTF-8.

	    Args:
	        arr: ``bytes`` (or ``bytearray``) holding the serialized string.
	        pos: Offset of the length prefix inside ``arr``.

	    Returns:
	        The decoded text without its terminator; ``""`` when the length is 0.

	    Raises:
	        ValueError: If the declared length exceeds 32767.
	        struct.error: If fewer than 4 bytes are available at ``pos``.
	        UnicodeDecodeError: If the payload is not valid in its encoding.
	    """
	    # '<i' pins little-endian; a bare 'i' would follow the host byte order.
	    length = struct.unpack('<i', arr[pos:pos + 4])[0]
	    is_wide = length < 0
	    if is_wide:
	        length = -length
	    if length > 32767:
	        raise ValueError(f"Impossible length of a string: {length}")
	    if length == 0:
	        return ""

	    start = pos + 4
	    if is_wide:
	        # length counts 2-byte units, so drop the 2-byte terminator.
	        # "utf-16-le" never interprets leading bytes as a byte-order mark.
	        return arr[start:start + length * 2 - 2].decode("utf-16-le")
	    # length counts bytes here, so only the 1-byte terminator is dropped.
	    return arr[start:start + length - 1].decode("utf-8")

	def unpack(name):
	    """Extract keyed strings from one data file into a CSV next to it.

	    The file content is split on 32-character keys made of ``0-9A-F``. For
	    each key, the bytes just before it decide how the text is located:

	    * last byte equal to 31: the text is taken from the end of the
	      preceding chunk using a pattern anchored at the chunk's end;
	    * otherwise, last (up to) four bytes equal to 33 as a little-endian
	      integer: the text is a length-prefixed string starting one byte into
	      the chunk that follows the key.

	    Rows of ``[text, '', key]`` are written to
	    ``<name without extension>_strings.csv``. Nothing is done if that CSV
	    already exists, and no CSV is created when no string was found.

	    Args:
	        name: Path of the file to scan.
	    """
	    base_name = os.path.splitext(name)[0]
	    str_fn = base_name + '_strings.csv'
	    if os.path.isfile(str_fn):
	        return

	    # Raw byte patterns: the regex engine itself interprets \x29, \s and \0,
	    # so the literals contain no invalid string escapes.
	    key_regexp = re.compile(rb"([0-9A-F]{32})")
	    back_str_regexp = re.compile(rb"[\x29][\x01]([\x34\x1F])([\s\S]+?)(\0{1,2})$")

	    with open(name, mode="rb") as src:
	        chunks = key_regexp.split(src.read())

	    rows = []
	    # split() with a capturing group alternates chunk, key, chunk, ...:
	    # keys sit at the odd indices and always have a chunk on both sides.
	    for i in range(1, len(chunks), 2):
	        key = chunks[i]
	        before = chunks[i - 1]
	        text = None
	        try:
	            if int.from_bytes(before[-1:], "little") == 31:
	                found = back_str_regexp.search(before[:-1])
	                if found:
	                    # Marker byte '4' selects UTF-16, the other marker UTF-8.
	                    encoding = 'utf-16' if found.group(1) == b'4' else 'utf-8'
	                    text = found.group(2).decode(encoding)
	            elif int.from_bytes(before[-4:], "little") == 33:
	                text = read_fstring(chunks[i + 1], 1)
	        except Exception:
	            # Best effort: report the key and continue with the next one,
	            # without swallowing KeyboardInterrupt / SystemExit.
	            print(f"Can't extract string with Key: {key}")
	            continue
	        if text is not None:
	            rows.append([text, '', key.decode('ascii')])

	    if rows:
	        write_csv_list(str_fn, rows)
	        print(f"{name}... OK; ({len(rows)} strings)")

	def repack(name):
	    """Placeholder for writing strings back into ``name``; does nothing yet."""
	    return None

	def main():
	    """Command-line entry point: pick a mode and run it over DATA_NAMES.

	    With no command-line arguments, a banner is printed and the usage help
	    is written to stderr. Otherwise ``-e`` selects extraction and ``-r``
	    selects replacement; extraction wins when both are given. The chosen
	    worker is mapped over DATA_NAMES in a process pool.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument("-e", help="Extract Unreal strings", action="store_true")
	    parser.add_argument("-r", help="Replace Unreal strings (not implemented)", action="store_true")
	    # sys.argv always contains the script name, so "no arguments" is length 1.
	    if len(sys.argv) < 2:
	        print('Unreal String Dump Tool v1.0')
	        parser.print_help(sys.stderr)
	        return

	    app_args = parser.parse_args()
	    # Replace only when -r is given without -e, so the announced mode and
	    # the worker that actually runs always agree.
	    replacing = app_args.r and not app_args.e
	    if replacing:
	        print("Replacing strings...\n")
	    else:
	        print("Extracting strings...\n")
	    with Pool() as pool:
	        pool.map(repack if replacing else unpack, DATA_NAMES)

	if __name__ == '__main__':
	    main()