Unpack/repack tools for RPG Maker VX Ace games (Python port) [Not working]
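This gist holds three independent scripts, concatenated below: (1) a string dump/patch tool that edits .rvdata2 files directly without any Marshal serialization library, followed by (2) a packer and (3) an unpacker that instead round-trip the data files through jsonpickle JSON using the rubymarshal library. Per the author's note at the end, the tools are kept for reference and are not fully working.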
# coding: utf-8
# Alternative version that doesn't use serialization/deserialization libraries;
# it modifies the files directly instead.
import argparse, sys, os, glob, re, struct, csv, hashlib, math, zlib
from multiprocessing import Pool
DUMP_ALL_STRINGS = False
OUT_DIR = 'out'
STR_RAW_STR_RE = re.compile(b'I"')
STR_RAW_ARR_STR_RE = r''
STR_GARBAGE_RE = re.compile(r'^_+$')
TAGS_RE = re.compile(r'\\\w{1,2}\[[^\]]+\]')
ESCAPE_CHAR = '¶'
DELIMITER_CHAR = '→'
CSV_ENCODING = "utf-8-sig"
GAME_FILE_ENC = 'utf-8'
REPLACEMENT_TAGS_CSV = 'replacement_tags.csv'
csv.register_dialect("stringdata", delimiter=DELIMITER_CHAR, quotechar=None, doublequote=False, quoting=csv.QUOTE_NONE, escapechar=ESCAPE_CHAR)
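# Each row of the generated *_strings.csv files has three DELIMITER_CHAR-separated
# columns: original_text → translation → "byte_position,byte_length";
# repack() below reads the replacement text from the second column.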
RANGES = [
    {"from": ord(u"\u3300"), "to": ord(u"\u33ff")},         # compatibility ideographs
    {"from": ord(u"\ufe30"), "to": ord(u"\ufe4f")},         # compatibility ideographs
    {"from": ord(u"\uf900"), "to": ord(u"\ufaff")},         # compatibility ideographs
    {"from": ord(u"\U0002F800"), "to": ord(u"\U0002fa1f")}, # compatibility ideographs
    {"from": ord(u"\u3040"), "to": ord(u"\u309f")},         # Japanese Hiragana
    {"from": ord(u"\u30a0"), "to": ord(u"\u30ff")},         # Japanese Katakana
    {"from": ord(u"\u2e80"), "to": ord(u"\u2eff")},         # CJK radicals supplement
    {"from": ord(u"\u4e00"), "to": ord(u"\u9fff")},         # CJK unified ideographs
    {"from": ord(u"\u3400"), "to": ord(u"\u4dbf")},         # CJK extension A
    {"from": ord(u"\U00020000"), "to": ord(u"\U0002a6df")}, # CJK extension B
    {"from": ord(u"\U0002a700"), "to": ord(u"\U0002b73f")}, # CJK extension C
    {"from": ord(u"\U0002b740"), "to": ord(u"\U0002b81f")}, # CJK extension D
    {"from": ord(u"\U0002b820"), "to": ord(u"\U0002ceaf")}  # CJK extension E, included as of Unicode 8.0
]
#STR_ENG_RE = re.compile(r'^[0-9a-zA-Z_.,;\-\(\[\]\) @\"\']+$')
def is_cjk_substring(string):
    """ Checks if the given string contains CJK (e.g. Japanese) characters. """
    #return not STR_ENG_RE.match(string)
    for char in string:
        if any(rng["from"] <= ord(char) <= rng["to"] for rng in RANGES):
            return True
    return False
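# Examples: is_cjk_substring("Hello") == False; is_cjk_substring("こんにちは") == True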
def tag_hash(string, str_enc="utf-8", hash_len=7):
    """ Generates a short English tag for MTL from any kind of string. """
    if len(string) < 1: return ''
    d = hashlib.sha1(string.encode(str_enc)).digest()
    s = ''
    n_chars = 26 + 10
    for i in range(hash_len):
        x = d[i] % n_chars
        #s += chr(ord('a') + x) # lowercase letters, n_chars = 26
        s += (chr(ord('0') + x - 26) if x >= 26 else chr(ord('a') + x)) # numbers + lowercase, n_chars = 36
        #s += (chr(ord('A') + x - 26) if x >= 26 else chr(ord('a') + x)) # letters, n_chars = 52
    endchar = ','
    # indentation and end-of-sentence checks
    if re.search(r'\A(?:\/\/)?(?:\t+| {4,})', string):
        endchar = ':'
    elif re.search(r'\.\s*$', string):
        endchar = '!'
    return s + endchar
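# tag_hash("何か") yields 7 base-36 characters plus a terminator chosen from
# ','/':'/'!' depending on the source string, e.g. "k3x9a2b," (the hash shown
# here is illustrative, not the real SHA-1 derived value).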
def column(arr, i):
    return [row[i] for row in arr]

def split_data(data, positions):
    """ Slices data into the "start,length" chunks listed in positions. """
    dlen = len(data)
    new_arr = []
    for pos in positions:
        start, l = (int(x) for x in pos.split(','))
        new_arr.append(data[start:min(dlen, start + l)])
    return new_arr
def replace_string(data, string, replacer, once=True):
    for i, substr in enumerate(data):
        if substr == string:
            data[i] = replacer
            if once: break
    return data
def get_sbyte(data, pos): return struct.unpack("b", data[pos:pos+1])[0]
def get_ubyte(data, pos): return struct.unpack("B", data[pos:pos+1])[0]

def read_long(data, pos):
    """ Reads a Ruby Marshal packed integer from a byte array.
    Returns a (value, bytes_consumed) tuple. """
    i = 1
    length = get_sbyte(data, pos)
    if length == 0:
        return (0, 1)
    if 5 < length < 128:
        return (length - 5, 1) # small positive ints are stored as value + 5
    elif -129 < length < -5:
        return (length + 5, 1) # small negative ints are stored as value - 5
    # otherwise the first byte is the signed payload size, followed by
    # that many little-endian bytes
    result = 0
    factor = 1
    for s in range(abs(length)):
        result += get_ubyte(data, pos + i) * factor
        i += 1
        factor *= 256
    if length < 0: result -= factor
    return (result, i)
def make_sbyte(num): return struct.pack("b", num)
def make_ubyte(num): return struct.pack("B", num)

def make_long(num):
    """ Packs an integer into Ruby Marshal's packed-integer format. """
    data = b''
    if not num: data = b"\0"
    elif 0 < num < 123: data = make_sbyte(num + 5)
    elif -124 < num < 0: data = make_sbyte(num - 5)
    else:
        size = int(math.ceil(num.bit_length() / 8.0))
        if size > 5: raise Exception("%d too long for serialization" % num)
        original_obj = num
        factor = 256 ** size
        if num < 0 and num == -factor:
            size -= 1
            num += factor // 256 # integer division keeps num an int in Python 3
        elif num < 0:
            num += factor
        sign = int(math.copysign(size, original_obj))
        data = make_sbyte(sign)
        for i in range(size):
            data += make_ubyte(num % 256)
            num //= 256
    return data
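# Quick round-trip check of the packed-integer codec above (the byte values
# follow from the Ruby Marshal rules encoded in read_long/make_long):
#   make_long(0)   == b"\x00"
#   make_long(5)   == b"\x0a"          # small positives are stored as value + 5
#   make_long(300) == b"\x02\x2c\x01"  # size byte 2, then 0x012C little-endian
#   read_long(make_long(300), 0) == (300, 3)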
def read_csv_dict(fn):
    if os.path.isfile(fn):
        with open(fn, 'r', newline='', encoding=CSV_ENCODING) as f:
            return dict(csv.reader(f, 'stringdata'))
    else:
        return dict()

def read_csv_list(fn):
    if os.path.isfile(fn):
        with open(fn, 'r', newline='', encoding=CSV_ENCODING) as f:
            return list(csv.reader(f, 'stringdata'))
    else:
        return list()

def merge_dicts(*dict_args):
    result = {}
    for dictionary in dict_args:
        result.update(dictionary)
    return result

def write_csv_list(fn, lst, mode='w'):
    if len(lst) == 0: return
    with open(fn, mode, newline='', encoding=CSV_ENCODING) as f:
        writer = csv.writer(f, 'stringdata')
        for row in lst:
            writer.writerow(row)
def unpack(name):
    csv_filename = os.path.splitext(name)[0] + '_strings.csv'
    if os.path.isfile(csv_filename): return
    data_array = list()
    string_tags = dict()
    with open(name, 'rb') as f:
        data = f.read()
    i = 0
    # every I" marker in the marshal stream starts a string: a packed length
    # followed by the raw bytes
    for m in STR_RAW_STR_RE.finditer(data):
        str_pos = m.start() + 2
        str_size, head_size = read_long(data, str_pos)
        str_data = data[(str_pos + head_size):(str_pos + str_size + head_size)]
        str_len = len(str_data) # str_len is in bytes, not characters
        if str_len == 0: continue
        i += 1
        if 'Scripts.rvdata2' in name:
            if i < 4: continue # skip the first few strings (not script data)
            try:
                str_data = zlib.decompress(str_data)
            except Exception:
                #print("Decompression", i, "FAIL", str_data[:10])
                pass
            try:
                str_data = str_data.decode(GAME_FILE_ENC)
                #print("Stream", i, "OK", str_data[:10])
            except Exception:
                #print("Stream", i, "FAIL", str_data[:10])
                pass
            data_array.append([str_data, '', '%d,%d' % (str_pos, str_len)])
            continue
        try:
            str_data = str_data.decode(GAME_FILE_ENC)
        except Exception:
            str_data = ''
            str_len = 0
        not_japanese = not is_cjk_substring(str_data)
        if (STR_GARBAGE_RE.match(str_data) or not_japanese) and not DUMP_ALL_STRINGS:
            continue
        for tag in TAGS_RE.findall(str_data):
            string_tags[tag] = tag_hash(tag)
        data_array.append([str_data, '', '%d,%d' % (str_pos, str_len)])
    if len(data_array):
        write_csv_list(csv_filename, data_array)
    if len(string_tags):
        write_csv_list(REPLACEMENT_TAGS_CSV, list(string_tags.items()), 'a')
    print("{}... OK".format(name.replace(os.sep, "/")))
def repack(name):
    if not os.path.isdir(OUT_DIR):
        os.mkdir(OUT_DIR)
    if not os.path.isdir(os.path.join(OUT_DIR, "Data")):
        os.mkdir(os.path.join(OUT_DIR, "Data"))
    csv_filename = os.path.splitext(name)[0] + '_strings.csv'
    out_name = os.path.join(OUT_DIR, name)
    if os.path.isfile(out_name): return
    strings_list = read_csv_list(csv_filename)
    with open(name, 'rb') as f:
        data = f.read()
    # split the file into alternating segments [gap, string, gap, string, ..., tail]
    # so that the string bytes of CSV row i end up at the odd index 2*i + 1
    new_data = []
    last_pos = 0
    i_last_string = len(strings_list) - 1
    for i, row in enumerate(strings_list):
        if len(row) != 3: continue
        pos, data_len = row[2].split(',') # data_start_position,data_length
        pos, data_len = int(pos), int(data_len)
        len_size = len(make_long(data_len))
        new_data.append(bytes(data[last_pos:pos]))
        new_data.append(bytes(data[pos:(pos + len_size + data_len)]))
        last_pos = pos + len_size + data_len
        if i == i_last_string:
            new_data.append(bytes(data[last_pos:]))
    if len(new_data) == 0: return
    # substitute translated strings (second CSV column) into their segments
    seg = 1 # string segments sit at the odd indexes of new_data
    for i, row in enumerate(strings_list):
        if len(row) != 3: continue
        if 'Scripts.rvdata2' in name and i % 2 == 0:
            # script bodies have to be re-compressed before insertion
            new_str = zlib.compress(row[0].encode(GAME_FILE_ENC))
            new_data[seg] = make_long(len(new_str)) + new_str
        elif row[1]:
            new_str = row[1].encode(GAME_FILE_ENC)
            new_data[seg] = make_long(len(new_str)) + new_str
        seg += 2
    with open(out_name, 'wb') as f:
        f.write(b''.join(new_data))
    print("{}... OK".format(out_name.replace(os.sep, "/")))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", help="Extract rvdata2 strings", action="store_true")
    parser.add_argument("-r", help="Replace rvdata2 strings", action="store_true")
    if len(sys.argv) < 2: # no arguments given (sys.argv[0] is always present)
        print('RVdata2 string dump tool v1.0')
        parser.print_help(sys.stderr)
        return
    app_args = parser.parse_args()
    if app_args.e:
        print("Extracting strings...\n")
    else:
        print("Replacing strings...\n")
    DATA_NAMES = list(glob.glob('Data' + os.sep + '*.rvdata2'))
    #for name in DATA_NAMES:
    #    repack(name) if app_args.r else unpack(name)
    with Pool() as p:
        p.map(repack, DATA_NAMES) if app_args.r else p.map(unpack, DATA_NAMES)
    if app_args.e:
        string_tags = list(read_csv_dict(REPLACEMENT_TAGS_CSV).items())
        string_tags.sort(key=lambda l: len(l[0]), reverse=True)
        if len(string_tags):
            write_csv_list(REPLACEMENT_TAGS_CSV, string_tags)

if __name__ == '__main__':
    main()
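# Usage sketch (run from the game root, next to the Data/ folder; the file name
# dump_strings.py is only a placeholder for whatever this script is saved as):
#   python dump_strings.py -e   # extract strings into Data/*_strings.csv
#   python dump_strings.py -r   # patch translations back, writing to out/Data/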
# requirements.txt for the two scripts below:
jsonpickle
rubymarshal
# coding: utf-8
import sys, os, re, zlib, glob
import jsonpickle
from rubymarshal import writer, reader

def write_rvdata2(fn, data):
    with open(fn, 'wb') as f:
        return writer.write(f, data)

def load_rvdata2(file):
    with open(file, 'rb') as f:
        return reader.load(f)

def load_json(fn):
    """ Reads a jsonpickle-encoded .json file produced by the unpacker. """
    with open(fn, 'r', encoding='utf-8-sig') as f:
        return jsonpickle.decode(f.read())

DATA_NAMES = [
    "Actors", "Animations", "Armors",
    "Classes", "CommonEvents", "Enemies",
    "Items", "MapInfos", "Skills",
    "States", "System", "Tilesets",
    "Troops", "Weapons"
]

if not os.path.isdir('out/'):
    os.mkdir('out/')
if not os.path.isdir('out/Data/'):
    os.mkdir('out/Data/')

for name in DATA_NAMES:
    rvdata_filename = 'out/Data/' + name + '.rvdata2'
    json_filename = 'Data/' + name + '.json'
    if os.path.isfile(rvdata_filename):
        continue
    print(f"Packing... {rvdata_filename}")
    data = load_json(json_filename)
    write_rvdata2(rvdata_filename, data)

# Maps
for json_filename in glob.glob('Data/Map[0-9][0-9][0-9].json'):
    rvdata_filename = 'out/Data/' + os.path.splitext(os.path.basename(json_filename))[0] + '.rvdata2'
    if os.path.isfile(rvdata_filename):
        continue
    print(f"Packing... {rvdata_filename}")
    data = load_json(json_filename)
    write_rvdata2(rvdata_filename, data)

# Scripts
def pack_scripts():
    rvdata_filename = 'out/Data/Scripts.rvdata2'
    if os.path.isfile(rvdata_filename):
        return
    print("Packing... Data/Scripts.rvdata2")
    data = load_rvdata2('Data/Scripts.rvdata2')
    for script_filename in glob.glob('Data/Scripts/*.rb'):
        with open(script_filename, 'r', encoding='utf-8') as f:
            for src in re.sub(r'(\r\n|\r|\n)', "\n", f.read()).split("# -*- END_OF_SCRIPT -*-\n\n"):
                name = os.path.splitext(os.path.basename(script_filename))[0]
                if name == "( NONAME )": continue
                head, lf, script = src.partition("\n")
                if script.strip() == '': continue
                _id = int(re.compile(r'id: (\d+)').findall(head)[0])
                #print(" Packing", _id, name)
                # find the row of Scripts.rvdata2 whose id matches the .rb header
                idx = next((i for i, row in enumerate(data) if row[0] == _id), None)
                if idx is None: continue
                data[idx][2] = zlib.compress(script.strip().encode('utf-8'))
    write_rvdata2(rvdata_filename, data)

pack_scripts()
# coding: utf-8
import os, zlib, glob, json
import jsonpickle
from rubymarshal import reader

jsonpickle.set_preferred_backend('json')
jsonpickle.set_encoder_options('json', ensure_ascii=False)

def load_rvdata2(file):
    with open(file, 'rb') as f:
        return reader.load(f)

def save_json(file, data):
    """ Writes data as a jsonpickle-encoded .json file. """
    with open(file, 'w', encoding='utf-8-sig') as f:
        f.write(jsonpickle.encode(data, indent=4, numeric_keys=True))

DATA_NAMES = [
    "Actors", "Animations", "Armors",
    "Classes", "CommonEvents", "Enemies",
    "Items", "MapInfos", "Skills",
    "States", "System", "Tilesets",
    "Troops", "Weapons"
]

for name in DATA_NAMES:
    rvdata_filename = 'Data/' + name + '.rvdata2'
    json_filename = 'Data/' + name + '.json'
    if os.path.isfile(json_filename):
        continue
    print(f"Unpacking... {rvdata_filename}")
    data = load_rvdata2(rvdata_filename)
    save_json(json_filename, data)

# Maps
for rvdata_filename in glob.glob('Data/Map[0-9][0-9][0-9].rvdata2'):
    json_filename = 'Data/' + os.path.splitext(os.path.basename(rvdata_filename))[0] + '.json'
    if os.path.isfile(json_filename):
        continue
    print("Unpacking... {}".format(rvdata_filename.replace(os.sep, "/")))
    data = load_rvdata2(rvdata_filename)
    save_json(json_filename, data)

def cleanup_scripts_rb():
    if os.path.isdir('Data/Scripts'):
        for fn in glob.glob('Data/Scripts/*.rb'):
            os.remove(fn)
    else:
        os.mkdir('Data/Scripts')

# Scripts
def unpack_scripts():
    data = load_rvdata2('Data/Scripts.rvdata2')
    indexes = []
    # each entry of Scripts.rvdata2 is [id, name, zlib-compressed source]
    for _id, _name, _script in data:
        _name = '( NONAME )' if not str(_name) else str(_name).strip()
        indexes.append(_id)
        rb_filename = 'Data/Scripts/' + _name + '.rb'
        if os.path.isfile(rb_filename):
            continue
        #print("Unpacking... Scripts/" + _name)
        with open(rb_filename, 'wb') as f:
            f.write(f"# -*- id: {_id} -*-\n".encode('utf-8'))
            f.write(zlib.decompress(bytes(_script)))
            f.write(b"\n# -*- END_OF_SCRIPT -*-\n\n")
    save_json('Data/Scripts.json', indexes)

print("Unpacking... Scripts")
cleanup_scripts_rb()
unpack_scripts()
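# Assumed workflow for the rubymarshal-based pair of scripts: run this unpacker
# first to produce Data/*.json and Data/Scripts/*.rb, edit those files, then run
# the packer script above to rebuild the .rvdata2 files under out/Data/.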
@UserUnknownFactor (Author)

This isn't really working, so it's just kept for reference.