Unpack/repack tools for RPG Maker VX Ace games (Python port) [Not working]
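A rough usage sketch (the script name rvdata2_strings.py is a placeholder here, since the gist's file names aren't shown): run the tool from the game's root folder, next to its Data directory.

python rvdata2_strings.py -e    dumps translatable strings to Data/*_strings.csv and collects message codes in replacement_tags.csv
python rvdata2_strings.py -r    writes patched .rvdata2 files to out/Data/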
# coding: utf-8
# Alternative version that doesn't use serialization/deserialization
# libraries; it modifies the files directly instead.
import argparse, sys, os, glob, re, struct, csv, hashlib, math, zlib
from multiprocessing import Pool

DUMP_ALL_STRINGS = False
OUT_DIR = 'out'
STR_RAW_STR_RE = re.compile(b'I"')  # Ruby Marshal prefix of a String carrying instance variables (its encoding)
STR_RAW_ARR_STR_RE = r''  # unused
STR_GARBAGE_RE = re.compile(r'^_+$')  # strings made only of underscores
TAGS_RE = re.compile(r'\\\w{1,2}\[[^\]]+\]')  # RPG Maker message codes like \C[1] or \N[2]
ESCAPE_CHAR = '¶'
DELIMITER_CHAR = '→'
CSV_ENCODING = "utf-8-sig"
GAME_FILE_ENC = 'utf-8'
REPLACEMENT_TAGS_CSV = 'replacement_tags.csv'
csv.register_dialect("stringdata", delimiter=DELIMITER_CHAR, quotechar='', doublequote=False, quoting=csv.QUOTE_NONE, escapechar=ESCAPE_CHAR)
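# Each CSV row written below has the form: original → translation → "position,byte_length"
# (the arrow being DELIMITER_CHAR). The second column starts empty and is meant to be
# filled by the translator; the third lets repack() locate the string in the binary file.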
RANGES = [
    {"from": ord(u"\u3300"), "to": ord(u"\u33ff")},          # compatibility ideographs
    {"from": ord(u"\ufe30"), "to": ord(u"\ufe4f")},          # compatibility ideographs
    {"from": ord(u"\uf900"), "to": ord(u"\ufaff")},          # compatibility ideographs
    {"from": ord(u"\U0002F800"), "to": ord(u"\U0002fa1f")},  # compatibility ideographs
    {'from': ord(u'\u3040'), 'to': ord(u'\u309f')},          # Japanese Hiragana
    {"from": ord(u"\u30a0"), "to": ord(u"\u30ff")},          # Japanese Katakana
    {"from": ord(u"\u2e80"), "to": ord(u"\u2eff")},          # CJK radicals supplement
    {"from": ord(u"\u4e00"), "to": ord(u"\u9fff")},          # CJK Unified Ideographs
    {"from": ord(u"\u3400"), "to": ord(u"\u4dbf")},          # CJK Extension A
    {"from": ord(u"\U00020000"), "to": ord(u"\U0002a6df")},  # CJK Extension B
    {"from": ord(u"\U0002a700"), "to": ord(u"\U0002b73f")},  # CJK Extension C
    {"from": ord(u"\U0002b740"), "to": ord(u"\U0002b81f")},  # CJK Extension D
    {"from": ord(u"\U0002b820"), "to": ord(u"\U0002ceaf")}   # CJK Extension E, included as of Unicode 8.0
]
#STR_ENG_RE = re.compile(r'^[0-9a-zA-Z_.,;\-\(\[\]\) @\"\']+$')

def is_cjk_substring(string):
    """ Checks whether the string contains any CJK (Japanese) characters. """
    #return not STR_ENG_RE.match(string)
    for char in string:
        if any(rng["from"] <= ord(char) <= rng["to"] for rng in RANGES):
            return True
    return False
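# A quick illustration (comments only, so the module stays import-safe):
#   is_cjk_substring('こんにちは')  -> True   (Hiragana, U+3040..U+309F)
#   is_cjk_substring('Hello!')      -> False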
def tag_hash(string, str_enc="utf-8", hash_len=7):
    """ Generates a short English tag for MTL from any kind of string. """
    if len(string) < 1: return ''
    d = hashlib.sha1(string.encode(str_enc)).digest()
    s = ''
    n_chars = 26 + 10
    for i in range(0, hash_len):
        x = d[i] % n_chars
        #s += chr(ord('a') + x)  # lowercase letters, n_chars = 26
        s += (chr(ord('0') + x - 26) if x >= 26 else chr(ord('a') + x))  # digits + lowercase, n_chars = 36
        #s += (chr(ord('A') + x - 26) if x >= 26 else chr(ord('a') + x))  # upper + lowercase letters, n_chars = 52
    endchar = ','
    # indentation and endline checks
    if re.search(r'\A(?:\/\/)?(?:\t+| {4,})', string):  # indented (optionally //-commented) lines
        endchar = ':'
    elif re.search(r'\.\s*$', string):
        endchar = '!'
    return s + endchar
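# Example of the output's shape (the actual characters depend on the SHA-1 digest):
#   tag_hash('\\C[1]')  ->  something like 'q3k9x0a,' : seven base-36 characters
#   plus a terminator (',' by default, ':' for indented input, '!' after a period).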
def column(arr, i):
    return [row[i] for row in arr]

def split_data(i, data, positions):
    """ Cuts "start,length" chunks out of data. (Currently unused.) """
    dlen = len(data)
    new_arr = []
    for pos in positions:
        start, l = (int(x) for x in pos.split(','))
        new_arr.append(data[start:min(dlen, start + l)])
    return new_arr
def replace_string(data, string, replacer, once=True):
    for i, substr in enumerate(data):
        if substr == string:
            data[i] = replacer
            if once: break
    return data

def get_sbyte(data, pos): return struct.unpack("b", data[pos:pos+1])[0]
def get_ubyte(data, pos): return struct.unpack("B", data[pos:pos+1])[0]
def read_long(data, pos):
    """ Reads a Ruby Marshal packed integer from a byte array.
    Returns (value, bytes_consumed). """
    i = 1
    length = get_sbyte(data, pos)
    if length == 0:
        return (0, 1)
    if 5 < length < 128:  # small positive integers are stored as value + 5
        return (length - 5, 1)
    elif -129 < length < -5:  # small negative integers as value - 5
        return (length + 5, 1)
    # otherwise abs(length) little-endian payload bytes follow
    result = 0
    factor = 1
    for s in range(abs(length)):
        result += get_ubyte(data, pos + i) * factor
        i += 1
        factor *= 256
    if length < 0: result = result - factor
    return (result, i)
def make_sbyte(num): return struct.pack("b", num)
def make_ubyte(num): return struct.pack("B", num)

def make_long(num):
    """ Packs an integer into Ruby Marshal's packed format. """
    data = b''
    if not num: data = b"\0"
    elif 0 < num < 123: data = make_sbyte(num + 5)
    elif -124 < num < 0: data = make_sbyte(num - 5)
    else:
        size = int(math.ceil(num.bit_length() / 8.0))
        if size > 5: raise Exception("%d too long for serialization" % num)
        original_obj = num
        factor = 256 ** size
        if num < 0 and num == -factor:
            size -= 1
            num += factor // 256  # integer division keeps num an int
        elif num < 0:
            num += factor  # two's-complement style offset for negatives
        sign = int(math.copysign(size, original_obj))
        data = make_sbyte(sign)
        for i in range(size):
            data += make_ubyte(num % 256)
            num //= 256
    return data
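# A sanity check of the round-trip (values follow the Marshal packed-integer format):
#   make_long(1)    == b'\x06'          (small ints are stored as n + 5)
#   make_long(1000) == b'\x02\xe8\x03'  (byte count, then little-endian payload)
#   read_long(b'\x02\xe8\x03', 0) == (1000, 3)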
def read_csv_dict(fn):
    if os.path.isfile(fn):
        with open(fn, 'r', newline='', encoding=CSV_ENCODING) as f:
            return dict(csv.reader(f, 'stringdata'))
    else:
        return dict()

def read_csv_list(fn):
    if os.path.isfile(fn):
        with open(fn, 'r', newline='', encoding=CSV_ENCODING) as f:
            return list(csv.reader(f, 'stringdata'))
    else:
        return list()

def merge_dicts(*dict_args):
    result = {}
    for dictionary in dict_args:
        result.update(dictionary)
    return result

def write_csv_list(fn, lst, mode='w'):
    if len(lst) == 0: return
    with open(fn, mode, newline='', encoding=CSV_ENCODING) as f:
        writer = csv.writer(f, 'stringdata')
        for row in lst:
            writer.writerow(row)
def unpack(name):
    csv_filename = os.path.splitext(name)[0] + '_strings.csv'
    if os.path.isfile(csv_filename): return
    data_array = list()
    string_tags = dict()
    with open(name, 'rb') as f:
        data = f.read()
    i = 0
    for m in STR_RAW_STR_RE.finditer(data):
        str_pos = m.start() + 2  # skip the I" marker itself
        str_size, head_size = read_long(data, str_pos)
        str_data = data[(str_pos + head_size):(str_pos + str_size + head_size)]
        str_len = len(str_data)  # str_len is in bytes, not characters
        if str_len == 0: continue
        i += 1
        if 'Scripts.rvdata2' in name:
            if i < 4: continue  # skip the first three strings
            try:
                str_data = zlib.decompress(str_data)  # script bodies are deflate-compressed
            except Exception as e:
                #print("Decompression", i, "FAIL", str_data[:10])
                pass
            try:
                str_data = str_data.decode(GAME_FILE_ENC)
                #print("Stream", i, "OK", str_data[:10])
            except:
                # NOTE: if decoding fails, str_data stays bytes and csv writes
                # its repr; such rows cannot be repacked cleanly.
                #print("Stream", i, "FAIL", str_data[:10])
                pass
            data_array.append([str_data, '', '%d,%d' % (str_pos, str_len)])
            continue
        try:
            str_data = str_data.decode(GAME_FILE_ENC)
        except:
            str_data = ''
            str_len = 0
        not_japanese = not is_cjk_substring(str_data)
        if (STR_GARBAGE_RE.match(str_data) or not_japanese) and not DUMP_ALL_STRINGS:
            continue
        for tag in TAGS_RE.findall(str_data):
            string_tags[tag] = tag_hash(tag)
        data_array.append([str_data, '', '%d,%d' % (str_pos, str_len)])
    if len(data_array):
        write_csv_list(csv_filename, data_array)
    if len(string_tags):
        write_csv_list(REPLACEMENT_TAGS_CSV, [x for x in string_tags.items()], 'a')
    print("{}... OK".format(name.replace(os.sep, "/")))
def repack(name):
    if not os.path.isdir(OUT_DIR):
        os.mkdir(OUT_DIR)
    if not os.path.isdir(os.path.join(OUT_DIR, "Data")):
        os.mkdir(os.path.join(OUT_DIR, "Data"))
    csv_filename = os.path.splitext(name)[0] + '_strings.csv'
    out_name = os.path.join(OUT_DIR, name)
    if os.path.isfile(out_name): return
    # keep only well-formed rows so both passes below stay in sync
    strings_list = [row for row in read_csv_list(csv_filename) if len(row) == 3]
    with open(name, 'rb') as f:
        data = f.read()
    # First pass: slice the file into alternating (gap, header+string) chunks,
    # so the chunk for row i lands at new_data[2 * i + 1].
    new_data = []
    last_pos = 0
    i_last_string = len(strings_list) - 1
    for i, row in enumerate(strings_list):
        pos, data_len = row[2].split(',')  # data_start_position,data_length[,length_var_bytesize]
        pos, data_len = int(pos), int(data_len)
        len_size = len(make_long(data_len))
        new_data.append(bytes(data[last_pos:pos]))
        new_data.append(bytes(data[pos:(pos + len_size + data_len)]))
        last_pos = pos + len_size + data_len
        if i == i_last_string:
            new_data.append(bytes(data[last_pos:]))
    # Second pass: swap in the translated strings with fresh length headers.
    for i, row in enumerate(strings_list):
        pos, data_len = row[2].split(',')
        pos, data_len = int(pos), int(data_len)
        if 'Scripts.rvdata2' in name and i % 2 == 0:
            # script bodies get re-compressed and a rebuilt length header
            new_str = zlib.compress(row[0].encode(GAME_FILE_ENC))
            new_data[2 * i + 1] = make_long(len(new_str)) + new_str
        else:
            new_str = row[1].encode(GAME_FILE_ENC)
            new_len = len(new_str)
            new_str = make_long(new_len) + new_str
            #print("pos", pos, "length:", data_len, "->", new_len)
            # replace the original chunk (old header + old string) with the
            # re-encoded translation
            for j, old_data in enumerate(new_data):
                if old_data == make_long(data_len) + row[0].encode(GAME_FILE_ENC):
                    new_data[j] = new_str
                    break
    with open(out_name, 'wb') as f:
        f.write(b''.join(new_data))
    print("{}... OK".format(out_name.replace(os.sep, "/")))
    # This block is effectively a no-op: rvdata_file resolves to the same path
    # as out_name, which was just written above.
    rvdata_file = os.path.join(os.getcwd(), OUT_DIR, os.path.splitext(name)[0] + '.rvdata2')
    if os.path.isfile(rvdata_file) or len(new_data) == 0: return
    with open(rvdata_file, 'wb') as f:
        f.write(b''.join(new_data))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", help="Extract rvdata2 strings", action="store_true")
    parser.add_argument("-r", help="Replace rvdata2 strings", action="store_true")
    if len(sys.argv) < 2:  # no arguments given
        print('RVdata2 string dump tool v1.0')
        parser.print_help(sys.stderr)
        return
    app_args = parser.parse_args()
    if app_args.r:
        print("Replacing strings...\n")
    else:
        print("Extracting strings...\n")
    DATA_NAMES = list(glob.glob('Data' + os.sep + '*.rvdata2'))
    #for name in DATA_NAMES:
    #    repack(name) if app_args.r else unpack(name)
    with Pool() as p:
        p.map(repack, DATA_NAMES) if app_args.r else p.map(unpack, DATA_NAMES)
    if app_args.e:
        # de-duplicate the collected tags and sort them longest-first
        string_tags = list(read_csv_dict(REPLACEMENT_TAGS_CSV).items())
        string_tags.sort(key=lambda l: len(l[0]), reverse=True)
        if len(string_tags):
            write_csv_list(REPLACEMENT_TAGS_CSV, string_tags)

if __name__ == '__main__':
    main()
jsonpickle
rubymarshal
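Presumably a requirements file for the two rubymarshal-based scripts below; if saved as requirements.txt, the usual pip install -r requirements.txt would install both packages.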
# coding: utf-8
import sys, os, re, zlib, glob
import jsonpickle
from rubymarshal import writer, reader

def write_rvdata2(fn, data):
    with open(fn, 'wb') as f:
        return writer.write(f, data)

def load_rvdata2(file):
    with open(file, 'rb') as f:
        return reader.load(f)

def load_yaml(fn):
    with open(fn, 'r', encoding='utf-8-sig') as f:
        return jsonpickle.decode(f.read())
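# Note: despite the "yaml" name, load_yaml() reads the JSON files that the
# dump script further below produces via jsonpickle.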
DATA_NAMES = [
    "Actors", "Animations", "Armors",
    "Classes", "CommonEvents", "Enemies",
    "Items", "MapInfos", "Skills",
    "States", "System", "Tilesets",
    "Troops", "Weapons"
]

if not os.path.isdir('out/'):
    os.mkdir('out/')
if not os.path.isdir('out/Data/'):
    os.mkdir('out/Data/')

for name in DATA_NAMES:
    rvdata_filename = 'out/Data/' + name + '.rvdata2'
    yaml_filename = 'Data/' + name + '.json'
    if os.path.isfile(rvdata_filename):
        continue
    print(f"Packing... {rvdata_filename}")
    data = load_yaml(yaml_filename)
    write_rvdata2(rvdata_filename, data)
# Maps
for yaml_filename in glob.glob('Data/Map[0-9][0-9][0-9].json'):
    rvdata_filename = 'out/Data/' + os.path.splitext(os.path.basename(yaml_filename))[0] + '.rvdata2'
    if os.path.isfile(rvdata_filename):
        continue
    print(f"Packing... {rvdata_filename}")
    data = load_yaml(yaml_filename)
    write_rvdata2(rvdata_filename, data)
# Scripts
def pack_scripts():
    rvdata_filename = 'out/Data/Scripts.rvdata2'
    if os.path.isfile(rvdata_filename):
        return
    print("Packing... Data/Scripts.rvdata2")
    data = load_rvdata2('Data/Scripts.rvdata2')
    for script_filename in glob.glob('Data/Scripts/*.rb'):
        with open(script_filename, 'r', encoding='utf-8') as f:
            for src in re.sub(r'(\r\n|\r|\n)', "\n", f.read()).split("# -*- END_OF_SCRIPT -*-\n\n"):
                name = os.path.splitext(os.path.basename(script_filename))[0]
                if name == "( NONAME )": continue
                head, lf, script = src.partition("\n")
                if script.strip() == '': continue
                _id = int(re.compile(r'id: (\d+)').findall(head)[0])
                #print(" Packing", _id, name)
                idx = next((i for i, row in enumerate(data) if row[0] == _id), None)
                if idx is None: continue  # id from the .rb header not found in the original data
                data[idx][2] = zlib.compress(script.strip().encode('utf-8'))
    write_rvdata2(rvdata_filename, data)

pack_scripts()
# coding: utf-8
import os, zlib, glob, json
import jsonpickle
from rubymarshal import reader

jsonpickle.set_preferred_backend('json')
jsonpickle.set_encoder_options('json', ensure_ascii=False)

def load_rvdata2(file):
    with open(file, 'rb') as f:
        return reader.load(f)

def save_yaml(file, data):
    with open(file, 'w', encoding='utf-8-sig') as f:
        f.write(jsonpickle.encode(data, indent=4, numeric_keys=True))
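# Note: despite the "yaml" name, save_yaml() writes jsonpickle JSON; these are
# the .json files the packing script above reads back.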
DATA_NAMES = [
    "Actors", "Animations", "Armors",
    "Classes", "CommonEvents", "Enemies",
    "Items", "MapInfos", "Skills",
    "States", "System", "Tilesets",
    "Troops", "Weapons"
]

for name in DATA_NAMES:
    rvdata_filename = 'Data/' + name + '.rvdata2'
    yaml_filename = 'Data/' + name + '.json'
    if os.path.isfile(yaml_filename):
        continue
    print(f"Unpacking... {rvdata_filename}")
    data = load_rvdata2(rvdata_filename)
    save_yaml(yaml_filename, data)

# Maps
for rvdata_filename in glob.glob('Data/Map[0-9][0-9][0-9].rvdata2'):
    yaml_filename = 'Data/' + os.path.splitext(os.path.basename(rvdata_filename))[0] + '.json'
    if os.path.isfile(yaml_filename):
        continue
    print("Unpacking... {}".format(rvdata_filename.replace(os.sep, "/")))
    data = load_rvdata2(rvdata_filename)
    save_yaml(yaml_filename, data)
def cleanup_scripts_rb():
    if os.path.isdir('Data/Scripts'):
        for fn in glob.glob('Data/Scripts/*.rb'):
            os.remove(fn)
    else:
        os.mkdir('Data/Scripts')

# Scripts
def unpack_scripts():
    data = load_rvdata2('Data/Scripts.rvdata2')
    indexes = []
    for _id, _name, _script in data:
        # NOTE: _name is used verbatim as a filename; titles with path-unsafe
        # characters, or duplicate titles, will clash (first one wins).
        _name = '( NONAME )' if not str(_name) else str(_name).strip()
        indexes.append(_id)
        rb_filename = 'Data/Scripts/' + _name + '.rb'
        if os.path.isfile(rb_filename):
            continue
        #print("Unpacking... Scripts/" + _name)
        with open(rb_filename, 'wb') as f:
            f.write(f"# -*- id: {_id} -*-\n".encode('utf-8'))  # header the packing script parses back
            f.write(zlib.decompress(bytes(_script)))
            f.write(b"\n# -*- END_OF_SCRIPT -*-\n\n")
    save_yaml('Data/Scripts.json', indexes)

print("Unpacking... Scripts")
cleanup_scripts_rb()
unpack_scripts()
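The presumed workflow (my reading of the scripts; the gist doesn't spell it out): run this dump script first to turn Data/*.rvdata2 into editable Data/*.json files plus Data/Scripts/*.rb, edit those, then run the packing script above to rebuild the .rvdata2 files under out/Data/.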
This isn't really working, so it's just here for reference.