#!/usr/bin/env python3
# Touhou Community Reliant Automatic Patcher
# Scripts
#
# ----
#
"""Locates hardcoded strings in a PE file according to a stringdefs.js file and
an optional blacklist. Dumps their relative positions in stringlocs format, as
well as lists of both newly found and garbage strings in the following format:
[
    "string", relative position, file offset,
    ...
]"""

import argparse
import json
import pefile
import re
import utils

parser = argparse.ArgumentParser(
    description=__doc__
)
parser.add_argument(
    'pe',
    help='Portable Executable file.',
    type=str,
)
parser.add_argument(
    'stringdefs',
    help='Original string definitions.',
    type=str,
)
parser.add_argument(
    '-s', '--section',
    metavar='.sect',
    help='Name of the PE section to parse for strings (default: ".rdata").',
    default='.rdata',
    type=str,
)
parser.add_argument(
    '-b', '--blacklist',
    metavar='fn',
    help='Optional file containing a JSON array of strings to ignore.',
    default='',
    type=str,
)
parser.add_argument(
    '-e', '--encoding',
    help='String encoding (default: "cp932").',
    default='cp932',
    type=str,
)
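
# Example invocation (the script and data file names are only placeholders,
# this Gist doesn't prescribe any):
#
#     python3 stringlocs_find.py game.exe stringdefs.js -b blacklist.js -s .data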


def json_loadf(fn):
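    """Loads the JSON file [fn] via utils.json_load(), or returns an empty
    dict if [fn] is an empty string."""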
    if len(fn):
        return utils.json_load(fn)
    else:
        return {}


def filter_string(string):
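    """Strips ASCII control characters (U+0000 to U+001F) from [string].
    Returns an empty string if [string] ends in a control character, treating
    the entire string as garbage in that case."""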
    rx_garbage = re.compile(r'[\u0000-\u001f]')
    if rx_garbage.match(string[-1:]):
        return ''
    else:
        return rx_garbage.sub('', string)


def get_string_from_data(offset, data):
    """Reads the null-terminated byte string at [offset] in [data] and
    returns it without the terminating null byte."""
    ret = bytearray()
    try:
        b = data[offset]
    except IndexError:
        return ret
    while b:
        ret.append(b)
        offset += 1
        try:
            b = data[offset]
        except IndexError:
            break
    return ret


def push_new(array, pe, offset, str_raw, encoding):
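    """Decodes [str_raw] using [encoding]. If the result survives
    filter_string() and is not already in [array], appends the decoded
    string, its RVA as an "Rx"-prefixed hex string, and its file offset to
    [array]. Strings that fail to decode are silently ignored."""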
    try:
        str_dec = str_raw.decode(encoding)
        if len(filter_string(str_dec)) > 0 and str_dec not in array:
            rva = "Rx{:x}".format(pe.get_rva_from_offset(offset))
            array.append(str_dec)
            array.append(rva)
            array.append(offset)
    except UnicodeDecodeError:
        pass


def locate_string(pe, data, data_offset, string, encoding, garbage):
    """Returns the RVA of [string] inside [data] as an "Rx..." hex string, or
    None if it wasn't found. As a side effect, zeroes the matched bytes in
    [data] and pushes the stray bytes directly preceding the match onto
    [garbage]."""
    rva = None
    try:
        str_enc = string.encode(encoding) + bytearray(1)
        pos = data.find(str_enc)
    except UnicodeEncodeError:
        pos = -1
    if pos != -1:
        rva = "Rx{:x}".format(pe.get_rva_from_offset(data_offset + pos))
        data[pos:pos + len(str_enc)] = bytearray(len(str_enc))
        # Clear every byte before the string until a null byte is reached.
        # Everything in that range is sure to be non-string garbage.
        g = pos - 1
        while (g != -1) and (data[g] != 0):
            g = g - 1
        push_new(garbage, pe, data_offset + g, data[g + 1:pos], encoding)
        data[g + 1:pos] = bytearray(pos - (g + 1))
    return rva


def locate(data, data_offset, stringdefs, blacklist, encoding):
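    """Locates every string from [stringdefs] and [blacklist] in [data],
    blanking them out as it goes. Returns a (found, garbage) tuple: [found]
    maps the "Rx..." RVA of each located string to its stringdefs ID,
    [garbage] is a flat ["string", RVA, offset, ...] list of the stray bytes
    found directly in front of the matches. Uses the module-level [pe]
    object."""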
    found = {}
    garbage = []
    for key, value in stringdefs.items():
        if type(value) is str:
            value = [value]
        for i in value:
            rva = locate_string(pe, data, data_offset, i, encoding, garbage)
            if rva:
                found[rva] = key
    for string in blacklist:
        locate_string(pe, data, data_offset, string, encoding, garbage)
    return found, garbage


def parse(data, data_offset, encoding):
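    """Scans all of [data] for any remaining null-terminated strings and
    returns them as a flat ["string", RVA, offset, ...] list. Uses the
    module-level [pe] object."""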
    new = []
    i = 0
    while i < len(data):
        str_raw = get_string_from_data(i, data)
        push_new(new, pe, i + data_offset, str_raw, encoding)
        i += len(str_raw) + 1
    return new


if __name__ == '__main__':
    arg = parser.parse_args()
    pe = pefile.PE(name=arg.pe)
    stringdefs = json_loadf(arg.stringdefs)
    blacklist = json_loadf(arg.blacklist)
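    # Keep only plain strings from the blacklist and drop any "Rx..." RVA
    # entries, presumably so that the "New" output of a previous run can be
    # fed back in as a blacklist.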
    blacklist = [x for x in blacklist if type(x) is str and x[:2] != 'Rx']
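    # utils.json_dump_params apparently indents with tabs and puts every
    # array element on its own line; this regex is used below to collapse
    # each "string", "Rx...", offset triple back onto a single line.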
    rx_pseudodict = re.compile(r',\n\t"Rx([0-9a-f]+)",\n\t')
    for i in pe.sections:
        if i.Name.decode('ascii')[:len(arg.section)] == arg.section:
            start = i.PointerToRawData
            end = start + i.SizeOfRawData
            data = bytearray(pe.__data__[start:end])
            found, garbage = locate(
                data, start, stringdefs, blacklist, arg.encoding
            )
            new = parse(data, start, arg.encoding)
            new_dump = json.dumps(new, **utils.json_dump_params)
            garbage_dump = json.dumps(garbage, **utils.json_dump_params)
            print(
                'Found:\n' +
                json.dumps(found, **utils.json_dump_params) +
                '\nNew:\n' +
                rx_pseudodict.sub(r', "Rx\1", ', new_dump) +
                '\nGarbage:\n' +
                rx_pseudodict.sub(r', "Rx\1", ', garbage_dump)
            )