Last active
December 1, 2022 05:17
-
-
Save cq674350529/74e5b6d31780882c54c80302172ad753 to your computer and use it in GitHub Desktop.
locate base addr by finding fixed xref addrs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import sys | |
import binascii | |
import struct | |
import random | |
import string | |
import re | |
import inspect | |
# Regular expressions that recognise a typical MIPS function prologue, keyed
# by the architecture name accepted by find_base_address_via_code_xref().
#
# The patterns are bytes literals so that they can be applied directly to
# data read from a file opened in binary mode (a text pattern cannot be
# matched against bytes on Python 3; on Python 2 the "b" prefix is a no-op).
# "[\x00-\xff]" is used for "any byte" because, unlike ".", it also matches
# the newline byte 0x0a.
FUNCTION_PROLOGS = {
    # little endian
    "mipsl": (
        # optional: lui $gp, xxx | li $gp, 0x11b0b0; addu $gp, $t9
        br"([\x00-\xff]{2}\x1c\x3c|[\x00-\xff]\x00\x1c\x3c[\x00-\xff]{2}\x9c\x27\x21\xe0\x99\x03)?"
        # addiu $sp, xxx
        br"[\x00-\xff]{2}\xbd\x27"
        # optional: li $gp, xxx
        br"([\x00-\xff]{2}\x9c\x27)?"
        # sw $ra, xxx | sw $fp, 0x14($sp) (for leaf function)
        br"([\x00-\xff]{2}\xbf\xaf|[\x00-\xff]{2}\xbe\xaf)"
    ),
    # big endian
    "mipsb": (
        # optional: lui $gp, xxx | li $gp, 0x11b0b0; addu $gp, $t9
        br"(\x3c\x1c[\x00-\xff]{2}|\x3c\x1c[\x00-\xff]{2}\x27\x9c[\x00-\xff]{2}\x03\x99\xe0\x21)?"
        # addiu $sp, xxx
        br"\x27\xbd[\x00-\xff]{2}"
        # optional: li $gp, xxx
        br"(\x27\x9c[\x00-\xff]{2})?"
        # sw $ra, xxx | sw $fp, 0x14($sp) (for leaf function)
        # NOTE(review): this group is optional here but mandatory in the
        # "mipsl" pattern, so "mipsb" accepts a bare "addiu $sp" -- confirm
        # whether the difference is intentional.
        br"(\xaf\xbf[\x00-\xff]{2}|\xaf\xbe[\x00-\xff]{2})?"
    ),
}
def get_current_func_name():
    """Return the name of the function that called this helper."""
    # Entry 0 of the stack describes this helper itself, entry 1 its caller;
    # field 3 of a stack entry is the function name.
    caller_record = inspect.stack()[1]
    return caller_record[3]
def _find_repeated_halfwords(content):
    """Find 16-bit values that repeat at a 4-byte stride in *content*.

    The data is viewed as 2-byte halfwords split into two interleaved lanes:
    lane 0 holds the halfwords at offsets 0, 4, 8, ... and lane 1 those at
    offsets 2, 6, 10, ....  A run is a stretch of one lane in which
    consecutive halfwords are equal.

    Returns a list of ``(pattern, offset, match_count)`` tuples, where
    *pattern* is the repeated halfword, *offset* is the offset of its first
    occurrence and *match_count* is the number of repeats after that first
    occurrence.  Halfwords that never repeat are not reported.
    """
    runs = []
    # One mutable record per lane: [pattern, offset of first hit, repeats].
    lanes = [[content[0:2], 0, 0], [content[2:4], 2, 0]]
    size = len(content)
    pos = 4
    while pos < size:
        halfword = content[pos:pos + 2]
        lane = lanes[(pos // 2) % 2]
        if halfword == lane[0]:
            lane[2] += 1
        else:
            if lane[2] != 0:
                runs.append((lane[0], lane[1], lane[2]))
            lane[0], lane[1], lane[2] = halfword, pos, 0
        pos += 2
    # A run that is still open when the data ends must be reported as well.
    for lane in lanes:
        if lane[2] != 0:
            runs.append((lane[0], lane[1], lane[2]))
    return runs


def find_reference_addresses(bin_path, start_addr=None, endian="<"):
    """Collect candidate absolute addresses stored in a binary image.

    The file is scanned for tables of 32-bit words that share the same
    16-bit half (for example the upper half of pointers into one region).
    Every word of such a table is decoded and reported as a candidate.

    Args:
        bin_path: path of the binary file to scan.
        start_addr: if not None, only addresses within
            ``[start_addr, start_addr + file_size]`` are kept.
        endian: struct byte-order character.  With "<" only runs in the
            second halfword of each word are used, with ">" only runs in
            the first halfword.  With None runs in either half are used and
            words are decoded as little endian.

    Returns:
        A sorted list of unique candidate addresses.

    Exits the process with status -1 when no candidate is found.
    """
    with open(bin_path, "rb") as f:
        content = f.read()
    file_size = len(content)

    pattern_result = _find_repeated_halfwords(content)
    pattern_result.sort(key=lambda item: item[2], reverse=True)

    # Resolve the default byte order once, without touching `endian`, so the
    # lane filter below behaves the same for every pattern.
    word_format = "%sI" % ("<" if endian is None else endian)

    all_addresses = []
    for pattern, run_offset, match_count in pattern_result:
        # word pattern: \x00\x00 / \xff\xff (padding), or hit_count < 3
        if pattern in (b"\x00\x00", b"\xff\xff") or match_count < 3:
            continue
        # start_addr: 0x80000000, so match '\x80\x**\x**\x**' or
        # '\x**\x**\x**\x80'
        lane = (run_offset // 2) % 2
        if endian == "<" and lane == 0:
            # only match '\x**\x**\x00\x80'
            continue
        if endian == ">" and lane == 1:
            # only match '\x80\x00\x**\x**'
            continue
        print("[+] pattern: %s, offset: %#x, count: %d" % (
            binascii.hexlify(pattern).decode("ascii"), run_offset, match_count))

        # The word containing the halfword starts at the halfword itself for
        # lane 0 and two bytes earlier for lane 1.
        word_offset = run_offset - 2 * lane
        addresses = []
        seen = set()
        # match_count counts the repeats after the first occurrence, so the
        # run covers match_count + 1 words.
        for k in range(match_count + 1):
            pos = word_offset + 4 * k
            if pos + 4 > file_size:
                # last word is cut off by the end of the file
                break
            addr = struct.unpack_from(word_format, content, pos)[0]
            # filter out "bad" addresses
            if start_addr is not None and \
                    not (start_addr <= addr <= start_addr + file_size):
                continue
            if addr not in seen:
                seen.add(addr)
                addresses.append(addr)

        if addresses:
            print(" ---> possiable addr: %s" % (
                ",".join("%#x" % addr for addr in addresses)))
        else:
            print(" ---> no valid address with this pattern")
        all_addresses.extend(addresses)

    if not all_addresses:
        print("[-] no addr patterns found!")
        sys.exit(-1)

    all_addresses = sorted(set(all_addresses))
    print("[+] All addresses: %s" % ",".join(
        "%#x" % addr for addr in all_addresses))
    return all_addresses
def choose_str_addresses(all_addresses, str_bound_addr, sample_size=20):
    """Pick a random sample of the addresses that lie above a boundary.

    Args:
        all_addresses: iterable of candidate addresses.
        str_bound_addr: addresses strictly greater than this value are
            treated as string references; the rest are ignored.
        sample_size: maximum number of addresses to return.

    Returns:
        A list of at most *sample_size* distinct addresses, in random order.

    Exits the process with status -1 when no address is above the boundary.
    """
    str_addresses = [addr for addr in all_addresses if addr > str_bound_addr]
    if not str_addresses:
        print("[-] no strings reference address found!")
        sys.exit(-1)

    print("[+] String addresses (size:%d): %s" % (
        len(str_addresses), ",".join("%#x" % addr for addr in str_addresses)))

    sample_size = min(sample_size, len(str_addresses))
    # random select some string addresses, to avoid some addresses pointing
    # to all '\x00' region
    random_str_addresses = random.sample(str_addresses, sample_size)
    print("[+] Choose string addresses (size: %d): %s" % (
        len(random_str_addresses),
        ",".join("%#x" % addr for addr in random_str_addresses)))
    return random_str_addresses
def find_base_address_via_string_xref(bin_path, xref_addrs, start_addr, end_addr, step=0x20):
    """Rank candidate load bases by how many xrefs land on a string start.

    Every base in ``start_addr, start_addr + step, ... <= end_addr`` is
    tried.  For a given base an address counts as a hit when the file byte
    just before ``addr - base`` is NUL and the two bytes starting at
    ``addr - base`` are printable ASCII.  Addresses that fall outside the
    file for a given base are ignored for that base.

    Args:
        bin_path: path of the binary file.
        xref_addrs: iterable of absolute addresses assumed to reference
            strings.
        start_addr: first candidate base address.
        end_addr: last candidate base address (inclusive).
        step: distance between two candidate bases; must be positive.

    Returns:
        The candidate base with the highest hit count (the lowest such base
        on a tie).  The ten best candidates are printed.

    Raises:
        ValueError: if *step* is not positive.

    Exits the process with status -1 when no candidate scores a hit.
    """
    if step <= 0:
        raise ValueError("step must be positive")
    with open(bin_path, "rb") as f:
        content = f.read()
    size = len(content)
    printable = string.printable.encode("ascii")

    results = []
    while start_addr <= end_addr:
        hit_count = 0
        for addr in xref_addrs:
            off = addr - start_addr
            # Need one byte before and one byte after `off` inside the file;
            # this also keeps a negative offset from wrapping around.
            if off < 1 or off + 1 >= size:
                continue
            # exclude those addrs pointing to all '\x00'
            # (one-byte slices compare the same way on Python 2 and 3)
            if content[off - 1:off] == b"\x00" and \
                    content[off:off + 1] in printable and \
                    content[off + 1:off + 2] in printable:
                hit_count += 1
        if hit_count > 0:
            results.append((start_addr, hit_count))
        start_addr += step

    results.sort(key=lambda x: x[1], reverse=True)
    if not results:
        print("[-] No string xref address hit! Maybe find some new string xref address?")
        sys.exit(-1)

    # Top 10
    print("[+] Top 10 string hit count ...")
    for load_base, hits in results[:10]:
        print(" ---> load_base: %#x, str_hit_count: %d" % (load_base, hits))
    print("[+] Possible load_base: %#x" % (results[0][0]))
    return results[0][0]
def find_base_address_via_code_xref(bin_path, xref_addrs, start_addr, end_addr, arch="mipsb", step=0x20):
    """Rank candidate load bases by how many xrefs land on a function prologue.

    Every base in ``start_addr, start_addr + step, ... <= end_addr`` is
    tried.  For a given base an address counts as a hit when the 20 bytes
    at file offset ``addr - base`` match ``FUNCTION_PROLOGS[arch]``.

    Args:
        bin_path: path of the binary file.
        xref_addrs: iterable of absolute addresses assumed to reference
            code.
        start_addr: first candidate base address.
        end_addr: last candidate base address (inclusive).
        arch: key into FUNCTION_PROLOGS selecting the prologue pattern.
        step: distance between two candidate bases; must be positive.

    Returns:
        The candidate base with the highest hit count (the lowest such base
        on a tie).  The ten best candidates are printed.

    Raises:
        ValueError: if *step* is not positive.

    Exits the process with status -1 when *arch* is unknown or no candidate
    scores a hit.
    """
    if arch not in FUNCTION_PROLOGS:
        print("[-] arch %s not support" % arch)
        sys.exit(-1)
    if step <= 0:
        raise ValueError("step must be positive")

    pattern = FUNCTION_PROLOGS[arch]
    if not isinstance(pattern, bytes):
        # A text pattern cannot be matched against bytes on Python 3; the
        # pattern text is plain ASCII, so this conversion is lossless.
        pattern = pattern.encode("latin-1")
    prolog = re.compile(pattern)

    with open(bin_path, "rb") as f:
        content = f.read()

    results = []
    while start_addr <= end_addr:
        hit_count = 0
        for addr in xref_addrs:
            off = addr - start_addr
            if off < 0:
                # a negative slice start would wrap around to the file end
                continue
            if prolog.match(content[off:off + 20]):
                hit_count += 1
        if hit_count > 0:
            results.append((start_addr, hit_count))
        start_addr += step

    results.sort(key=lambda x: x[1], reverse=True)
    if not results:
        print("[-] No instruction xref address hit! Maybe find some new instruction xref address?")
        sys.exit(-1)

    # Top 10
    print("[+] Top 10 instruction hit count ...")
    for load_base, hits in results[:10]:
        print(" ---> load_base: %#x, instruction_hit_count: %d" % (load_base, hits))
    print("[+] Possible load_base: %#x" % (results[0][0]))
    return results[0][0]
def main():
    """Command-line entry point: ``script.py <binary path>``.

    Scans the binary for reference addresses and, once the placeholders
    below have been filled in, searches for the load base address via
    string cross references.
    """
    if len(sys.argv) < 2:
        print("[-] binary path is missing ...")
        # Nothing to analyse; stop here instead of failing later on an
        # undefined bin_path.
        sys.exit(-1)
    bin_path = sys.argv[1]
    if not os.path.exists(bin_path):
        print("[-] path doesn't exist: %s" % bin_path)
        sys.exit(-1)

    print("Find addr pattern ...")
    # init: need to set correctly
    #   start_addr:     lowest load base to try
    #   str_bound_addr: addresses above this value are treated as string
    #                   references
    start_addr, str_bound_addr = (None, None)
    xref_addresses = find_reference_addresses(
        bin_path, start_addr=start_addr, endian=">")

    if str_bound_addr is not None:
        if start_addr is None:
            # The base search walks upwards from start_addr and cannot run
            # without it.
            print("[-] start_addr must be set together with str_bound_addr")
            sys.exit(-1)

        print("Find and choose fixed strings reference address ...")
        target_str_addresses = choose_str_addresses(xref_addresses, str_bound_addr)

        print("Find load addr by checking strings reference address hit count (max: %d) ..." % len(
            target_str_addresses))
        # xref_addresses is sorted, so its first entry is the smallest
        # address; it is used as the upper bound of the base search.
        max_base_addr = xref_addresses[0]  # sorted addr_patterns
        print("[+] max_base_addr: %#x" % max_base_addr)

        print("Find base address via string xref ...")
        find_base_address_via_string_xref(
            bin_path, target_str_addresses, start_addr, max_base_addr, step=0x10)

        # print "Find base address via code xref ..."
        # find_base_address_via_code_xref(bin_path, target_str_addresses, start_addr, max_base_addr, arch="mipsb", step=0x10)
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
see Zyxel设备eCos固件加载地址分析