Skip to content

Instantly share code, notes, and snippets.

@cq674350529
Last active December 1, 2022 05:17
Show Gist options
  • Save cq674350529/74e5b6d31780882c54c80302172ad753 to your computer and use it in GitHub Desktop.
Save cq674350529/74e5b6d31780882c54c80302172ad753 to your computer and use it in GitHub Desktop.
Locate the load base address of a raw firmware image by finding fixed cross-reference (xref) addresses.
#!/usr/bin/env python
import os
import sys
import binascii
import struct
import random
import string
import re
import inspect
# Byte-level regular expressions describing a MIPS function prologue, keyed by
# architecture name ("mipsl" = little endian, "mipsb" = big endian).
# find_base_address_via_code_xref() applies the selected pattern with re.match()
# to the bytes found at each candidate function address.
#
# Each value is built from adjacent raw-string literals, one instruction per
# literal. "[\x00-\xff]" is used as an explicit "any byte" wildcard for the
# immediate operand of an instruction. Fixed opcode bytes (big-endian view):
#   3c 1c xx xx   lui   $gp, imm
#   27 9c xx xx   addiu $gp, $gp, imm
#   03 99 e0 21   addu  $gp, $gp, $t9
#   27 bd xx xx   addiu $sp, $sp, imm
#   af bf xx xx   sw    $ra, imm($sp)
#   af be xx xx   sw    $fp, imm($sp)
#
# NOTE(review): the final store group is mandatory for "mipsl" but optional
# (trailing "?") for "mipsb" -- confirm whether this asymmetry is intended.
FUNCTION_PROLOGS = {
"mipsl": # little endian
r"([\x00-\xff]{2}\x1c\x3c|[\x00-\xff]\x00\x1c\x3c[\x00-\xff]{2}\x9c\x27\x21\xe0\x99\x03)?" # lui $gp, xxx | li $gp, 0x11b0b0; addu $gp, $t9
r"[\x00-\xff]{2}\xbd\x27" # addiu $sp, xxx
r"([\x00-\xff]{2}\x9c\x27)?" # li $gp, xxx
r"([\x00-\xff]{2}\xbf\xaf|[\x00-\xff]{2}\xbe\xaf)", # sw $ra, xxx | sw $fp, 0x14($sp) (for leaf function)
"mipsb": # big endian
r"(\x3c\x1c[\x00-\xff]{2}|\x3c\x1c[\x00-\xff]{2}\x27\x9c[\x00-\xff]{2}\x03\x99\xe0\x21)?" # lui $gp, xxx | li $gp, 0x11b0b0; addu $gp, $t9
r"\x27\xbd[\x00-\xff]{2}" # addiu $sp, xxx
r"(\x27\x9c[\x00-\xff]{2})?" # li $gp, xxx
r"(\xaf\xbf[\x00-\xff]{2}|\xaf\xbe[\x00-\xff]{2})?", # sw $ra, xxx | sw $fp, 0x14($sp) (for leaf function)
}
def get_current_func_name():
    """Return the name of the function that called this helper."""
    # stack()[0] is this helper itself; stack()[1] is its direct caller.
    caller_record = inspect.stack()[1]
    # A stack record is (frame, filename, lineno, function, ...).
    _frame, _filename, _lineno, caller_name = caller_record[:4]
    return caller_name
def find_reference_addresses(bin_path, start_addr=None, endian="<"):
    """Collect candidate absolute addresses stored in a raw binary image.

    The file is viewed as a sequence of aligned 32-bit words. Runs of
    consecutive words that share the same 16-bit half at the same position
    (for example the ``80 xx`` upper half of ``0x80xxxxxx`` pointers) are
    treated as pointer tables, and every word of such a run is decoded as an
    address.

    Args:
        bin_path: path of the binary image to scan.
        start_addr: optional lower bound; when given, only addresses within
            ``[start_addr, start_addr + file_size]`` are kept.
        endian: struct byte-order prefix used to decode the words. For "<"
            only runs in the second half-word of each word are used, for ">"
            only runs in the first half-word (i.e. the upper 16 address bits).
            ``None`` is treated as "<". Any other prefix disables the
            position filter.

    Returns:
        Sorted list of unique candidate addresses.

    Raises:
        SystemExit: when no candidate address is found.
    """
    with open(bin_path, "rb") as f:
        content = f.read()
    file_size = len(content)

    # Resolve the default once, so every run is filtered and decoded the same way.
    if endian is None:
        endian = "<"
    word_codec = struct.Struct("%sI" % endian)

    runs = _collect_repeated_halfwords(content)
    # Longest runs first (stable sort keeps file order among equal counts).
    runs.sort(key=lambda item: item[2], reverse=True)

    all_addresses = []
    for pattern, first_offset, repeat_count in runs:
        # Padding-like patterns and very short runs are not pointer tables.
        if pattern in (b"\x00\x00", b"\xff\xff") or repeat_count < 3:
            continue
        # 0: first half-word of the aligned word, 1: second half-word.
        slot = (first_offset // 2) % 2
        if endian == "<" and slot == 0:
            # little endian: upper address bits live in the second half-word
            continue
        if endian == ">" and slot == 1:
            # big endian: upper address bits live in the first half-word
            continue
        print("[+] pattern: %s, offset: %#x, count: %d" % (
            binascii.hexlify(pattern).decode("ascii"), first_offset, repeat_count))

        word_offset = first_offset if slot == 0 else first_offset - 2
        addresses = []
        seen = set()
        # repeat_count counts the repeats *after* the first occurrence, so the
        # run holds repeat_count + 1 words.
        for _ in range(repeat_count + 1):
            if word_offset + 4 > file_size:
                # truncated trailing word
                break
            addr = word_codec.unpack_from(content, word_offset)[0]
            word_offset += 4
            # filter out "bad" addresses
            if start_addr is not None and not (start_addr <= addr <= start_addr + file_size):
                continue
            if addr not in seen:
                seen.add(addr)
                addresses.append(addr)

        if addresses:
            print(" ---> possiable addr: %s" % ",".join("%#x" % a for a in addresses))
        else:
            print(" ---> no valid address with this pattern")
        all_addresses.extend(addresses)

    if not all_addresses:
        print("[-] no addr patterns found!")
        sys.exit(-1)

    all_addresses = sorted(set(all_addresses))
    print("[+] All addresses: %s" % ",".join("%#x" % a for a in all_addresses))
    return all_addresses


def _collect_repeated_halfwords(content):
    """Return ``(pattern, first_offset, repeat_count)`` for every run of equal
    half-words found at the same position of consecutive aligned words."""
    runs = []
    # One tracker per half-word slot: [pattern, offset of first hit, repeats].
    trackers = [[content[0:2], 0, 0], [content[2:4], 2, 0]]
    size = len(content)
    offset = 4
    while offset < size:
        data = content[offset:offset + 2]
        tracker = trackers[(offset // 2) % 2]
        if data == tracker[0]:
            tracker[2] += 1
        else:
            if tracker[2] != 0:
                runs.append(tuple(tracker))
            tracker[0], tracker[1], tracker[2] = data, offset, 0
        offset += 2
    # Runs that extend to the end of the file are still open here; record them.
    for tracker in trackers:
        if tracker[2] != 0:
            runs.append(tuple(tracker))
    return runs
def choose_str_addresses(all_addresses, str_bound_addr, sample_size=20):
    """Pick a random sample of the addresses that lie above ``str_bound_addr``.

    Args:
        all_addresses: iterable of candidate addresses.
        str_bound_addr: boundary address; only addresses strictly greater
            than it are treated as string references.
        sample_size: maximum number of addresses to return; clamped to the
            number of string addresses available.

    Returns:
        List of randomly chosen string addresses (order is random).

    Raises:
        SystemExit: when no address is above ``str_bound_addr``.
    """
    str_addresses = [addr for addr in all_addresses if addr > str_bound_addr]
    if not str_addresses:
        print("[-] no strings reference address found!")
        sys.exit(-1)
    print("[+] String addresses (size:%d): %s" % (
        len(str_addresses), ",".join("%#x" % a for a in str_addresses)))

    sample_size = min(sample_size, len(str_addresses))
    # random select some string addresses, to avoid some addresses pointing to all '\x00' region
    random_str_addresses = random.sample(str_addresses, sample_size)
    print("[+] Choose string addresses (size: %d): %s" % (
        len(random_str_addresses), ",".join("%#x" % a for a in random_str_addresses)))
    return random_str_addresses
def find_base_address_via_string_xref(bin_path, xref_addrs, start_addr, end_addr, step=0x20):
    """Score candidate load bases by how many xref addresses land on a string.

    For every candidate base in ``start_addr, start_addr + step, ...`` up to
    and including ``end_addr``, each address in ``xref_addrs`` is translated
    to a file offset (``addr - base``). It counts as a hit when the byte
    before the offset is NUL and the two bytes at the offset are printable,
    i.e. the offset looks like the start of a string.

    Args:
        bin_path: path of the binary image.
        xref_addrs: absolute addresses expected to point at strings.
        start_addr: first candidate base address.
        end_addr: last candidate base address (inclusive).
        step: increment between candidate bases; must be positive.

    Returns:
        List of ``(base, hit_count)`` tuples with at least one hit, sorted by
        hit count in descending order.

    Raises:
        ValueError: when ``step`` is not positive.
        SystemExit: when no candidate base produces a hit.
    """
    if step <= 0:
        # a non-positive step would never reach end_addr
        raise ValueError("step must be a positive integer")
    with open(bin_path, "rb") as f:
        # bytearray yields integer bytes on both Python 2 and Python 3
        content = bytearray(f.read())
    size = len(content)
    printable = frozenset(bytearray(string.printable.encode("ascii")))

    results = []
    base = start_addr
    while base <= end_addr:
        hit_count = 0
        for addr in xref_addrs:
            offset = addr - base
            # offset - 1 .. offset + 1 must all lie inside the file; without this
            # guard a negative index wraps around and a large one raises IndexError
            if offset < 1 or offset + 1 >= size:
                continue
            # exclude those addrs pointing to all '\x00'
            if content[offset - 1] == 0 and \
                    content[offset] in printable and \
                    content[offset + 1] in printable:
                hit_count += 1
        if hit_count > 0:
            results.append((base, hit_count))
        base += step

    results.sort(key=lambda x: x[1], reverse=True)
    if not results:
        print("[-] No string xref address hit! Maybe find some new string xref address?")
        sys.exit(-1)
    # Top 10
    print("[+] Top 10 string hit count ...")
    for base, hits in results[:10]:
        print(" ---> load_base: %#x, str_hit_count: %d" % (base, hits))
    print("[+] Possible load_base: %#x" % (results[0][0]))
    return results
def find_base_address_via_code_xref(bin_path, xref_addrs, start_addr, end_addr, arch="mipsb", step=0x20, ):
    """Score candidate load bases by how many xref addresses land on a function prologue.

    For every candidate base in ``start_addr, start_addr + step, ...`` up to
    and including ``end_addr``, each address in ``xref_addrs`` is translated
    to a file offset (``addr - base``) and the bytes at that offset are
    matched against ``FUNCTION_PROLOGS[arch]``.

    Args:
        bin_path: path of the binary image.
        xref_addrs: absolute addresses expected to point at function entries.
        start_addr: first candidate base address.
        end_addr: last candidate base address (inclusive).
        arch: key into ``FUNCTION_PROLOGS``.
        step: increment between candidate bases; must be positive.

    Returns:
        List of ``(base, hit_count)`` tuples with at least one hit, sorted by
        hit count in descending order.

    Raises:
        ValueError: when ``step`` is not positive.
        SystemExit: when ``arch`` is unknown or no candidate base produces a hit.
    """
    if arch not in FUNCTION_PROLOGS:
        print("[-] arch %s not support" % arch)
        sys.exit(-1)
    if step <= 0:
        # a non-positive step would never reach end_addr
        raise ValueError("step must be a positive integer")

    prolog = FUNCTION_PROLOGS[arch]
    if not isinstance(prolog, bytes):
        # Python 3: the file content is bytes, so the pattern must be bytes too.
        # The \xNN escapes are plain ASCII text in the raw string and are
        # interpreted by the regex engine after encoding.
        prolog = prolog.encode("latin-1")
    prolog_re = re.compile(prolog)
    # Longest prologue variant: 3 gp instructions + addiu sp + addiu gp + sw = 24 bytes.
    window = 24

    with open(bin_path, "rb") as f:
        content = f.read()
    size = len(content)

    results = []
    base = start_addr
    while base <= end_addr:
        hit_count = 0
        for addr in xref_addrs:
            offset = addr - base
            # a negative offset would silently wrap around via slicing
            if offset < 0 or offset >= size:
                continue
            if prolog_re.match(content[offset:offset + window]):
                hit_count += 1
        if hit_count > 0:
            results.append((base, hit_count))
        base += step

    results.sort(key=lambda x: x[1], reverse=True)
    if not results:
        print("[-] No instruction xref address hit! Maybe find some new instruction xref address?")
        sys.exit(-1)
    # Top 10
    print("[+] Top 10 instruction hit count ...")
    for base, hits in results[:10]:
        print(" ---> load_base: %#x, instruction_hit_count: %d" % (base, hits))
    print("[+] Possible load_base: %#x" % (results[0][0]))
    return results
def main():
    """Command-line entry point.

    Usage: script <binary> [start_addr] [str_bound_addr]

    ``start_addr`` is the lowest plausible load address and ``str_bound_addr``
    the address above which references are treated as string references. Both
    are optional integers (any base accepted by ``int(x, 0)``, e.g.
    ``0x80000000``) and default to ``None``. Without ``str_bound_addr`` only
    the candidate reference addresses are listed.
    """
    if len(sys.argv) < 2:
        print("[-] binary path is missing ...")
        print("usage: %s <binary> [start_addr] [str_bound_addr]" % sys.argv[0])
        # stop here: there is no path to work on
        sys.exit(-1)
    bin_path = sys.argv[1]
    if not os.path.exists(bin_path):
        print("[-] path doesn't exist: %s" % bin_path)
        sys.exit(-1)

    start_addr, str_bound_addr = (None, None)  # defaults when not given on the command line
    try:
        if len(sys.argv) >= 3:
            start_addr = int(sys.argv[2], 0)
        if len(sys.argv) >= 4:
            str_bound_addr = int(sys.argv[3], 0)
    except ValueError:
        print("[-] start_addr / str_bound_addr must be integers, e.g. 0x80000000")
        sys.exit(-1)

    print("Find addr pattern ...")
    xref_addresses = find_reference_addresses(bin_path, start_addr=start_addr, endian=">")
    if str_bound_addr is not None:
        if start_addr is None:
            # the base-address scan needs a concrete address to start from
            print("[-] start_addr is required to search for the base address")
            sys.exit(-1)
        print("Find and choose fixed strings reference address ...")
        target_str_addresses = choose_str_addresses(xref_addresses, str_bound_addr)
        print("Find load addr by checking strings reference address hit count (max: %d) ..." % len(
            target_str_addresses))
        # xref_addresses is sorted, so its first entry is the lowest address
        # and is used as the upper bound of the base-address scan
        max_base_addr = xref_addresses[0]
        print("[+] max_base_addr: %#x" % max_base_addr)
        print("Find base address via string xref ...")
        find_base_address_via_string_xref(bin_path, target_str_addresses, start_addr, max_base_addr, step=0x10)
        # Alternative scan using function prologues instead of strings:
        # print("Find base address via code xref ...")
        # find_base_address_via_code_xref(bin_path, target_str_addresses, start_addr, max_base_addr, arch="mipsb", step=0x10)


if __name__ == "__main__":
    main()
@cq674350529
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment