Created
March 21, 2023 11:50
-
-
Save rpw/b88fb40578781507d9edd2db1b22e925 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# reverse engineering tricks: Determine load addresses using string reference differentials | |
# (c) 2017-2023 Comsecuris GmbH | |
from idc import * | |
from idautils import * | |
from sets import Set | |
ATTEMPTS = 1 | |
############################################################################# | |
def memory_dwords_as_sorted_list(start, end): | |
print("Gathering dwords in memory. This may take a while...") | |
values = [] | |
for a in range(start, end, 4): | |
values += [ Dword(a) ] | |
values = list(Set(values)) | |
values.sort() | |
return values | |
def compute_differences(values): | |
differences = [0] * (len(values)-1) | |
for i in range(len(values)-1): | |
differences[i] = values[i+1] - values[i] | |
return differences | |
# Knuth Morris Pratt algorithm to find all subsequences | |
# https://www.safaribooksonline.com/library/view/python-cookbook-2nd/0596007973/ch05s14.html | |
def KMP(text, pattern): | |
# ensure we can index into pattern, and also make a copy to protect | |
# against changes to 'pattern' while we're suspended by `yield' | |
pattern = list(pattern) | |
length = len(pattern) | |
# build the KMP "table of shift amounts" and name it 'shifts' | |
shifts = [1] * (length + 1) | |
shift = 1 | |
for pos, pat in enumerate(pattern): | |
while shift <= pos and pat != pattern[pos-shift]: | |
shift += shifts[pos-shift] | |
shifts[pos+1] = shift | |
# perform the actual search | |
startPos = 0 | |
matchLen = 0 | |
for c in text: | |
while matchLen == length or matchLen >= 0 and pattern[matchLen] != c: | |
startPos += shifts[matchLen] | |
matchLen -= shifts[matchLen] | |
matchLen += 1 | |
if matchLen == length: yield startPos | |
def identify_base_addresses(values, strs): | |
string_differences = compute_differences(strs) | |
differences = compute_differences(values) | |
for v in string_differences: | |
if v <= 0: | |
print("Invalid sequence of strings. Memory addresses have to be in strictly increasing order.") | |
return [] | |
matches = KMP(differences, string_differences) | |
baseaddrs = [] | |
for pos in matches: | |
baseaddrs += [(values[pos]-strs[0])] | |
return baseaddrs | |
def find_adjacent_strings(strs): | |
from random import randint | |
sample_nr = 6 | |
for attempt in xrange(0, ATTEMPTS): | |
start_off = randint(0, len(strs) - sample_nr) | |
matches = [] | |
print("Looking for adjacent strings (attempt %d at offset %d)" %(attempt + 1, start_off)) | |
for i in xrange(start_off, len(strs)-1): | |
if len(matches) == sample_nr: | |
return matches | |
if strs[i][0] + strs[i][1] == strs[i+1][0]: | |
matches.append(strs[i][0]) | |
else: | |
del matches[:] | |
print("No adjacent strings found...") | |
return [] | |
############################################################################# | |
strings = [] | |
found = False | |
ea = ScreenEA() | |
values = memory_dwords_as_sorted_list(SegStart(ea), SegEnd(ea)) | |
for s in Strings(): | |
strings.append((s.ea, s.length)) | |
## lists of start addresses of strings that are consecutively placed in memory | |
stringaddrs = [0x1b594, 0x1b5b4, 0x1b5d4, 0x1b5f0, 0x1b60c, 0x1b628] | |
baseaddrs = [] | |
for attempt in xrange(0, ATTEMPTS): | |
stringaddrs = find_adjacent_strings(strings) | |
if stringaddrs == []: continue | |
print("using adjacent strings %s" %(["%x" %(s) for s in stringaddrs])) | |
baseaddrs = identify_base_addresses(values, stringaddrs) | |
for baseaddr in baseaddrs: | |
print "Identified potential base address for image: %08x" % baseaddr | |
found = True | |
if found: break | |
if len(baseaddrs) == 0: | |
print("NO potential base address found, you may try again") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment