Created
December 15, 2019 14:33
-
-
Save darkarnium/4aa515d1a55bc7aae1d5d93aafcbfa7f to your computer and use it in GitHub Desktop.
IDA - Attempt to locate literal pools, and mark subsequent sections as code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# NOTE: Before running, make sure the minimal string length is set to 2
#       characters. This can be done by right-clicking any white space in the
#       IDA Strings window, selecting 'Setup', entering '2' into the 'Minimal
#       string length' field, and clicking 'OK'.
#
import time

import ida_auto
import idautils
# Address range to 'scan' for literal pools.
rom_scan_start, rom_scan_end = 0x8000000, 0x807FFFF

# Cache of string start addresses known to IDA (as int), mapped to the length
# IDA reports for each string.
string_addrs = dict()

# Potential literal pool locations.
literal_pools = list()
def has_string_entry(addr):
    '''
    Check whether the provided address is the start of a string known to IDA.

    In order to speed up subsequent lookups, the start address and length of
    every string reported by IDA are stored in the module-level
    ``string_addrs`` dict whenever this is called while that dict is empty.

    Args:
        addr (int): The address to check.

    Returns:
        The length of the string as known by IDA, or None if no known string
        starts at this address.
    '''
    # An empty cache is treated as "not populated yet", so IDA's string list
    # is enumerated (again) before the lookup.
    if not string_addrs:
        for s in idautils.Strings():
            string_addrs[s.ea] = s.length

    return string_addrs.get(addr)
def locate_literal_pools(s_addr, e_addr):
    '''
    Attempts to locate all potential literal pools within the provided address
    range by looking for items which are either UNKNOWN, DATA or TAIL bytes,
    which have cross-refs, but are NOT known to IDA as strings.

    Args:
        s_addr (int): The address to start scanning at.
        e_addr (int): The address to stop scanning at (exclusive).

    Returns:
        A list of potential literal pool addresses, in ascending order.
    '''
    # Value of the FF_REF flag bit (the address has references).
    ff_ref = 0x1000

    pools = []
    c_addr = s_addr
    while c_addr < e_addr:
        # NOTE: get_full_flags / is_* are the IDA 7.x names for the 6.95
        # GetFlags / is* helpers, which are not available by default on
        # newer IDA releases.
        flags = get_full_flags(c_addr)
        size = get_item_size(c_addr)

        # Code is never a candidate. Anything else must be referenced and
        # must not be the start of a string known to IDA.
        is_candidate = (
            not is_code(flags)
            and (is_data(flags) or is_unknown(flags) or is_tail(flags))
            and (flags & ff_ref) == ff_ref
            and not has_string_entry(c_addr)
        )
        if is_candidate:
            pools.append(c_addr)

        # Always move forward by at least one byte so the scan terminates
        # even if a zero size is ever reported.
        c_addr += max(size, 1)

    return pools
def should_mark_as_code(addr):
    '''
    Performs some heuristics on an address to determine whether to mark it as
    code. This is intended for use with ARM binaries which contain literal
    pools, and mileage may vary.

    Args:
        addr (int): The address to analyse.

    Returns:
        Whether to mark the section as code or not (Boolean).
    '''
    # Skip addresses which are marked as being potential literal pools.
    if addr in literal_pools:
        return False

    # Skip addresses already marked as code. get_full_flags / is_code are the
    # IDA 7.x names for the 6.95 GetFlags / isCode helpers.
    if is_code(get_full_flags(addr)):
        return False

    # Skip addresses marked as strings.
    if has_string_entry(addr):
        return False

    # Skip NULLs and characters in the ASCII range.
    try:
        value = int(print_operand(addr, 1), 16)
    except (TypeError, ValueError):
        # The operand text is missing or is not a hex literal, so the ASCII
        # heuristic cannot be applied. Be conservative and leave the address
        # alone rather than aborting the whole run.
        return False

    return value > 0x7E
# Number of times the scan had to be repeated after the initial pass.
loop_count = 0

# Every address successfully / unsuccessfully marked as code across all passes.
all_success = set()
all_failure = set()

while True:
    marked_success = []
    marked_failure = []

    # Flatten any cached strings each loop, so has_string_entry() rebuilds
    # its cache from IDA's current string list.
    string_addrs = {}

    # Determine potential literal pool locations each loop.
    print('[-] Attempting to locate literal pools')
    literal_pools = locate_literal_pools(rom_scan_start, rom_scan_end)

    print('[-] Attempting to mark sections following literal pools as code')
    for c_addr in literal_pools:
        # We always operate on the NEXT address, as we're trying to inspect
        # the address AFTER the literal pool entry.
        n_addr = c_addr + get_item_size(c_addr)
        if not should_mark_as_code(n_addr):
            continue

        # Attempt to mark as code and wait for AA to finish.
        ida_auto.auto_make_code(n_addr)
        ida_auto.auto_wait()

        # If the address doesn't now have the FF_CODE flag after AA, then it
        # wasn't able to be processed as code. get_full_flags / is_code are
        # the IDA 7.x names for the 6.95 GetFlags / isCode helpers.
        if is_code(get_full_flags(n_addr)):
            marked_success.append(n_addr)
            all_success.add(n_addr)
            # An address which failed on an earlier pass may succeed later;
            # don't report it as both a success and a failure.
            all_failure.discard(n_addr)
        else:
            marked_failure.append(n_addr)
            all_failure.add(n_addr)

    # Print results and loop - if required.
    print(
        '[+] {0} addresses successfully marked as code, {1} failed'.format(
            len(marked_success),
            len(marked_failure)
        )
    )

    # ..aaand we're done, unless new sections were marked as code.
    if not marked_success:
        break

    # Loop if we successfully marked any new sections as code to ensure that
    # any newly analysed sections are processed.
    loop_count += 1
    print('[-] Recursing to process new code sections')

# Summary and exit.
print(
    '[+] Recursed {0} times with {1} succesfully marked, {2} failed'.format(
        loop_count,
        len(all_success),
        len(all_failure)
    )
)

# Sort the addresses so the report is stable and easy to scan.
success_hex = ['0x{0:0x}'.format(addr) for addr in sorted(all_success)]
failure_hex = ['0x{0:0x}'.format(addr) for addr in sorted(all_failure)]

print(
    '[!] Failed addresses: {0}'.format(
        ', '.join(failure_hex)
    )
)
print(
    '[+] Success addresses: {0}'.format(
        ', '.join(success_hex)
    )
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment