Last active
August 16, 2021 13:36
-
-
Save williballenthin/f4e4f17681bcd3eee2e330c3059608de to your computer and use it in GitHub Desktop.
search for YARA matches in each function within IDA Pro.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
''' | |
search for YARA matches in each function within IDA Pro. | |
upon execution, prompts the user to provide the YARA rules file. | |
requirements: | |
- hexdump | |
- yara-python | |
author: Willi Ballenthin | |
email: [email protected] | |
''' | |
import os | |
import os.path | |
import logging | |
import collections | |
import yara | |
import hexdump | |
import idc | |
import idaapi | |
import idautils | |
# module-level logger, named 'yara.ida', shared by the functions in this script.
logger = logging.getLogger('yara.ida')
def append_comment(ea, s, repeatable=False):
    '''
    attach a comment to an address, keeping any comment that is already there.

    the new text goes on its own line after the existing comment.
    when the existing comment already contains the text, nothing is written.

    Args:
      ea (int): address that receives the comment.
      s (str): text of the comment.
      repeatable (bool): when True, read and write the repeatable comment
        instead of the regular one.

    Raises:
      UnicodeEncodeError: when `s` cannot be encoded as ascii.
    '''
    # see: http://blogs.norman.com/2011/security-research/improving-ida-analysis-of-x64-exception-handling
    text = s.encode('ascii')

    # choose the reader/writer pair for the requested kind of comment once,
    # so the rest of the function does not need to branch on `repeatable`.
    if repeatable:
        read_comment, write_comment = idc.RptCmt, idc.MakeRptCmt
    else:
        read_comment, write_comment = idc.Comment, idc.MakeComm

    existing = read_comment(ea)
    if existing:
        if text in existing:
            # ignore duplicates
            return
        text = existing + '\n' + text

    write_comment(ea, text)
def get_data(start, size):
    '''
    read `size` bytes beginning at `start`, padding unreadable bytes with NULL.

    unlike a bare `idc.GetManyBytes`, this never hands back an empty result
    just because part of the range has no byte in the database.

    Args:
      start (int): address of the first byte to read.
      size (int): how many bytes to read.

    Returns:
      bytes: `size` bytes; positions that could not be read are `\\x00`.
    '''
    # fast path: one bulk read, which works pretty often.
    whole = idc.GetManyBytes(start, size)
    if whole:
        return whole

    # slow path: the bulk read gave nothing back, so go byte by byte and
    # substitute a NULL wherever a single-byte read also comes back empty.
    return b''.join(idc.GetManyBytes(ea, 1) or b'\x00'
                    for ea in range(start, start + size))
def get_functions():
    '''
    walk every segment and yield the functions found inside it.

    Yields:
      int: address of a function.
    '''
    for seg in idautils.Segments():
        seg_begin = idc.SegStart(seg)
        seg_end = idc.SegEnd(seg)
        for function_ea in idautils.Functions(seg_begin, seg_end):
            yield function_ea
def get_function_data(fva):
    '''
    fetch the bytes of each chunk of the given function.

    the bytes of a chunk are gathered item by item, as enumerated by
    `idautils.Heads`, and concatenated in address order.

    Args:
      fva (int): address of the function.

    Yields:
      Tuple[int, bytes]: start address of the chunk, and the concatenated
        bytes of the items found in that chunk.
    '''
    for (begin, end) in idautils.Chunks(fva):
        # read through `get_data` rather than `idc.GetManyBytes` directly:
        # a failed raw read yields a falsy value that would break the join
        # below, whereas `get_data` pads unreadable bytes with NULLs.
        items = [get_data(head, idc.ItemSize(head))
                 for head in idautils.Heads(begin, end)]
        yield begin, b''.join(items)
def match_function(rules, fva):
    '''
    run the given rules over each chunk of a function and report each matched string.

    Args:
      rules: rules object; its `match(data=...)` method is called once per chunk.
      fva (int): address of the function.

    Yields:
      tuple: (namespace, rule name, string name, function address,
        address of the matched string, matched data).
    '''
    logger.debug('matching function: 0x%x', fva)
    for chunkstart, chunk in get_function_data(fva):
        for hit in rules.match(data=chunk):
            for string_match in hit.strings:
                offset, sname, data = string_match
                # offsets are relative to the chunk buffer; rebase onto the chunk start.
                va = chunkstart + offset
                yield hit.namespace, hit.rule, sname, fva, va, data
def prompt_for_file_path(title="Select a file to open"):
    '''
    show a form holding a single file-open input and return the chosen path.

    Args:
      title (str): text substituted into the form template, both on the
        first line and inside the file input declaration.

    Returns:
      the value of the file input when the form's `Execute` returns 1,
      otherwise None.
    '''
    # build the form description up front; `{{iFileOpen}}` survives `format`
    # as `{iFileOpen}`, the placeholder bound to the control declared below.
    template = r"""{title:s}
<#{title:s}#{title:s}:{{iFileOpen}}>
""".format(title=title)

    class MyForm(idaapi.Form):
        def __init__(self):
            self.invert = False
            controls = {'iFileOpen': idaapi.Form.FileInput(open=True), }
            idaapi.Form.__init__(self, template, controls)

        def OnFormChange(self, fid):
            return 1

    form = MyForm()
    form.Compile()
    form.iFileOpen.value = ""

    # read the field only when the form was accepted; free the form either way.
    selected = form.iFileOpen.value if form.Execute() == 1 else None
    form.Free()
    return selected
def annotate_match(va, namespace, rulename, stringname):
    '''
    record a match as a (non-repeatable) comment at the given address.

    Args:
      va (int): address to annotate.
      namespace (str): namespace of the matching rule.
      rulename (str): name of the matching rule.
      stringname (str): name of the matching string.
    '''
    parts = (namespace, rulename, stringname)
    comment = 'yara: %s/%s/%s' % parts
    append_comment(va, comment)
def find_yara_rules(paths):
    '''
    search the given sequence of paths for either:
      - file paths ending in .yara
      - directories that contain filenames ending in .yara

    directories are not searched recursively.
    a path that does not exist is skipped with a warning.
    an existing file without the .yara extension is skipped silently.

    Args:
      paths (List[str]): list of file system paths to files or directories.

    Returns:
      List[str]: list of file system paths to files with extension ".yara".
    '''
    ret = []
    for path in paths:
        # test existence first: a missing path is neither a file nor a
        # directory, so a check nested under `isfile`/`isdir` can never fire.
        if not os.path.exists(path):
            logger.warning('YARA rule path does not exist: %s', path)
            continue

        if os.path.isfile(path):
            if path.endswith('.yara'):
                ret.append(path)
        elif os.path.isdir(path):
            ret.extend(os.path.join(path, filename)
                       for filename in os.listdir(path)
                       if filename.endswith('.yara'))
    return ret
def main():
    '''
    compile YARA rules and match them against each function in the database.

    rule files come from the script arguments (`idc.ARGV`) when any are
    present; otherwise the user is prompted to pick a single rule file.
    each match is logged and recorded as a comment at the matched address.
    returns without matching when no rule file was provided.
    '''
    logging.basicConfig(level=logging.INFO)
    logging.getLogger().setLevel(logging.INFO)

    if idc.ARGV:
        logger.debug('found IDA script cli arguments, using those as YARA rules')
        yarapaths = find_yara_rules(idc.ARGV)
    else:
        yarapath = prompt_for_file_path(title='Select YARA rule file')
        # the prompt gives back None when the form is not accepted, and the
        # field may be left empty; neither is a usable rule path.
        yarapaths = [yarapath] if yarapath else []

    if not yarapaths:
        logger.warning('no YARA rule files provided, nothing to match')
        return

    logger.info('compiling YARA rules...')
    # from file basename (no extension) as namespace to file path.
    # `splitext` keeps the full name for files that have no extension.
    rules = yara.compile(filepaths={os.path.splitext(os.path.basename(path))[0]: path
                                    for path in yarapaths})

    logger.info('matching YARA rules...')
    for fva in get_functions():
        # rule name -> addresses already covered by a reported match in this function.
        matched_vas = collections.defaultdict(set)
        # the function address echoed back by `match_function` is the one
        # passed in, so it is not rebound over the loop variable.
        for namespace, rulename, stringname, _fva, va, string in match_function(rules, fva):
            if va in matched_vas[rulename]:
                # personal preference: ignore overlapping matches
                continue

            logger.info('MATCH: rule: %s/%s/%s', namespace, rulename, stringname)
            logger.info('location: %s (0x%x) at offset: 0x%x', idc.GetFunctionName(fva), fva, va)
            logger.info('content:\n%s', hexdump.hexdump(string, result='return'))
            annotate_match(va, namespace, rulename, stringname)

            matched_vas[rulename].update(range(va, va + len(string)))

    logger.info('done matching YARA')
# run the matcher only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment