Last active
June 14, 2020 08:43
-
-
Save alexander-hanel/76c6c06d7a2a75ccb5ace0805b38554a to your computer and use it in GitHub Desktop.
Minimum Yara Search for IDAPYTHON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import yara | |
import operator | |
import idautils | |
SEARCH_CASE = 4 | |
SEARCH_REGEX = 8 | |
SEARCH_NOBRK = 16 | |
SEARCH_NOSHOW = 32 | |
SEARCH_UNICODE = 64 | |
SEARCH_IDENT = 128 | |
SEARCH_BRK = 256 | |
class YaraIDASearch: | |
def __init__(self): | |
self.mem_results = "" | |
self.mem_offsets = [] | |
if not self.mem_results: | |
self._get_memory() | |
def _get_memory(self): | |
print("Status: Loading memory for Yara.") | |
result = b"" | |
segments_starts = [ea for ea in idautils.Segments()] | |
offsets = [] | |
start_len = 0 | |
for start in segments_starts: | |
end = idc.get_segm_end(start) | |
result += idc.get_bytes(start, end - start) | |
offsets.append((start, start_len, len(result))) | |
start_len = len(result) | |
print("Status: Memory has been loaded.") | |
self.mem_results = result | |
self.mem_offsets = offsets | |
def _to_virtual_address(self, offset, segments): | |
va_offset = 0 | |
for seg in segments: | |
if seg[1] <= offset < seg[2]: | |
va_offset = seg[0] + (offset - seg[1]) | |
return va_offset | |
def _init_sig(self, sig_type, pattern, sflag): | |
if SEARCH_REGEX & sflag: | |
signature = "/%s/" % pattern | |
if SEARCH_CASE & sflag: | |
# ida is not case sensitive by default but yara is | |
pass | |
else: | |
signature += " nocase" | |
if SEARCH_UNICODE & sflag: | |
signature += " wide" | |
elif sig_type == "binary": | |
signature = "{ %s }" % pattern | |
elif sig_type == "text" and (SEARCH_REGEX & sflag) == False: | |
signature = '"%s"' % pattern | |
if SEARCH_CASE & sflag: | |
pass | |
else: | |
signature += " nocase" | |
signature += " wide ascii" | |
yara_rule = "rule foo : bar { strings: $a = %s condition: $a }" % signature | |
return yara_rule | |
def _compile_rule(self, signature): | |
try: | |
rules = yara.compile(source=signature) | |
except Exception as e: | |
print("ERROR: Cannot compile Yara rule %s" % e) | |
return False, None | |
return True, rules | |
def _search(self, signature): | |
status, rules = self._compile_rule(signature) | |
if not status: | |
return False, None | |
values = [] | |
matches = rules.match(data=self.mem_results) | |
if not matches: | |
return False, None | |
for rule_match in matches: | |
for match in rule_match.strings: | |
match_offset = match[0] | |
values.append(self._to_virtual_address(match_offset, self.mem_offsets)) | |
return values | |
def find_binary(self, bin_str, sflag=0): | |
yara_sig = self._init_sig("binary", bin_str, sflag) | |
offset_matches = self._search(yara_sig) | |
return offset_matches | |
def find_text(self, q_str, sflag=0): | |
yara_sig = self._init_sig("text", q_str, sflag) | |
offset_matches = self._search(yara_sig) | |
return offset_matches | |
def find_sig(self, yara_rule): | |
# recommendation/fork by R3MRUM https://github.com/R3MRUM | |
offset_matches = self._search(yara_rule) | |
return offset_matches | |
def reload_scan_memory(self): | |
self._get_memory() | |
Thanks @R3MRUM for the find_sig function. I updated the byte extraction to be faster and added your code. Cheers.
Hi Alex! Binary search in order to work is missing the brackets.
It should be signature = "{ %s }" % pattern
Thanks for this script is very useful for doing binary searches with wildcards in IDA!
@sisoma2 I typically add the brackets when doing the binary search but I included the update you mentioned. Now it does not require the brackets. Here is a git of this script https://github.com/alexander-hanel/YaraIDASearch/blob/master/yara_ida_search.py
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey Alex, I've added a simple find_sig function that accepts an existing yara signature as opposed to using the present find_text and find_binary functions that build the rule for you. If you think it is worth it, please incorporate the changes into your version:
https://gist.github.com/R3MRUM/f7b1b5ede9876c7e81385c138c871b13