Created
January 13, 2022 13:44
-
-
Save notareverser/7e76c122d82ec455278640f4948a3e11 to your computer and use it in GitHub Desktop.
Merge N YARA rules from a file, where the rules have a single string, and the strings are all of the same length
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# yes, this is crappy code
# yes, it generally gets the job done
import sys | |
def countRules(data):
    """Return the number of occurrences of the text 'rule ' in *data*.

    This is a plain, non-overlapping substring count; it does not parse
    YARA syntax, so every appearance of the marker is counted.
    """
    marker = 'rule '
    return data.count(marker)
# grab the entire contents of the rule
def getRule(data, index):
    """Return the text of the index-th 'rule ' occurrence in *data*.

    The returned slice starts at that 'rule ' marker and ends at the last
    '}' found before the next 'rule ' marker (or before the end of *data*
    when there is no next marker).

    Returns None when *data* holds fewer than index + 1 markers.
    """
    keyword = 'rule '
    seen = 0
    start = data.find(keyword)
    while start != -1:
        # Position of the following marker; also the next loop position.
        following = data.find(keyword, start + len(keyword))
        if seen == index:
            end = len(data) if following == -1 else following
            closing = data.rfind('}', start, end)
            # When no '}' is found, closing is -1 and the slice is empty.
            return data[start:closing + 1]
        seen += 1
        start = following
    return None
# figure out how many strings are in the rule
def getStringCount(rule):
    """Count the '$' characters between 'strings:' and 'condition:'.

    Returns None when either of the two section markers is absent from
    *rule*; otherwise returns the number of '$' characters found in the
    text from the start of 'strings:' up to the start of 'condition:'.
    """
    strings_at = rule.find('strings:')
    condition_at = rule.find('condition:')
    if strings_at == -1 or condition_at == -1:
        return None
    section = rule[strings_at:condition_at]
    return section.count('$')
# get the nth string
def getString(rule, index):
    """Return the index-th '$'-introduced entry of the rule's strings section.

    The strings section is the text between the 'strings:' marker and the
    'condition:' marker.  An entry runs from one '$' up to the next '$'
    (or to the end of the section) and is returned with surrounding
    whitespace stripped.

    Returns None when either section marker is missing, or when the
    section holds fewer than index + 1 entries.
    """
    marker = '$'
    sloc = rule.find('strings:')
    cloc = rule.find('condition:')
    # A failed find() yields -1, which would otherwise be used as a slice
    # index and silently drop or truncate the section.
    if sloc == -1 or cloc == -1:
        return None
    sdata = rule[sloc:cloc]

    seen = 0
    start = sdata.find(marker)
    while start != -1:
        following = sdata.find(marker, start + 1)
        if seen == index:
            end = len(sdata) if following == -1 else following
            return sdata[start:end].strip()
        seen += 1
        start = following
    return None
# we don't need no stinkin' ASCII or regex strings! No ranges either!
def checkByteString(stringData):
    """Return True when *stringData* has both braces and no square brackets.

    The test is purely character-based: it requires at least one '{' and
    one '}' and rejects any text containing '[' or ']'.
    """
    has_braces = '{' in stringData and '}' in stringData
    has_brackets = '[' in stringData or ']' in stringData
    return has_braces and not has_brackets
# get the actual bytes
def extractData(data):
    """Return the compact text that follows the first '=' in *data*.

    Braces and all whitespace are removed first, so a definition such as
    '$a = { AA BB }' yields 'AABB'.  The result is the text between the
    first and second '=' (or to the end if there is only one).

    Returns None when *data* contains no '='.
    """
    # Strip every whitespace character, not only ' ': a hex string that is
    # wrapped across lines or indented with tabs would otherwise leave
    # '\n' / '\t' characters mixed into the nibble data.
    tdata = data.translate(str.maketrans('', '', '{} \t\n\r\x0b\x0c'))
    if '=' in tdata:
        return tdata.split('=')[1]
    return None
# if the characters are the same emit, otherwise emit a '?'
def merge(lhs, rhs):
    """Merge two equal-length nibble strings position by position.

    Where both inputs hold the same character (compared without regard to
    letter case, so 'a' and 'A' count as the same hex digit) the character
    from *lhs* is kept; everywhere else a '?' is emitted.  The number of
    '?' substitutions made is reported on stderr.

    Returns the merged string, or None when the inputs differ in length.
    """
    # Unequal lengths cannot be merged position by position; signal the
    # failure with None instead of raising IndexError or truncating.
    if len(lhs) != len(rhs):
        return None

    merged = []
    n = 0
    for left, right in zip(lhs, rhs):
        if left.lower() == right.lower():
            merged.append(left)
        else:
            merged.append('?')
            n += 1
    print("Substituted {:d} wildcard nibbles".format(n), file=sys.stderr)
    return ''.join(merged)
# get the 0th string for the nth rule, remove its spaces/brackets
def getRuleStringData(data, index):
    """Return the compact byte data of the first string of rule *index*.

    Looks up the rule with getRule, takes its first string with getString,
    verifies it with checkByteString and reduces it with extractData.

    Exits the process with status -3 (after printing a message to stderr)
    when the rule or its first string cannot be found, when the string is
    rejected by checkByteString, or when extractData yields no data.
    """
    rule = getRule(data, index)
    # getRule and getString both return None when nothing is found; stop
    # here with a message rather than failing on a None attribute access.
    rs = None if rule is None else getString(rule, 0)
    if rs is None:
        print("Rule {:d} has no string to merge".format(index), file=sys.stderr)
        sys.exit(-3)

    if not checkByteString(rs):
        print("Can only merge binary strings, not text or regex or ranges. SARRY!", file=sys.stderr)
        sys.exit(-3)

    hexdata = extractData(rs)
    # extractData returns None when the string has no '=' in it.
    if hexdata is None:
        print("Rule {:d} string has no '=' assignment".format(index), file=sys.stderr)
        sys.exit(-3)
    return hexdata
def spacify(merged):
    """Return *merged* split into two-character groups joined by spaces.

    A trailing odd character forms a final one-character group; an empty
    input gives an empty string.
    """
    pairs = []
    for offset in range(0, len(merged), 2):
        pairs.append(merged[offset:offset + 2])
    return ' '.join(pairs)
def main():
    """Merge the first string of every rule in a YARA file into one rule.

    Reads the file named by the first command-line argument, merges the
    first string of each rule position by position, and prints a single
    'Merged' rule to stdout.  Progress and errors go to stderr.

    Exit statuses: -1 no input file given, -2 no rules found, -4 strings
    of unequal length, -5 a merge step returned None.
    """
    if len(sys.argv) < 2:
        print("Specify an input YARA file!", file=sys.stderr)
        sys.exit(-1)

    # Use a context manager so the file handle is closed promptly.
    with open(sys.argv[1], 'r') as handle:
        data = handle.read()

    ruleNum = countRules(data)
    if ruleNum == 0:
        print("Need some rules to merge", file=sys.stderr)
        sys.exit(-2)

    print("Attempting to merge {:d} rules".format(ruleNum), file=sys.stderr)

    # start with the 0th rule, merge in each additional 1-nth rule
    mergeData = getRuleStringData(data, 0)
    for x in range(1, ruleNum):
        print(" Merging in rule {:d}".format(x), file=sys.stderr)
        rn = getRuleStringData(data, x)
        if len(mergeData) != len(rn):
            print("Can only merge terms of equal length", file=sys.stderr)
            sys.exit(-4)

        tmerge = merge(mergeData, rn)
        if tmerge is None:
            print("Merge failed on rule {:d}".format(x), file=sys.stderr)
            sys.exit(-5)
        mergeData = tmerge

    output = [
        'rule Merged',
        '{',
        ' strings:',
        ' $merged = {' + spacify(mergeData) + '}',
        ' condition:',
        ' any of them',
        '}',
    ]
    print('\n'.join(output))
# Run the merger only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment