Skip to content

Instantly share code, notes, and snippets.

@notareverser
Created January 13, 2022 13:44
Show Gist options
  • Save notareverser/7e76c122d82ec455278640f4948a3e11 to your computer and use it in GitHub Desktop.
Save notareverser/7e76c122d82ec455278640f4948a3e11 to your computer and use it in GitHub Desktop.
Merge N YARA rules from a file, where the rules have a single string, and the strings are all of the same length
#!/usr/bin/env python3
# yes, this is crappy code
# yes, it generally gets the job done
import sys
def countRules(data):
    """Return the number of non-overlapping occurrences of 'rule ' in data."""
    marker = 'rule '
    return data.count(marker)
# grab the entire contents of the rule
def getRule(data, index):
    """Return the text of the index-th 'rule ' occurrence in data.

    The returned slice starts at the 'rule ' marker and ends at the last '}'
    found before the next 'rule ' marker (or before the end of data for the
    final rule). Returns None when there is no occurrence with that index.
    """
    keyword = 'rule '
    step = len(keyword)
    seen = 0
    start = data.find(keyword)
    while start >= 0:
        following = data.find(keyword, start + step)
        if seen == index:
            # The rule body cannot extend past the start of the next rule.
            limit = following if following >= 0 else len(data)
            closing = data.rfind('}', start, limit)
            return data[start:closing + 1]
        seen += 1
        start = following
    return None
# figure out how many strings are in the rule
def getStringCount(rule):
    """Count the '$' characters between 'strings:' and 'condition:' in rule.

    Returns None when either marker is absent from rule.
    """
    begin = rule.find('strings:')
    end = rule.find('condition:')
    if begin == -1 or end == -1:
        return None
    section = rule[begin:end]
    return section.count('$')
# get the nth string
def getString(rule, index):
    """Return the index-th '$'-introduced entry of the rule's strings section.

    The section is the slice of rule from 'strings:' up to 'condition:'. Each
    entry runs from a '$' up to (not including) the next '$', or to the end
    of the section, with surrounding whitespace stripped. Returns None when
    the section has no entry with that index.
    """
    section = rule[rule.find('strings:'):rule.find('condition:')]
    # Everything before the first '$' is the section header, not an entry.
    pieces = section.split('$')[1:]
    for position, piece in enumerate(pieces):
        if position == index:
            return ('$' + piece).strip()
    return None
# we don't need no stinkin' ASCII or regex strings! No ranges either!
def checkByteString(stringData):
    """Return True if stringData has both '{' and '}' and no '[' or ']'."""
    hasBraces = '{' in stringData and '}' in stringData
    hasBrackets = '[' in stringData or ']' in stringData
    return hasBraces and not hasBrackets
# get the actual bytes
def extractData(data):
    """Return the compact value part of a '$name = { ... }' definition.

    All whitespace (spaces, tabs, newlines) and the '{' / '}' characters are
    removed, then the text between the first '=' and the next '=' (or the end)
    is returned. Returns None when data contains no '='.
    """
    # Remove every kind of whitespace, not just spaces, so hex strings that
    # span several lines or are tab-indented still yield contiguous nibbles.
    compact = ''.join(data.split())
    compact = compact.translate(str.maketrans('', '', '{}'))
    if '=' not in compact:
        return None
    return compact.split('=')[1]
# if the characters are the same emit, otherwise emit a '?'
def merge(lhs, rhs):
    """Merge two equal-length nibble strings position by position.

    Where both inputs hold the same character (hex digits are compared
    case-insensitively) the character from lhs is kept; otherwise a '?'
    wildcard is emitted. The number of differing positions is reported on
    stderr.

    Returns the merged string, or None when the inputs differ in length so
    the caller can report the failure instead of hitting an IndexError or a
    silently truncated result.
    """
    if len(lhs) != len(rhs):
        return None
    nibbles = []
    substituted = 0
    for left, right in zip(lhs, rhs):
        # 'ab' and 'AB' denote the same byte, so they must not be wildcarded.
        if left.lower() == right.lower():
            nibbles.append(left)
        else:
            nibbles.append('?')
            substituted += 1
    print("Substituted {:d} wildcard nibbles".format(substituted), file=sys.stderr)
    return ''.join(nibbles)
# get the 0th string for the nth rule, remove its spaces/brackets
def getRuleStringData(data, index):
    """Return the compact nibble text of the first string of the index-th rule.

    Exits the process with status -3 (after printing a message to stderr)
    when the rule has no string, when its first string is not a plain byte
    string according to checkByteString, or when no value can be extracted
    from it.
    """
    rule = getRule(data, index)
    # getRule may yield None or an empty slice; neither contains a string.
    firstString = getString(rule, 0) if rule else None
    if firstString is None:
        print("Rule {:d} has no strings to merge".format(index), file=sys.stderr)
        sys.exit(-3)
    nibbles = extractData(firstString) if checkByteString(firstString) else None
    if nibbles is None:
        print("Can only merge binary strings, not text or regex or ranges. SARRY!", file=sys.stderr)
        sys.exit(-3)
    return nibbles
def spacify(merged):
    """Split merged into consecutive two-character chunks joined by spaces."""
    pairs = []
    for start in range(0, len(merged), 2):
        pairs.append(merged[start:start + 2])
    return ' '.join(pairs)
def main():
    """Merge the first string of every rule in the given YARA file.

    Reads the file named by sys.argv[1], merges the first string of each
    rule into one wildcarded hex string, and prints a single 'Merged' rule
    on stdout. Progress and errors go to stderr.

    Exit statuses: -1 no input file given, -2 no rules found, -4 strings of
    unequal length, -5 merge() reported failure.
    """
    if len(sys.argv) < 2:
        print("Specify an input YARA file!", file=sys.stderr)
        sys.exit(-1)

    # Use a context manager so the file handle is closed deterministically.
    with open(sys.argv[1], 'r') as handle:
        data = handle.read()

    ruleNum = countRules(data)
    if ruleNum == 0:
        print("Need some rules to merge", file=sys.stderr)
        sys.exit(-2)
    print("Attempting to merge {:d} rules".format(ruleNum), file=sys.stderr)

    # start with the 0th rule, merge in each additional 1-nth rule
    mergeData = getRuleStringData(data, 0)
    for x in range(1, ruleNum):
        print(" Merging in rule {:d}".format(x), file=sys.stderr)
        rn = getRuleStringData(data, x)
        if len(mergeData) != len(rn):
            print("Can only merge terms of equal length", file=sys.stderr)
            sys.exit(-4)
        tmerge = merge(mergeData, rn)
        if tmerge is None:
            print("Merge failed on rule {:d}".format(x), file=sys.stderr)
            sys.exit(-5)
        mergeData = tmerge

    output = [
        'rule Merged',
        '{',
        ' strings:',
        ' $merged = {' + spacify(mergeData) + '}',
        ' condition:',
        ' any of them',
        '}',
    ]
    print('\n'.join(output))
# Run the merge only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment