Created
May 14, 2021 11:47
-
-
Save av-gantimurov/d0a3c054685948b7f108ada33d601539 to your computer and use it in GitHub Desktop.
Simple script for cleaning yara rules
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
# Description | |
Simple script for cleaning yara rules. | |
Delete meta, tags and comments. | |
Can obfuscate rule names and fix conditions. | |
Can detect duplicated rules and exclude them. | |
Doesn't support yara include option. | |
You may choose keeping some information by using | |
--keep option. | |
--keep N name T tags C comments M meta A all | |
By default script deletes all meta, comments, tags, obfuscates | |
rule names and detects rule duplication. | |
# Requirements | |
- python3 | |
- plyara | |
# Install | |
``` | |
python3 -m pip install plyara | |
``` | |
# Using | |
- simple combine 2 yara files and output to cleaned.yar | |
with cleaning all meta, tags, comment and obfuscate rule names | |
``` | |
yarclean yara.yar yara2.yar > cleaned.yar | |
``` | |
- Read and clean yara rules from stdin to stdout | |
``` | |
cat yara.yar | yarclean | |
``` | |
- Read yara rules from stdin and save to cleaned.yar | |
``` | |
cat yara1.yar yara2.yar | yarclean --out cleaned.yar | |
``` | |
- Clean rule from yara.yar with cleaning, keep rule name | |
to cleaned.yar | |
``` | |
yarclean --keep name -O cleaned.yar -- yara.yar | |
``` | |
- Clean rule from yara.yar with cleaning, keep rule name and tags | |
to cleaned.yar | |
``` | |
yarclean -K N T -O cleaned.yar -- yara.yar | |
``` | |
Author: Gantimurov Alexander | |
Date:2021-05-14 14:06 | |
""" | |
import argparse | |
import logging | |
import sys | |
import plyara | |
import plyara.utils | |
logger = logging.getLogger(__name__) | |
def prepare_argparse():
    """Build the command-line parser for the YARA cleaning script.

    Recognised arguments:
        -v / --verbosity  counted flag, each occurrence raises verbosity.
        -K / --keep       one or more of all/name/meta/comments/tags (or the
                          one-letter forms A/N/M/C/T); may be repeated, the
                          values are accumulated into a single list.
        -D / --debug      boolean flag enabling debug output.
        -O / --out        output file opened for writing (default: stdout).
        FILE ...          input files opened for reading (default: stdin).

    Returns:
        argparse.ArgumentParser: the configured parser. Parsed namespaces
        always expose ``files`` as a list of open file objects.
    """
    parser = argparse.ArgumentParser(
        description="Simple script for cleaning yara rules, Removing tags,"
                    " comments, meta and obfuscate rule names. YARA "
                    "include option is not supported.")
    parser.add_argument("-v", "--verbosity",
                        action="count",
                        default=0,
                        help="Increase output verbosity")
    parser.add_argument("-K", "--keep",
                        dest="keep",
                        help="Keep some information about rule (all, name, "
                             "meta, comments, tags)",
                        default=[],
                        choices=["A", "all",
                                 "N", "name",
                                 "M", "meta",
                                 "C", "comments",
                                 "T", "tags"],
                        nargs="+",
                        type=str,
                        action="extend")
    parser.add_argument("-D", "--debug",
                        dest="debug",
                        help="Debug",
                        action="store_true")
    parser.add_argument("-O", "--out",
                        default=sys.stdout,
                        type=argparse.FileType('w'),
                        help="File to store result rules, default stdout")
    parser.add_argument("files",
                        help="Files to parse, default stdin",
                        metavar="FILE",
                        # With nargs="*" the default is used verbatim when no
                        # FILE is given, so it must already be a list;
                        # a bare sys.stdin would make callers iterate over
                        # its text lines instead of over file objects.
                        default=[sys.stdin],
                        type=argparse.FileType('r'),
                        nargs="*")
    parser.set_defaults(mangle_names=True)
    return parser
def rise_log_level(level, log):
    """Switch *log* to *level* and attach a console handler at that level.

    Args:
        level: logging level (e.g. ``logging.INFO``) applied to both the
            logger and the newly created stream handler.
        log: the ``logging.Logger`` to configure.
    """
    log.setLevel(level)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(level)
    log.addHandler(stream_handler)
def cleaning(rules,
             keep_info=None):
    """Strip identifying information from parsed YARA rules in place.

    For every rule dict in *rules* the function:
      * computes its logic hash and marks logic-duplicates of an earlier rule
        with ``rule["skip"] = True`` (references to the duplicate's name are
        redirected to the rule that was kept);
      * renames the rule to ``rule_<n>`` unless names are kept; when names
        are kept, a name that was already used gets a unique suffix;
      * removes the ``metadata``, ``comments`` and ``tags`` entries unless
        the corresponding keep flag is present;
      * rewrites ``condition_terms`` so that references to renamed or
        de-duplicated rules use the new names.

    Args:
        rules: list of rule dicts (as produced by the plyara parser); the
            dicts are modified in place.
        keep_info: iterable of keep flags - any of ``N``/``name``,
            ``C``/``comments``, ``T``/``tags``, ``M``/``meta``,
            ``A``/``all``. ``None`` or empty means keep nothing.

    Returns:
        The same *rules* list that was passed in.
    """
    # None instead of a mutable [] default; behaviour for callers is the same.
    keep_info = keep_info or ()

    keep_name = any(x in keep_info for x in ("N", "name", "A", "all"))
    keep_comments = any(x in keep_info for x in ("C", "comments", "A", "all"))
    keep_tags = any(x in keep_info for x in ("T", "tags", "A", "all"))
    keep_meta = any(x in keep_info for x in ("M", "meta", "A", "all"))

    logger.debug("Cleaning rules, keep name: %s, keep comments: %s, "
                 "keep tags: %s, keep_meta: %s", keep_name, keep_comments,
                 keep_tags, keep_meta)

    replaced_names = {}   # original rule name -> name used in the output
    used_names = set()    # names already emitted (keep-name mode only)
    rule_hashes = {}      # logic hash -> original name of first rule seen
    # index counts kept rules only, so generated names stay contiguous.
    index = 1
    cnt_rules = len(rules)
    len_index = len(str(cnt_rules))

    for rule in rules:
        logger.debug("(%d/%d) Cleaning rule %s",
                     index,
                     cnt_rules,
                     rule["rule_name"])
        name = rule["rule_name"]
        new_name = None

        rule_hash = plyara.utils.generate_logic_hash(rule)
        logger.debug(" logic hash %s", rule_hash)

        if rule_hash in rule_hashes:
            # Logic duplicate: drop it and point its name at the kept rule.
            eq_name = replaced_names.get(rule_hashes[rule_hash],
                                         rule_hashes[rule_hash])
            if name == eq_name:
                logger.warning("Rule %s is duplicated by logic hash", name)
            else:
                replaced_names[name] = eq_name
                logger.warning("Rule %s is same as %s by logic hash, "
                               "replace with %s", name,
                               rule_hashes[rule_hash], eq_name)
            rule["skip"] = True
            continue
        else:
            rule_hashes[rule_hash] = name

        if keep_name:
            if name in used_names:
                new_name = "{}_{}".format(name, index)
                # The suffixed name may itself be taken already; extend it
                # until it is unique so the output has no duplicate rules.
                while new_name in used_names:
                    new_name = "{}_{}".format(new_name, index)
                logger.warning("Rule %s is already present, change to %s",
                               name,
                               new_name)
                used_names.add(new_name)
            else:
                used_names.add(name)
        else:
            new_name = "rule_{:0{len}}".format(index, len=len_index)

        if new_name:
            replaced_names[name] = new_name
            rule["rule_name"] = new_name
            logger.debug(" change name to '%s'", new_name)

        if not keep_meta and rule.pop("metadata", None):
            logger.debug(" remove meta")

        if not keep_comments and rule.pop("comments", None):
            logger.debug(" remove comments")

        if not keep_tags and rule.pop("tags", None):
            logger.debug(" remove tags")

        if replaced_names:
            # Conditions may reference other rules by name; follow renames.
            found = [n for n in replaced_names if n in rule["condition_terms"]]
            if found:
                logger.debug(" rewrite condition, found '%s'",
                             ", ".join(found))
                rule["condition_terms"] = \
                    [replaced_names.get(c, c) for c in rule["condition_terms"]]

        index += 1

    return rules
def dump_rules(rules, out=sys.stdout):
    """Write every rule not marked ``skip`` to *out* as YARA source.

    Each distinct entry found in a rule's ``imports`` list is emitted once,
    together with the first written rule that carries it. Previously only the
    first rule kept its imports, so an import that first appeared on a later
    rule (for example from a second input file) was dropped from the output.

    Args:
        rules: iterable of rule dicts; the ``imports`` entry of written rules
            is trimmed or removed in place.
        out: writable text stream, default ``sys.stdout``.
    """
    printed_imports = set()
    cnt_dumped = 0
    for rule in rules:
        if rule.get("skip"):
            continue

        # Keep only the imports that have not been written yet.
        pending = [imp for imp in (rule.get("imports") or ())
                   if imp not in printed_imports]
        if pending:
            rule["imports"] = pending
            printed_imports.update(pending)
        else:
            rule.pop("imports", None)

        out.write(plyara.utils.rebuild_yara_rule(rule) + "\n")
        cnt_dumped += 1

    # Not every writable stream has a .name (e.g. io.StringIO).
    logger.info("Dumped %d rules to '%s'", cnt_dumped,
                getattr(out, "name", out))
if __name__ == '__main__':
    # Script entry point: parse arguments, read all inputs into one plyara
    # parser, clean the collected rules and write them out.
    arg_parser = prepare_argparse()
    args = arg_parser.parse_args()

    if args.debug:
        rise_log_level(logging.DEBUG, logger)
    elif args.verbosity > 0:
        rise_log_level(logging.INFO, logger)

    logger.debug("Used args: %s", args)

    # When no FILE is given argparse may hand back the bare default stream
    # instead of a list; iterating it would yield text lines, not files.
    in_files = args.files
    if not isinstance(in_files, (list, tuple)):
        in_files = [in_files]

    yara_parser = plyara.Plyara()
    cnt_rules = 0
    for fl in in_files:
        logger.debug("Trying parse '%s'", fl.name)
        try:
            yara_parser.parse_string(fl.read())
        finally:
            # Release input files as soon as they are read; leave stdin open.
            if fl is not sys.stdin:
                fl.close()
        logger.info("Added %d rules from '%s'",
                    len(yara_parser.rules) - cnt_rules, fl.name)
        cnt_rules = len(yara_parser.rules)

    cl_rules = cleaning(yara_parser.rules,
                        keep_info=args.keep)
    dump_rules(cl_rules, out=args.out)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Description
Simple script for cleaning yara rules.
Deletes meta, tags and comments.
Can obfuscate rule names and fix conditions.
Can detect duplicated rules and exclude them.
Doesn't support yara include option.
You may choose keeping some information by using --keep option.
By default script deletes all meta, comments, tags, obfuscates rule names and detects rule duplication.
Requirements
Install
Using