Skip to content

Instantly share code, notes, and snippets.

@av-gantimurov
Created May 14, 2021 11:47
Show Gist options
  • Save av-gantimurov/d0a3c054685948b7f108ada33d601539 to your computer and use it in GitHub Desktop.
Save av-gantimurov/d0a3c054685948b7f108ada33d601539 to your computer and use it in GitHub Desktop.
Simple script for cleaning yara rules
#!/usr/bin/env python3
"""
# Description
Simple script for cleaning yara rules.
Delete meta, tags and comments.
Can obfuscate rule names and fix conditions.
Can detect duplicated rules and exclude them.
Doesn't support yara include option.
You may choose keeping some information by using
--keep option.
--keep N name T tags C comments M meta A all
By default script deletes all meta, comments, tags, obfuscates
rule names and detects rule duplication.
# Requirements
- python3
- plyara
# Install
```
python3 -m pip install plyara
```
# Using
- simple combine 2 yara files and output to cleaned.yar
with cleaning all meta, tags, comment and obfuscate rule names
```
yarclean yara.yar yara2.yar > cleaned.yar
```
- Read and clean yara rules from stdin to stdout
```
cat yara.yar | yarclean
```
- Read yara rules from stdin and save to cleaned.yar
```
cat yara1.yar yara2.yar | yarclean --out cleaned.yar
```
- Clean rules from yara.yar, keeping rule names, and write the result
to cleaned.yar
```
yarclean --keep name -O cleaned.yar -- yara.yar
```
- Clean rules from yara.yar, keeping rule names and tags, and write the
result to cleaned.yar
```
yarclean -K N T -O cleaned.yar -- yara.yar
```
Author: Gantimurov Alexander
Date:2021-05-14 14:06
"""
import argparse
import logging
import sys
import plyara
import plyara.utils
logger = logging.getLogger(__name__)
def prepare_argparse():
    """Build the command-line parser for the YARA cleaning script.

    Returns:
        argparse.ArgumentParser: parser exposing ``verbosity``, ``keep``,
        ``debug``, ``out`` and ``files``; ``mangle_names`` is preset to
        ``True`` through ``set_defaults``.
    """
    parser = argparse.ArgumentParser(
        description="Simple script for cleaning yara rules, Removing tags,"
                    " comments, meta and obfuscate rule names. YARA "
                    "include option is not supported.")
    parser.add_argument("-v", "--verbosity",
                        action="count",
                        default=0,
                        help="Increase output verbosity")
    parser.add_argument("-K", "--keep",
                        dest="keep",
                        help="Keep some information about rule (all, name, "
                             "meta, comments, tags)",
                        default=[],
                        choices=["A", "all",
                                 "N", "name",
                                 "M", "meta",
                                 "C", "comments",
                                 "T", "tags"],
                        nargs="+",
                        type=str,
                        action="extend")
    parser.add_argument("-D", "--debug",
                        dest="debug",
                        help="Debug",
                        action="store_true")
    parser.add_argument("-O", "--out",
                        default=sys.stdout,
                        type=argparse.FileType('w'),
                        help="File to store result rules, default stdout")
    parser.add_argument("files",
                        help="Files to parse, default stdin",
                        metavar="FILE",
                        # The default must be a list: for a positional with
                        # nargs="*" argparse hands the default back as-is
                        # when no FILE is given, and callers iterate over
                        # the result expecting file objects.
                        default=[sys.stdin],
                        type=argparse.FileType('r'),
                        nargs="*")
    parser.set_defaults(mangle_names=True)
    return parser
def rise_log_level(level, log):
    """Set *log* to *level* and attach a new stream handler at that level.

    Args:
        level: logging level applied to both the logger and the handler.
        log: logger that receives the level and the new handler.
    """
    log.setLevel(level)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(level)
    log.addHandler(stream_handler)
def cleaning(rules,
             keep_info=None):
    """Clean parsed rules in place and flag logic duplicates.

    For every rule the logic hash is computed with
    ``plyara.utils.generate_logic_hash``. A rule whose hash was already seen
    gets ``rule["skip"] = True`` and is otherwise left untouched. Every other
    rule is renamed (unless names are kept), has its ``metadata``,
    ``comments`` and ``tags`` entries removed (unless kept), and has any
    ``condition_terms`` entry equal to an earlier replaced rule name
    rewritten to the replacement.

    Args:
        rules: sized iterable of rule dicts providing ``rule_name`` and
            ``condition_terms`` (the script passes ``plyara.Plyara().rules``).
        keep_info: container of keep flags; recognised values are
            ``N``/``name``, ``C``/``comments``, ``T``/``tags``,
            ``M``/``meta`` and ``A``/``all``. ``None`` means keep nothing.

    Returns:
        The same ``rules`` object, modified in place.
    """
    # Avoid a shared mutable default; None stands for "no keep flags".
    keep_info = [] if keep_info is None else keep_info

    def _keeps(short, long_name):
        # "A"/"all" switches every keep option on.
        return any(flag in keep_info
                   for flag in (short, long_name, "A", "all"))

    keep_name = _keeps("N", "name")
    keep_comments = _keeps("C", "comments")
    keep_tags = _keeps("T", "tags")
    keep_meta = _keeps("M", "meta")
    logger.debug("Cleaning rules, keep name: %s, keep comments: %s, "
                 "keep tags: %s, keep_meta: %s", keep_name, keep_comments,
                 keep_tags, keep_meta)

    replaced_names = {}  # original rule name -> name to use in conditions
    rule_names = set()   # names already taken when names are kept
    rule_hashes = {}     # logic hash -> original name of first rule seen
    # index only advances for rules that are kept, so generated names
    # stay consecutive even when duplicates are skipped.
    index = 1
    cnt_rules = len(rules)
    len_index = len(str(cnt_rules))

    for rule in rules:
        name = rule["rule_name"]
        logger.debug("(%d/%d) Cleaning rule %s",
                     index,
                     cnt_rules,
                     name)
        new_name = None

        rule_hash = plyara.utils.generate_logic_hash(rule)
        logger.debug(" logic hash %s", rule_hash)
        if rule_hash in rule_hashes:
            first_name = rule_hashes[rule_hash]
            eq_name = replaced_names.get(first_name, first_name)
            if name == eq_name:
                logger.warning("Rule %s is duplicated by logic hash", name)
            else:
                # Later conditions that mention the duplicate must point
                # at the rule that is actually emitted.
                replaced_names[name] = eq_name
                logger.warning("Rule %s is same as %s by logic hash, "
                               "replace with %s", name,
                               first_name, eq_name)
            rule["skip"] = True
            continue
        rule_hashes[rule_hash] = name

        if keep_name:
            if name in rule_names:
                new_name = "{}_{}".format(name, index)
                # Make sure the generated name does not clash with a name
                # that is already in use.
                while new_name in rule_names:
                    new_name = "{}_{}".format(new_name, index)
                logger.warning("Rule %s is already present, change to %s",
                               name,
                               new_name)
                rule_names.add(new_name)
            else:
                rule_names.add(name)
        else:
            new_name = "rule_{:0{len}}".format(index, len=len_index)

        if new_name:
            replaced_names[name] = new_name
            rule["rule_name"] = new_name
            logger.debug(" change name to '%s'", new_name)

        if not keep_meta and rule.pop("metadata", None):
            logger.debug(" remove meta")
        if not keep_comments and rule.pop("comments", None):
            logger.debug(" remove comments")
        if not keep_tags and rule.pop("tags", None):
            logger.debug(" remove tags")

        if replaced_names:
            found = [n for n in replaced_names if n in rule["condition_terms"]]
            if found:
                logger.debug(" rewrite condition, found '%s'",
                             ", ".join(found))
                rule["condition_terms"] = \
                    [replaced_names.get(c, c) for c in rule["condition_terms"]]
        index += 1
    return rules
def dump_rules(rules, out=sys.stdout):
    """Write every rule not marked ``skip`` to *out* as rebuilt YARA text.

    Each written rule is rendered with ``plyara.utils.rebuild_yara_rule`` and
    followed by a newline. Once one rule has been written, the ``imports``
    entry is removed from every following rule dict (in place) —
    presumably so import statements appear only once in the output;
    confirm against ``rebuild_yara_rule``.

    Args:
        rules: iterable of rule dicts; modified in place.
        out: writable text stream, standard output by default.
    """
    is_import_printed = False
    cnt_dumped = 0
    for rule in rules:
        if is_import_printed:
            rule.pop("imports", None)
        if rule.get("skip"):
            continue
        out.write(plyara.utils.rebuild_yara_rule(rule) + "\n")
        cnt_dumped += 1
        # Only flip the flag after a rule has really been written, so a
        # skipped leading rule cannot suppress the imports.
        is_import_printed = True
    # Streams such as io.StringIO have no ``name``; fall back to the stream
    # itself so logging never fails after the output is written.
    logger.info("Dumped %d rules to '%s'", cnt_dumped,
                getattr(out, "name", out))
if __name__ == '__main__':
    # Script entry point: parse arguments, read all inputs into a single
    # plyara parser, clean the collected rules and write them out.
    arg_parser = prepare_argparse()
    args = arg_parser.parse_args()
    if args.debug:
        rise_log_level(logging.DEBUG, logger)
    elif args.verbosity > 0:
        rise_log_level(logging.INFO, logger)
    logger.debug("Used args: %s", args)

    # argparse returns a non-list default unchanged for nargs="*", so a
    # single stream (sys.stdin) may arrive here instead of a list of files.
    if isinstance(args.files, (list, tuple)):
        sources = args.files
    else:
        sources = [args.files]

    yara_parser = plyara.Plyara()
    cnt_rules = 0
    for fl in sources:
        logger.debug("Trying parse '%s'", fl.name)
        try:
            content = fl.read()
        finally:
            # Release files opened by argparse; leave stdin alone.
            if fl is not sys.stdin:
                fl.close()
        yara_parser.parse_string(content)
        logger.info("Added %d rules from '%s'",
                    len(yara_parser.rules) - cnt_rules, fl.name)
        cnt_rules = len(yara_parser.rules)

    cl_rules = cleaning(yara_parser.rules,
                        keep_info=args.keep)
    dump_rules(cl_rules, out=args.out)
@av-gantimurov
Copy link
Author

av-gantimurov commented May 14, 2021

Description

Simple script for cleaning yara rules.
Deletes meta, tags and comments.
Can obfuscate rule names and fix conditions.
Can detect duplicated rules and exclude them.
Doesn't support yara include option.
You may choose keeping some information by using --keep option.

--keep N name T tags C comments M meta A all

By default script deletes all meta, comments, tags, obfuscates rule names and detects rule duplication.

Requirements

  • python3
  • plyara

Install

python3 -m pip install plyara

Using

  • simple combine 2 yara files and output to cleaned.yar with cleaning all meta, tags, comment and obfuscate rule names
    yarclean yara.yar yara2.yar > cleaned.yar
    
  • Read and clean yara rules from stdin to stdout
    cat yara.yar | yarclean
    
  • Read yara rules from stdin and save to cleaned.yar
    cat yara1.yar yara2.yar | yarclean --out cleaned.yar
    
  • Clean rules from yara.yar, keeping rule names, and write the result to cleaned.yar
    yarclean --keep name -O cleaned.yar -- yara.yar
    
  • Clean rules from yara.yar, keeping rule names and tags, and write the result to cleaned.yar
    yarclean -K N T -O cleaned.yar -- yara.yar
    

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment