-
-
Save kholisrag/caa737bd460f0f0fe93ce4fe26e3dbd4 to your computer and use it in GitHub Desktop.
Improved grok-to-regex.py to run with Python 3.12 and to support nested grok patterns
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import argparse | |
import re | |
from os import walk | |
from os.path import join | |
def get_patterns(patterns_dir):
    """Load every grok pattern definition found under *patterns_dir*.

    The directory is walked recursively and every file in it is read as a
    pattern file: one ``NAME <whitespace> REGEX`` definition per line. Blank
    lines and lines whose first non-blank character is ``#`` are ignored, as
    are lines that contain a name but no regex.

    Args:
        patterns_dir: Path of the directory holding the pattern files.

    Returns:
        A dict mapping each pattern name to its (unexpanded) definition.
        When a name is defined more than once, the definition read last wins.
    """
    patterns = {}
    for dirpath, _, filenames in walk(patterns_dir):
        for name in filenames:
            with open(join(dirpath, name), encoding='utf-8') as f:
                for line in f:
                    stripped = line.strip()
                    if not stripped or stripped.startswith('#'):
                        continue
                    # Split on any run of whitespace so tab-separated or
                    # column-aligned definitions do not crash or leak
                    # leading blanks into the regex.
                    fields = line.split(None, 1)
                    if len(fields) != 2:
                        # A bare name without a regex defines nothing.
                        continue
                    key, value = fields
                    patterns[key] = value.rstrip('\n')
    return patterns
def convert(expression, patterns, patterns_dir=None):
    """Expand the grok references in *expression* into a plain regex.

    Each ``%{PATTERN}``, ``%{PATTERN:name}`` or ``%{PATTERN:name:type}``
    reference is replaced by the definition of ``PATTERN`` from *patterns*,
    and definitions are expanded recursively. A reference that carries a
    name becomes a ``(?<name>...)`` group; the optional type suffix has no
    regex counterpart and is dropped.

    The expanded expression is printed. If any reference could not be
    resolved (unknown pattern, malformed reference, or a pattern that refers
    back to itself), it is left untouched in the output and a warning listing
    the offending references is printed afterwards.

    Args:
        expression: The grok expression to convert.
        patterns: Mapping of pattern name to definition.
        patterns_dir: Directory named in the warning hint. When omitted, the
            module-level ``args.patterns_dir`` is used if it exists.

    Returns:
        The expanded expression (the same string that is printed).
    """
    groks = re.compile(r'%\{[^}]*\}')
    failed_matches = set()

    def replace_groks(expr, active=()):
        # `active` holds the pattern names currently being expanded, so a
        # definition that refers back to itself is reported instead of
        # recursing until the interpreter gives up.
        def expand(match):
            inner = match.group(0)[2:-1]
            parts = inner.split(':')
            patt = parts[0]
            if len(parts) > 3 or patt not in patterns or patt in active:
                failed_matches.add(inner)
                return match.group(0)
            body = replace_groks(patterns[patt], active + (patt,))
            if len(parts) == 1:
                return body
            # Plain concatenation: field names may contain characters that
            # str.format would try to interpret.
            return '(?<' + parts[1] + '>' + body + ')'

        # A callable replacement is inserted literally, so backslashes in
        # the pattern definitions survive unchanged.
        return groks.sub(expand, expr)

    final_expression = replace_groks(expression)
    print(final_expression)

    if failed_matches:
        if patterns_dir is None:
            # Fall back to the CLI namespace when running as a script.
            patterns_dir = getattr(globals().get('args'), 'patterns_dir', None)
        print('\nWarning! Unable to match the following expressions:')
        print('  {}'.format(', '.join(sorted(failed_matches))))
        hint = ('This could be a typo or a missing grok pattern file. '
                'Double check your grok patterns directory')
        if patterns_dir is not None:
            hint += ': {}'.format(patterns_dir)
        print(hint)

    return final_expression
if __name__ == '__main__':
    # Command-line entry point: one positional grok expression plus an
    # optional directory that holds the grok pattern files.
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument(
        'expression',
        metavar='expr',
        help='A grok expression.',
    )
    cli_parser.add_argument(
        '-d',
        '--patterns-dir',
        dest='patterns_dir',
        default='patterns',
        help='Directory to find grok patterns.',
    )

    # `args` must remain a module-level name: convert() looks it up when it
    # prints the hint about the patterns directory.
    args = cli_parser.parse_args()
    patterns = get_patterns(args.patterns_dir)
    convert(args.expression, patterns)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment