Created
September 19, 2016 20:11
-
-
Save stantonk/1f43f899a09f25477526e56e713f8093 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import sys

# Matches one syslog-style access-log line, for example:
#   Jun 29 00:00:03 host tag: 192.168.1.1 - - "POST /api/someendpoint/ HTTP/1.1" 200 123
#
# Capture groups (1-based):
#   1  timestamp            2  host                 3  tag
#   4  client IP list       5  last IP in the list  6  HTTP method
#   7  path, without its trailing slash             8  last path segment
#   9  whatever follows the trailing slash (and optional "?") up to " HTTP/x.y"
#   10 status code          11 a number or "-" (presumably the response size)
#
# The day of month accepts one or two digits: syslog pads days 1-9 with a
# space ("Jun  9 ..."), which a two-digit minimum would silently reject.
#
# note, currently doesn't handle:
# Jun 29 00:00:03 host tag: message repeated 3 times: [ 192.168.1.1 - - "POST /api/someendpoint/
r = re.compile(
    r'(\w{3,}\s+\d{1,2}\s+\d{2,}:\d{2,}:\d{2,})'         # 1: timestamp
    r'\s+([\w-]+)'                                       # 2: host
    r'\s+([\w-]+):'                                      # 3: tag
    r'\s+((\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3},?\s?)+)'   # 4: IP list, 5: last IP
    r'\s+-\s+-\s+'
    r'\"(GET|POST|PUT|PATCH|HEAD|DELETE) '               # 6: method
    r'((\/[\w_]+)+)\/\??'                                # 7: path, 8: last segment
    r'(.*) HTTP\/\d\.\d" '                               # 9: rest of request target
    r'(\d{3,}) (\d+|-)'                                  # 10: status, 11: number or "-"
)
# Ordered (compiled pattern, replacement) pairs applied one after another to a
# request path. Order matters: the digit pattern runs first, so by the time
# the second pattern runs every digit run has already become "id".
# NOTE(review): the digit pattern also rewrites digits embedded in a word
# (e.g. "v2" becomes "vid") -- confirm that is intended.
anonymizers = (
    (re.compile(r'(\d+,?)+'), 'id'),  # one or more optionally csv separated ids
    (re.compile(r'\w+:[\w\d\-_:\.]+'), 'guid'),  # word, colon, then word/-/:/. characters
)
def anonymize_path(path):
    """Return *path* with every anonymizer pattern replaced by its placeholder.

    Each (pattern, replacement) pair in the module-level ``anonymizers``
    tuple is applied in order, and each substitution works on the result of
    the previous one.
    """
    for anonymizer, repl in anonymizers:
        path = anonymizer.sub(repl, path)
    return path
# Read log lines from stdin and print "host - METHOD - anonymized path" for
# each line the pattern recognises; lines that do not match are skipped.
for line in sys.stdin:
    m = r.search(line.strip())
    if m:
        groups = m.groups()
        # groups is 0-based: [1] host, [5] HTTP method, [6] request path.
        # The parenthesised single-argument form prints the same text under
        # both the Python 2 print statement and the Python 3 print function.
        print('%s - %s - %s' % (groups[1], groups[5], anonymize_path(groups[6])))
    # Debugging aid: uncomment to see which lines were not recognised.
    # else:
    #     print('nomatch: %s' % line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment