Skip to content

Instantly share code, notes, and snippets.

@ernestom
Created September 26, 2012 23:09
Show Gist options
  • Save ernestom/3791206 to your computer and use it in GitHub Desktop.
Save ernestom/3791206 to your computer and use it in GitHub Desktop.
Akamai security report from CLF+WAF logs
#!/usr/bin/python
"""
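Summarize Akamai WAF activity per day from CLF+WAF access logs as CSV.

Log lines are read from stdin and the CSV report is written to stdout;
progress messages go to stderr. The optional arguments are the start and
end of the time window, each formatted as 'YYYY/MM/DD HH:MM'.

Example:

$ rsync -avz -e ssh USER@'HOST:/var/vsftpd/trife/*201209{14,15,16,17,18}*gz' logs/
$ zcat portal*201209{14,15,16,17,18}*gz | waf-summary.py '2012/09/14 00:00' '2012/09/18 00:00' > portal-14-17_final.csv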
"""
import re
import sys
from datetime import datetime, date, timedelta
from dateutil import tz
# Matches a dotted name that carries a YYYYMMDD stamp and captures year,
# month and day. Raw string so that `\.` and `\d` reach the regex engine
# instead of being treated as (invalid) string escapes.
# NOTE(review): not referenced anywhere else in the visible file.
DATE_REGEX = re.compile(r"^.*\.(\d{4})(\d{2})(\d{2}).*$")
# Timestamp layout inside the [...] field of a log line (without the offset).
DATE_FORMAT = "%d/%b/%Y:%H:%M:%S"
# Layout of the start/end command-line arguments.
DATE_INPUT = "%Y/%m/%d %H:%M"

utc = tz.gettz('UTC')
# NOTE(review): only used by a commented-out experiment in summary_logs().
mexico = tz.gettz('Mexico/General')

# Reporting window: argv[1] is the start and argv[2] the end. With no
# arguments the window is "now" to "now + 1 day"; with only a start, the
# window is one day long (previously a lone start argument raised IndexError).
# NOTE(review): datetime.today() is naive local time that is then labelled
# UTC below -- confirm that is acceptable for the default window.
start_date = datetime.today()
if len(sys.argv) > 1:
    start_date = datetime.strptime(sys.argv[1], DATE_INPUT)
if len(sys.argv) > 2:
    end_date = datetime.strptime(sys.argv[2], DATE_INPUT)
else:
    end_date = start_date + timedelta(1)

# Make both bounds timezone-aware so they compare with the aware timestamps
# built in summary_logs().
start_date = start_date.replace(tzinfo=utc)
end_date = end_date.replace(tzinfo=utc)


def is_in_range(d):
    """Return True when datetime *d* lies in [start_date, end_date]."""
    return start_date <= d <= end_date
# Regex fragments for one access-log line, in field order. Each named group
# becomes a key of the dict returned by pattern.match(line).groupdict().
parts = [
    r'(?P<host>\S+)',         # %h   host
    r'\S+',                   # %l   ident (unused, not captured)
    r'(?P<user>\S+)',         # %u   user
    r'\[(?P<time>.+)\]',      # %t   time, including the bracketed offset
    r'"(?P<request>.+)"',     # "%r" request line
    r'(?P<status>[0-9]+)',    # %>s  status
    r'(?P<size>\S+)',         # %b   size (careful, can be '-')
    r'"(?P<referer>.*)"',     # "%{Referer}i"
    r'"(?P<agent>.*)"',       # "%{User-agent}i"
    r'"(?P<waf>.*)"',         # akamai waf rules
]

# Fields are separated by runs of whitespace; optional trailing whitespace
# is tolerated before the end of the string.
_FIELD_SEPARATOR = r'\s+'
_LINE_END = r'\s*\Z'
pattern = re.compile(_FIELD_SEPARATOR.join(parts) + _LINE_END)
# Width of a grouping time range, in minutes.
# NOTE(review): not referenced anywhere else in the visible file.
delta = 10

# Run-wide counters, all starting at zero. In the visible file only 'hits'
# is ever incremented (by summary_logs()).
total = dict.fromkeys(('hits', 'triggered', 'warning', 'deny'), 0)

# strftime layout for a day key.
# NOTE(review): summary_logs() spells this format out literally instead of
# using this constant.
WEEK_FORMAT = "%Y/%m/%d"
def summary_logs():
    """Aggregate per-day hit and WAF rule counts from log lines on stdin.

    Every stdin line is matched against the module-level ``pattern``; lines
    that do not match, whose timestamp cannot be parsed, or whose timestamp
    falls outside ``is_in_range`` are skipped. The UTC offset in the log
    timestamp is ignored and the time is labelled UTC as-is.

    Side effects: increments the module-level ``total['hits']`` for every
    counted line and writes progress messages to stderr.

    Returns:
        dict mapping a day key ("YYYY/MM/DD") to a record with the keys
        'key' (the day key), 'hits' (lines counted that day), 'triggered'
        (total rule IDs seen), 'count' ({'deny', 'warning', 'all'} totals)
        and 'deny' / 'warning' ({rule_id: occurrences}).
    """
    summary = {}
    last_key = ''
    started = False
    time = None
    for line in sys.stdin:
        m = pattern.match(line.strip("\n"))
        if m is None:
            continue
        data = m.groupdict()

        # The time field looks like "14/Sep/2012:00:00:01 +0000"; only the
        # part before the first space is parsed.
        # NOTE(review): the offset is dropped, so non-UTC logs are still
        # labelled UTC (an earlier experiment used tzinfo=mexico) -- confirm.
        time_text = data['time'].partition(' ')[0]
        try:
            parsed = datetime.strptime(time_text, DATE_FORMAT)
        except ValueError:
            # One odd timestamp should not abort a long run; report and skip,
            # as is already done for lines that do not match the pattern.
            sys.stderr.write("Skipping line with unparsable time: %r\n"
                             % data['time'])
            continue
        time = parsed.replace(tzinfo=utc)
        if not is_in_range(time):
            continue

        if not started:
            started = True
            # Trailing newline keeps this separate from the "Process:" line.
            sys.stderr.write("start at %s\n" % time)

        key = time.strftime("%Y/%m/%d")
        if last_key != key:
            last_key = key
            sys.stderr.write("Process: %s\n" % last_key)

        day = summary.get(key)
        if day is None:
            day = summary[key] = {
                'key': key,
                'hits': 0,
                'count': {
                    'deny': 0,
                    'warning': 0,
                    'all': 0,
                },
                'deny': {},
                'warning': {},
                'triggered': 0,
            }
        day['hits'] += 1
        total['hits'] += 1

        # The WAF field is read as "<ignored>|<warning ids>|<deny ids>" with
        # ids separated by ':'; any other shape contributes no rule data.
        waf = data['waf'].split("|")
        if len(waf) != 3:
            continue
        warning = [w for w in waf[1].split(':') if w]
        deny = [d for d in waf[2].split(':') if d]

        day['triggered'] += len(warning) + len(deny)
        day['count']['deny'] += len(deny)
        day['count']['warning'] += len(warning)
        # NOTE(review): the original indentation was lost; 'all' is assumed
        # to count every line that carries a three-part WAF field.
        day['count']['all'] += 1

        for rule in warning:
            day['warning'][rule] = day['warning'].get(rule, 0) + 1
        for rule in deny:
            day['deny'][rule] = day['deny'].get(rule, 0) + 1

    # Reports the last successfully parsed timestamp (None if there was none),
    # whether or not it fell inside the window.
    sys.stderr.write("end at %s\n" % time)
    return summary
def percent(hits, total):
    """Format hits/total as a percentage string with two decimals.

    Returns '-' when *total* is not greater than zero, so no division by
    zero can occur; otherwise a string such as "25.00 %".
    """
    denominator = float(total)
    if not denominator > 0:
        return '-'
    ratio = float(hits) / denominator
    return "%.2f %%" % (ratio * 100)
# Build the per-day summary from stdin, then print it as CSV on stdout.
results = summary_logs()

# Collect every deny / warning rule ID seen on any day. The values count the
# number of days on which each rule appeared; only the keys are used below.
drules = {}
wrules = {}
for key, data in results.items():
    for rule in data['deny']:
        drules[rule] = drules.get(rule, 0) + 1
    for rule in data['warning']:
        wrules[rule] = wrules.get(rule, 0) + 1

# Sort the rule IDs so the column order is the same on every run instead of
# following dict ordering.
drules = sorted(drules)
wrules = sorted(wrules)

# One column per rule: deny rules ("D <id>") first, then warnings ("W <id>").
heads = ['D ' + r for r in drules] + ['W ' + r for r in wrules]


def cells(items):
    """Render *items* as comma-separated, double-quoted CSV fields.

    Embedded double quotes are doubled so a field cannot break the row.
    """
    return ",".join('"%s"' % ('%s' % i).replace('"', '""') for i in items)


# Header row. Built through cells() so there is no stray space before the
# first rule column and no trailing comma when no rule was seen.
print(cells(['', 'Date', 'Hits', 'WAF Triggered', 'WAF Deny', 'WAF Warning']
            + heads))

for key in sorted(results):
    data = results[key]
    # Same order as the header: deny total, warning total, then per-rule
    # deny counts followed by per-rule warning counts.
    values = [data['count']['deny'], data['count']['warning']]
    values.extend(data['deny'].get(r, 0) for r in drules)
    values.extend(data['warning'].get(r, 0) for r in wrules)

    # Each count as a share of the day's hits and of the day's triggered
    # rule total.
    ptvalues = [percent(v, data['hits']) for v in values]
    pwvalues = [percent(v, data['triggered']) for v in values]

    print(cells(['', key, data['hits'], data['triggered']] + values))
    print(cells(['% sobre Hits', '', '', ''] + ptvalues))
    print(cells(['% sobre WAF triggered', '', '', ''] + pwvalues))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment