Skip to content

Instantly share code, notes, and snippets.

@jeffbryner
Created July 1, 2015 20:17
Show Gist options
  • Save jeffbryner/c0076716ff68b4a0252e to your computer and use it in GitHub Desktop.
Save jeffbryner/c0076716ff68b4a0252e to your computer and use it in GitHub Desktop.
apache parsing
#!/usr/bin/env python
import re
import json
import requests
from datetime import datetime
from dateutil.parser import parse
def apachetime(s):
    """
    Convert an Apache access-log timestamp into a datetime.

    `s` is expected to be the bracketed, fixed-width form used in access
    logs, e.g. "[01/Sep/2012:06:05:11 +0000]". The fields are picked out by
    character position, re-assembled as "MM/DD/YYYY HH:MM:SS UTCOFFSET" and
    handed to dateutil's `parse`, whose result is returned.

    Raises KeyError when the three-letter month abbreviation is not one of
    the English names in the lookup table below.
    """
    month_numbers = dict(Jan=1, Feb=2, Mar=3, Apr=4, May=5, Jun=6,
                         Jul=7, Aug=8, Sep=9, Oct=10, Nov=11, Dec=12)

    # Drop the first and last characters (the surrounding brackets).
    stamp = s[1:-1]

    # Fixed-width layout after stripping: DD/Mon/YYYY:HH:MM:SS +ZZZZ
    day = stamp[0:2]
    month = month_numbers[stamp[3:6]]
    year = stamp[7:11]
    hour = stamp[12:14]
    minute = stamp[15:17]
    second = stamp[18:20]
    utc_offset = stamp[21:27]

    # MM/DD/YYYY HH:MM:SS UTCOFFSET
    rearranged = '{0}/{1}/{2} {3}:{4}:{5} {6}'.format(
        month, day, year, hour, minute, second, utc_offset)
    return parse(rearranged)
def _error_event(line, quoted_re, bracket_re, status_re):
    '''
    Build the event dict for a single access-log line.

    Returns None when the line has no " NNN " status field or when the first
    such field is below 400. Raises ValueError when the line is an error line
    but lacks the pieces needed to build an event (two leading
    whitespace-separated tokens and a parseable [timestamp] field).
    '''
    statuses = status_re.findall(line)
    # http non-success only
    if not statuses or int(statuses[0]) < 400:
        return None

    tokens = line.split()
    bracketed = bracket_re.findall(line)
    if len(tokens) < 2 or not bracketed:
        raise ValueError('missing address/site tokens or [timestamp] field')

    try:
        utctimestamp = apachetime(bracketed[0]).isoformat()
    except KeyError:
        # apachetime looks the month abbreviation up in a dict; an unknown
        # abbreviation means the first [..] field was not a timestamp.
        raise ValueError('unrecognised timestamp {0!r}'.format(bracketed[0]))

    quoted = quoted_re.findall(line)
    # First "quoted" field is used as the request; empty string when absent.
    request = quoted[0] if quoted else ''

    details = {
        'log': line,
        'site': tokens[1],
        'sourceipaddress': tokens[0],
        'request': request,
        'httpstatus': statuses[0],
    }
    return {
        'utctimestamp': utctimestamp,
        'category': 'weblog',
        'tags': ['apache'],
        'details': details,
        'summary': request,
    }


def main():
    '''
    get logs, parse and write as json

    Streams the document at URLGOESHERE line by line and, for every line
    whose first 3-digit status field is >= 400, prints an event as indented,
    key-sorted JSON on stdout. Lines that look like errors but cannot be
    parsed are reported on stderr and skipped instead of aborting the run.

    Raises requests.exceptions.HTTPError when the log download itself
    returns a 4xx/5xx response.
    '''
    import sys  # local import: only needed to report skipped lines

    apachequotedfieldsre = re.compile(r'''"(.*?)"''')  # get fields delimited by ""
    apachebracketfieldsre = re.compile(r'''(\[.*?\])''')  # get fields delimited by []
    apachestatusre = re.compile(r''' ([0-9]{3}) ''')  # get 3 digit http status field

    # retrieve the logs
    user_agent = {'User-agent': 'Mozilla/5.0'}
    # URLGOESHERE is a placeholder that is not defined in this file; it must
    # be replaced with the real log URL before running.
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # as in the original, but confirm this is really wanted for the target.
    r = requests.get(URLGOESHERE, stream=True, verify=False, headers=user_agent)
    try:
        # Don't try to parse an HTTP error page as if it were a log file.
        r.raise_for_status()
        for raw in r.iter_lines():
            # iter_lines() yields bytes; the str regexes and json.dumps need
            # text on Python 3, so decode once here.
            if isinstance(raw, bytes):
                line = raw.decode('utf-8', 'replace')
            else:
                line = raw
            try:
                event = _error_event(line, apachequotedfieldsre,
                                     apachebracketfieldsre, apachestatusre)
            except ValueError as err:
                sys.stderr.write(
                    'skipping unparseable line ({0}): {1!r}\n'.format(err, line))
                continue
            if event is not None:
                print(json.dumps(event, indent=4, sort_keys=True))
    finally:
        # stream=True keeps the connection open until the response is closed.
        r.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment