Created
July 1, 2015 20:17
-
-
Save jeffbryner/c0076716ff68b4a0252e to your computer and use it in GitHub Desktop.
apache parsing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
import json | |
import requests | |
from datetime import datetime | |
from dateutil.parser import parse | |
def apachetime(s):
    """
    Given a string representation of a datetime in apache format (e.g.
    "[01/Sep/2012:06:05:11 +0000]"), return the parsed datetime for that string.

    The surrounding square brackets are optional. When the string carries a
    UTC offset ("+0000", "-0730", ...) the returned datetime is
    timezone-aware with that fixed offset; when the offset is absent the
    returned datetime is naive.

    Raises ValueError if the string does not follow the apache layout, names
    an unknown month, or contains out-of-range date/time values.

    >>> apachetime("[01/Sep/2012:06:05:11 +0000]").isoformat()
    '2012-09-01T06:05:11+00:00'
    """
    # Imported locally because the file-level import only brings in `datetime`.
    from datetime import timedelta, timezone

    # Explicit month table keeps parsing independent of the process locale
    # (strptime's %b is locale-sensitive).
    month_map = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
                 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}

    # DD/Mon/YYYY:HH:MM:SS [+-HHMM], optionally wrapped in [].
    match = re.match(
        r'^\[?(\d{2})/([A-Za-z]{3})/(\d{4}):(\d{2}):(\d{2}):(\d{2})'
        r'(?:\s+([+-])(\d{2})(\d{2}))?\s*\]?$',
        s.strip())
    if match is None:
        raise ValueError('not an apache timestamp: {0!r}'.format(s))

    day, month_name, year, hour, minute, second, sign, off_h, off_m = match.groups()
    if month_name not in month_map:
        raise ValueError('unknown month {0!r} in {1!r}'.format(month_name, s))

    tzinfo = None
    if sign is not None:
        offset = timedelta(hours=int(off_h), minutes=int(off_m))
        tzinfo = timezone(-offset if sign == '-' else offset)

    return datetime(int(year), month_map[month_name], int(day),
                    int(hour), int(minute), int(second), tzinfo=tzinfo)
def main():
    """
    Get logs, parse and write as json.

    Streams an apache access log over HTTP and, for every line whose HTTP
    status is 400 or above, prints a JSON event to stdout. Each event holds
    the timestamp, a fixed category/tags pair, and a ``details`` dict with
    the raw line, site, source IP address, request and status.

    Lines without a space-delimited 3-digit status field, or with a status
    below 400, are skipped.
    """
    apachequotedfieldsre = re.compile(r'''"(.*?)"''')  # get fields delimited by ""
    apachebracketfieldsre = re.compile(r'''(\[.*?\])''')  # get fields delimited by []
    apachestatusre = re.compile(r''' ([0-9]{3}) ''')  # get 3 digit http status field

    # retrieve the logs
    user_agent = {'User-agent': 'Mozilla/5.0'}
    # NOTE(review): URLGOESHERE is a placeholder that is not defined in the
    # visible file; it must be supplied before this can run.
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # as in the original, but confirm this is intentional for the log host.
    r = requests.get(URLGOESHERE, stream=True, verify=False, headers=user_agent)
    try:
        for line in r.iter_lines():
            # iter_lines() yields bytes on Python 3; the str regexes and
            # json.dumps below need text.
            if isinstance(line, bytes):
                line = line.decode('utf-8', 'replace')

            statuses = apachestatusre.findall(line)
            if not statuses:
                continue
            httpstatus = statuses[0]
            # http non-success only
            if int(httpstatus) < 400:
                continue

            brackets = apachebracketfieldsre.findall(line)
            quoted = apachequotedfieldsre.findall(line)
            fields = line.split()

            details = dict()
            details['log'] = line
            # presumably the log format is "<client ip> <site> ..."; verify
            # against the server's LogFormat.
            details['site'] = fields[1]
            details['sourceipaddress'] = fields[0]
            details['request'] = quoted[0] if quoted else ''
            details['httpstatus'] = httpstatus

            event = dict()
            # NOTE(review): the value keeps the offset found in the log line;
            # it is not converted to UTC despite the key name.
            event['utctimestamp'] = apachetime(brackets[0] if brackets else '').isoformat()
            event['category'] = 'weblog'
            event['tags'] = ['apache']
            event['details'] = details
            event['summary'] = details['request']
            print(json.dumps(event, indent=4, sort_keys=True))
    finally:
        # Release the streamed connection even if parsing a line fails.
        r.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment