Skip to content

Instantly share code, notes, and snippets.

@algotrader-dotcom
Forked from hreeder/parser.py
Last active January 3, 2018 16:33
Show Gist options
  • Select an option

  • Save algotrader-dotcom/68045af17b0b89e5349b151dc8f0a7de to your computer and use it in GitHub Desktop.

Select an option

Save algotrader-dotcom/68045af17b0b89e5349b151dc8f0a7de to your computer and use it in GitHub Desktop.
Python nginx Log Parser
#!/usr/bin/env python
import gzip
import os
import sys
import re
# Report nginx access-log requests that ended in a 500/502/503/504 response.
#
# The script copies the logs from SOURCE_DIR into INPUT_DIR, scans every file
# of that copy (plain or gzip-compressed) and prints one space-separated line
# per matching request:
#
#     ip datetime url bytes_sent referrer user_agent status method
#
# Requires Python 3 (text-mode gzip and the ``errors=`` argument of open()).

SOURCE_DIR = "/var/log/nginx"
INPUT_DIR = "nginx-logs"

# Status codes that are reported; every other request is ignored.
ERROR_STATUSES = frozenset((500, 502, 503, 504))

# Order in which the fields of a parsed entry are printed.
OUTPUT_FIELDS = (
    "ip", "datetime", "url", "bytessent",
    "referrer", "useragent", "status", "method",
)

# Only GET/POST requests made over HTTP/1.1 with "- -" as the identity/user
# columns match this pattern; any other line is silently skipped.
lineformat = re.compile(r"""(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) (?P<statuscode>\d{3}) (?P<bytessent>\d+) (["](?P<refferer>(\-)|(.+))["]) (["](?P<useragent>.+)["])""", re.IGNORECASE)


def copy_logs(source_dir=SOURCE_DIR, target_dir=INPUT_DIR):
    """Copy the regular, non-hidden files of *source_dir* into *target_dir*.

    Replaces the former ``mkdir`` / ``cp source/*`` shell calls: the target
    directory is created when missing, sub-directories and dot-files are
    skipped (as the shell glob and a non-recursive ``cp`` did), and a file
    that cannot be copied produces a warning on stderr instead of aborting.
    """
    import shutil  # local import: only this function needs it

    os.makedirs(target_dir, exist_ok=True)
    if not os.path.isdir(source_dir):
        sys.stderr.write("log directory not found: %s\n" % source_dir)
        return
    for name in sorted(os.listdir(source_dir)):
        source_path = os.path.join(source_dir, name)
        if name.startswith(".") or not os.path.isfile(source_path):
            continue
        try:
            shutil.copy(source_path, target_dir)
        except OSError as exc:
            sys.stderr.write("cannot copy %s: %s\n" % (source_path, exc))


def open_log(path):
    """Open a log file for reading as text, decompressing ``.gz`` files.

    Undecodable bytes are replaced rather than raising, so one odd request
    line cannot abort the whole scan.
    """
    opener = gzip.open if path.endswith(".gz") else open
    return opener(path, "rt", encoding="utf-8", errors="replace")


def parse_line(line):
    """Parse one access-log line.

    Returns a dict keyed by the names in OUTPUT_FIELDS, with ``status``
    converted to int, or None when the line does not match ``lineformat``.
    The ``url`` value is kept exactly as captured, including the space that
    separates it from the protocol token.
    """
    match = lineformat.search(line)
    if match is None:
        return None
    fields = match.groupdict()
    return {
        "ip": fields["ipaddress"],
        "datetime": fields["dateandtime"],
        "url": fields["url"],
        "bytessent": fields["bytessent"],
        "referrer": fields["refferer"],  # (sic) group name used by the pattern
        "useragent": fields["useragent"],
        # The pattern captures text; compare status codes as numbers.
        "status": int(fields["statuscode"]),
        # Group 6 is the unnamed (GET|POST) group of the pattern.
        "method": match.group(6),
    }


def is_server_error(entry):
    """Return True when the parsed entry has one of the ERROR_STATUSES."""
    return entry["status"] in ERROR_STATUSES


def format_entry(entry):
    """Render a parsed entry as the space-separated output line."""
    return " ".join(str(entry[name]) for name in OUTPUT_FIELDS)


def iter_error_lines(directory=INPUT_DIR):
    """Yield an output line for every reported request found in *directory*.

    Files are visited in sorted name order and read lazily, line by line.
    A file that cannot be read (for example a truncated gzip archive) is
    reported on stderr and the scan continues with the next file.
    """
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if not os.path.isfile(path):
            continue
        try:
            with open_log(path) as logfile:
                for line in logfile:
                    entry = parse_line(line)
                    if entry is not None and is_server_error(entry):
                        yield format_entry(entry)
        except (OSError, EOFError) as exc:
            sys.stderr.write("cannot read %s: %s\n" % (path, exc))


def main():
    """Copy the nginx logs locally and print every 500/502/503/504 request."""
    copy_logs()
    for report_line in iter_error_lines(INPUT_DIR):
        print(report_line)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment