Skip to content

Instantly share code, notes, and snippets.

@etigui
Created January 8, 2020 14:12
Show Gist options
  • Save etigui/aca04ea02684f6c4d4a85b19c505f4a9 to your computer and use it in GitHub Desktop.
Save etigui/aca04ea02684f6c4d4a85b19c505f4a9 to your computer and use it in GitHub Desktop.
Parse an Nginx "access.log" file into Python objects or JSON
def main():
    """Parse ``input.log``, print every parsed record, then export them.

    Each record is printed on one line as: date, ip, url, bytes sent,
    referrer, user agent, status and method. The same input file is then
    parsed again and written to ``output.json``.
    """
    # NOTE(review): `nginx` is not imported in the visible part of this file;
    # presumably it is the module that defines the `Nginx` class -- confirm
    # that an `import nginx` exists at the top of the script.
    ng = nginx.Nginx()
    logs_parsed = ng.get_obj_parsed_logs('input.log')
    for log in logs_parsed:
        print(f'{log.date} {log.ip} {log.url} {log.bytes_sent} {log.referrer} {log.user_agent} {log.status} {log.method}')
    # The output name previously read 'output.josn' (transposed letters).
    ng.save_json_parsed_logs('input.log', 'output.json')


if __name__ == "__main__":
    main()
# Ref: https://gist.github.com/hreeder/f1ffe1408d296ce0591d
import json
import re
from datetime import datetime
class Nginx:
    """Parse Nginx access-log files into ``NginxLogs`` records.

    A line is parsed only when it matches the pattern compiled in
    ``__init__``: an IPv4 client address, a bracketed timestamp, a ``GET``
    or ``POST`` request over HTTP/1.1, a three-digit status code, the number
    of bytes sent, a quoted referrer and a quoted user agent. Matching is
    case-insensitive. Lines that do not match are skipped silently.

    Attributes:
        parsed_logs: Records produced by the most recent parse. The list is
            cleared and refilled every time a public method is called.
    """

    def __init__(self):
        # The request method is captured by the named group `method` so the
        # parser does not depend on counting positional groups. The pattern
        # is split over adjacent raw strings purely for readability.
        self.__log_format = re.compile(
            r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - '
            r'\[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] '
            r'((\"(?P<method>GET|POST) )(?P<url>.+)(http\/1\.1")) '
            r'(?P<statuscode>\d{3}) (?P<bytessent>\d+) '
            r'(["](?P<refferer>(\-)|(.+))["]) '
            r'(["](?P<useragent>.+)["])',
            re.IGNORECASE,
        )
        self.__input_file_name = ''
        self.__output_file_name = ''
        self.parsed_logs = []

    def __parser(self):
        """Rebuild ``parsed_logs`` from the lines of the current input file.

        The timestamp of each matching line is reformatted as
        ``YYYY-MM-DD HH:MM:SS``; the UTC offset is parsed but not kept in
        the output. All other fields are stored as the strings captured by
        the pattern, with surrounding whitespace removed from the URL.

        Raises:
            Exception: If the input file cannot be opened or read.
            ValueError: If a matching line carries a timestamp that
                ``datetime.strptime`` rejects (e.g. an unknown month name).
        """
        self.parsed_logs.clear()
        try:
            with open(self.__input_file_name, 'r') as f:
                # Iterate the file lazily instead of loading every line.
                for line in f:
                    match = self.__log_format.search(line)
                    if not match:
                        continue
                    fields = match.groupdict()
                    date = datetime.strptime(
                        fields["dateandtime"], "%d/%b/%Y:%H:%M:%S %z"
                    ).strftime("%Y-%m-%d %H:%M:%S")
                    self.parsed_logs.append(NginxLogs(
                        ip=fields["ipaddress"],
                        date=date,
                        # The greedy URL group also swallows the space that
                        # precedes "HTTP/1.1"; drop it.
                        url=fields["url"].strip(),
                        bytes_sent=fields["bytessent"],
                        referrer=fields["refferer"],
                        user_agent=fields["useragent"],
                        status=fields["statuscode"],
                        method=fields["method"],
                    ))
        except OSError as err:
            # Only genuine I/O failures are reported as "cannot open"; other
            # errors propagate unchanged so they are not mislabelled.
            raise Exception(f'Cannot open Nginx log file: {self.__input_file_name}') from err

    def __write_file(self):
        """Append ``parsed_logs`` to the output file, one JSON object per line.

        Raises:
            Exception: If the output file cannot be opened or written.
        """
        try:
            with open(self.__output_file_name, 'a+') as f:
                # json.dumps yields valid JSON; the plain dict repr does not
                # (single quotes, Python-style escapes).
                f.writelines(f'{json.dumps(vars(log))}\n' for log in self.parsed_logs)
        except OSError as err:
            raise Exception(f'Cannot create Nginx output log file: {self.__output_file_name}') from err

    def get_obj_parsed_logs(self, input_file_name):
        """Parse ``input_file_name`` and return the resulting records.

        Args:
            input_file_name: Path of the Nginx access log to read.

        Returns:
            The ``parsed_logs`` list itself (not a copy); it is reused and
            overwritten by the next parse.

        Raises:
            Exception: If the input file cannot be opened or read.
        """
        self.__input_file_name = input_file_name
        self.__parser()
        return self.parsed_logs

    def save_json_parsed_logs(self, input_file_name, output_file_name):
        """Parse ``input_file_name`` and append the records to ``output_file_name``.

        The output contains one JSON object per line. The file is opened in
        append mode, so existing content is kept.

        Args:
            input_file_name: Path of the Nginx access log to read.
            output_file_name: Path of the file to append the records to.

        Raises:
            Exception: If the input file cannot be read or the output file
                cannot be written.
        """
        self.__input_file_name = input_file_name
        self.__output_file_name = output_file_name
        self.__parser()
        self.__write_file()
class NginxLogs:
    """One parsed access-log entry.

    Every constructor argument is stored unchanged on the attribute of the
    same name.
    """

    def __init__(self, ip, date, url, bytes_sent, referrer, user_agent, status, method):
        # Assigned pairwise, left to right, so attribute insertion order
        # (and therefore the key order of ``vars(self)``) is ip, date, url,
        # bytes_sent, referrer, user_agent, status, method.
        self.ip, self.date = ip, date
        self.url, self.bytes_sent = url, bytes_sent
        self.referrer, self.user_agent = referrer, user_agent
        self.status, self.method = status, method
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment