Skip to content

Instantly share code, notes, and snippets.

@jonathanhle
Last active September 10, 2022 21:57
Show Gist options
  • Save jonathanhle/fe8adee73f686acfe29aaa0935aa26df to your computer and use it in GitHub Desktop.
Save jonathanhle/fe8adee73f686acfe29aaa0935aa26df to your computer and use it in GitHub Desktop.
parse_access_log.py
import logging
import re
# Path of the nginx access log that the parsers in this file open.
nginx_access_log_path = "/var/log/nginx/access.log"

# Root logger: emit everything from DEBUG up, as "<time> - <LEVEL>- <message>".
logging.basicConfig(
    format=' %(asctime)s - %(levelname)s- %(message)s',
    level=logging.DEBUG,
)
# Request methods accepted as legitimate when labelling a request line.
# https://nginx.org/en/docs/http/ngx_http_core_module.html#http:~:text=Limits%20allowed%20HTTP%20methods%20inside%20a%20location
_KNOWN_HTTP_METHODS = frozenset((
    "GET", "HEAD", "POST", "PUT", "DELETE", "MKCOL", "COPY", "MOVE",
    "OPTIONS", "PROPFIND", "PROPPATCH", "LOCK", "UNLOCK", "PATCH", "PRI",
    "CONNECT",
))


def _split_access_log_line(line):
    """Break one access-log line into fields using plain string operations.

    The line is expected to look like
    ``<client> ... [<date>] "<request>" <status> ... "<user agent>"``.

    Returns:
        A dict with the keys ``source_ip``, ``date``, ``method``, ``uri``,
        ``httpversion``, ``response_code`` and ``user_agent``, or ``None``
        when the line does not contain the expected delimiters or its status
        field is not numeric.
    """
    # '] "' separates the bracketed timestamp from the quoted request.
    prefix, found, rest = line.strip().partition('] "')
    if not found:
        return None

    client_part, found, date = prefix.partition(' [')
    if not found:
        return None

    # The first '" ' closes the quoted request; the status code follows it.
    request, found, after_request = rest.partition('" ')
    if not found:
        return None
    response_code = after_request.split(" ", 1)[0]
    if not response_code.isdigit():
        return None

    # The user agent is the last quoted field; drop its closing quote.
    user_agent = rest.rpartition('" "')[2]
    if user_agent.endswith('"'):
        user_agent = user_agent[:-1]

    # A normal request is exactly "METHOD URI VERSION" with a known method.
    # Anything else (empty request, binary probes, unknown verbs) is labelled
    # "ATTACKED" rather than being allowed to break the parse.
    request_parts = request.split(" ")
    if len(request_parts) == 3 and request_parts[0] in _KNOWN_HTTP_METHODS:
        method, uri, httpversion = request_parts
    else:
        method = uri = httpversion = "ATTACKED"

    return {
        "source_ip": client_part.split(" ", 1)[0],
        "date": date,
        "method": method,
        "uri": uri,
        "httpversion": httpversion,
        "response_code": response_code,
        "user_agent": user_agent,
    }


def parse_with_split(log_path=None):
    """Count access-log entries per HTTP status code using string splitting.

    Args:
        log_path: Path of the access log to read. Defaults to the
            module-level ``nginx_access_log_path``.

    Returns:
        A dict mapping each status code (as a string, e.g. ``"200"``) to the
        number of log lines that carried it. Blank and unparseable lines are
        skipped instead of aborting the run.

    Raises:
        OSError: If the log file cannot be opened.
    """
    if log_path is None:
        log_path = nginx_access_log_path

    response_code_count = {}
    skipped = 0
    with open(log_path) as nginx_access_file:
        # Iterate the file object directly so large logs are streamed.
        for line in nginx_access_file:
            if not line.strip():
                continue
            fields = _split_access_log_line(line)
            if fields is None:
                skipped += 1
                continue
            response_code = fields["response_code"]
            response_code_count[response_code] = (
                response_code_count.get(response_code, 0) + 1
            )

    if skipped:
        logging.debug("parse_with_split skipped %d unparseable line(s)", skipped)
    logging.info(f'The response_code_count from parse_with_split is {response_code_count}')
    return response_code_count
def parse_with_regex(log_path=None):
    """Count access-log entries per HTTP status code using a regular expression.

    Args:
        log_path: Path of the access log to read. Defaults to the
            module-level ``nginx_access_log_path``.

    Returns:
        A dict mapping each status code (as a string, e.g. ``"200"``) to the
        number of matching log lines. Lines the pattern does not match are
        skipped.

    Raises:
        OSError: If the log file cannot be opened.
    """
    if log_path is None:
        log_path = nginx_access_log_path

    # The pattern requires a dotted-quad client address followed by " - - ",
    # so lines with a remote user or a non-IPv4 client do not match. The
    # request group has fallback alternatives, so a line whose request is not
    # a recognised "METHOD url HTTP/x" still matches and is counted.
    lineformat = re.compile(
        r"""(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|HEAD|POST|PUT|DELETE|MKCOL|COPY|MOVE|OPTIONS|PROPFIND|PROPPATCH|LOCK|UNLOCK|PATCH|PRI|CONNECT) )(?P<url>.+)(http\/(1\.1|2\.0)")||.*) (?P<statuscode>\d{3}) (?P<bytessent>\d+) (?P<refferer>-|"([^"]+)") (["](?P<useragent>[^"]+)["])""", re.IGNORECASE)

    response_code_count = {}
    skipped = 0
    with open(log_path) as nginx_access_file:
        # Iterate the file object directly so large logs are streamed.
        for line in nginx_access_file:
            match = lineformat.search(line)
            if match is None:
                # Explicit check instead of a bare ``except`` so that real
                # errors (and Ctrl-C) are no longer swallowed.
                skipped += 1
                continue
            response_code = match.group("statuscode")
            response_code_count[response_code] = (
                response_code_count.get(response_code, 0) + 1
            )

    if skipped:
        logging.debug("parse_with_regex skipped %d non-matching line(s)", skipped)
    logging.info(f'The response_code_count from parse_with_regex is {response_code_count}')
    return response_code_count
# When executed as a script, run both parsers: split-based first, then regex.
if __name__ == '__main__':
    for run_parser in (parse_with_split, parse_with_regex):
        run_parser()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment