Last active
June 11, 2024 14:39
-
-
Save jweyrich/8d53a7bf5bad7b5958423cb4e538ab20 to your computer and use it in GitHub Desktop.
AWS ALB Log Parser written in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# coding=utf8 | |
# | |
# AUTHOR: Jardel Weyrich <jweyrich at gmail dot com> | |
# | |
from __future__ import print_function | |
import re, sys | |
def parse_alb_log_file(file_path):
    """Parse an AWS ALB access log file and print each entry as key="value" pairs.

    Every line of ``file_path`` is searched with the ALB access-log regex.
    For each line that matches, one output line is printed to stdout in the
    form ``field="value", field="value", ...``, with the fields in the order
    listed in ``fields`` below. Lines that do not match are skipped silently.

    :param file_path: path of the ALB access log file to read (opened in
        text mode).
    """
    # One name per capture group of the regex below, in the same order.
    fields = [
        "type",
        "timestamp",
        "alb",
        "client_ip",
        "client_port",
        "backend_ip",
        "backend_port",
        "request_processing_time",
        "backend_processing_time",
        "response_processing_time",
        "alb_status_code",
        "backend_status_code",
        "received_bytes",
        "sent_bytes",
        "request_verb",
        "request_url",
        "request_proto",
        "user_agent",
        "ssl_cipher",
        "ssl_protocol",
        "target_group_arn",
        "trace_id",
        "domain_name",
        "chosen_cert_arn",
        "matched_rule_priority",
        "request_creation_time",
        "actions_executed",
        "redirect_url",
        # Catch-all: whatever text follows redirect_url on the line.
        "new_field",
    ]
    # Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string and substitution.
    # REFERENCE: https://docs.aws.amazon.com/athena/latest/ug/application-load-balancer-logs.html#create-alb-table
    regex = r"([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) ([^ ]*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-\_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" ($|\"[^ ]*\")(.*)"
    # Compile once, outside the loop, instead of handing the raw pattern to
    # re.search() for every line.
    pattern = re.compile(regex)
    # `log_file` rather than `file`, which would shadow the Python 2 builtin.
    with open(file_path, 'r') as log_file:
        for line in log_file:
            matches = pattern.search(line)
            if not matches:
                # Not an ALB access log entry (or an unsupported layout).
                continue
            # The regex has exactly one capture group per field name, and
            # every group always participates in a match, so groups() lines
            # up with `fields` one to one.
            pairs = [
                "%s=\"%s\"" % (field, value)
                for field, value in zip(fields, matches.groups())
            ]
            # One write per record: pairs separated by ", ", newline at end.
            print(", ".join(pairs))
if __name__ == '__main__':
    # Script entry point: exactly one command-line argument, the log file
    # path, is expected after the program name.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        # sys.exit() with a string prints it to stderr and exits non-zero.
        sys.exit("usage: %s <log_file_path>" % sys.argv[0])
    parse_alb_log_file(cli_args[0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@jweyrich Using this regex, I have built a utility that can pull logs from S3 and push the parsed logs to different destinations, such as Elasticsearch or InfluxDB, for further analysis.
Parsing can be triggered in 3 ways