Last active
January 10, 2020 17:26
-
-
Save brodygov/46c563ea6cd7c0d0c1883f339a0622b0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'csv'
require 'optparse'

# ALB log format:
# https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-access-logs.html
#
# Field names in the order they appear on each space-separated log line.
ALB_FIELDS = %w[
  type
  timestamp
  elb
  client:port
  target:port
  request_processing_time
  target_processing_time
  response_processing_time
  elb_status_code
  target_status_code
  received_bytes
  sent_bytes
  request
  user_agent
  ssl_cipher
  ssl_protocol
  target_group_arn
  trace_id
  domain_name
  chosen_cert_arn
  matched_rule_priority
].freeze

# Maps each field name to its zero-based column position within a parsed row.
ALB_INDEX_MAP = ALB_FIELDS.each_with_index.to_h.freeze

# Matches any status code beginning with "5".
RE_ALL_5XX = /\A5/.freeze
# Matches status codes beginning with "500".
RE_500 = /\A500/.freeze

# Placeholder the log uses for a field that has no value.
NULL_FIELD = '-'.freeze
# Scan one uncompressed ALB access log and report rows whose status code
# matches +status_code_re+.
#
# By default a row counts as an error when either elb_status_code or
# target_status_code matches. The ELB status is checked first, so a row is
# counted (and yielded) at most once.
#
# @param filename [String] path of the log file to read
# @param only_elb_errors [Boolean] count only rows whose elb_status_code
#   matches AND whose target_status_code is NULL_FIELD ("-"), i.e. errors
#   that did not come from the target group
# @param only_target_errors [Boolean] count only rows whose
#   target_status_code matches
# @param status_code_re [Regexp] pattern applied to the status code fields
# @yield [row] each matching row as an Array of parsed field values; the
#   block is optional, without it the rows are only counted
# @return [Array(Integer, Integer)] [found_count, total_count]
# @raise [ArgumentError] if both only_elb_errors and only_target_errors are set
# @raise [IndexError] if a row has too few fields to hold the status codes
def find_errors(filename, only_elb_errors: false, only_target_errors: false,
                status_code_re: RE_ALL_5XX)
  if only_elb_errors && only_target_errors
    raise ArgumentError,
          'Cannot pass both only_elb_errors and only_target_errors'
  end

  # Both kinds of error are searched for unless one of the filters is set.
  check_elb = !only_target_errors
  check_target = !only_elb_errors

  # Column positions are loop-invariant, so look them up once.
  elb_index = ALB_INDEX_MAP.fetch('elb_status_code')
  target_index = ALB_INDEX_MAP.fetch('target_status_code')

  STDERR.puts("Processing #{filename.inspect}")

  found_count = 0
  total_count = 0

  # Treat the file as a CSV with " " instead of "," as the separator so the
  # double quoted fields are handled correctly. CSV.foreach closes the file
  # when iteration finishes (or raises), unlike a bare CSV.open(...).each.
  CSV.foreach(filename, col_sep: ' ') do |row|
    total_count += 1

    elb_status_code = row.fetch(elb_index)
    target_status_code = row.fetch(target_index)

    matched =
      if check_elb && status_code_re.match?(elb_status_code)
        # With only_elb_errors we exclude errors that came from the target
        # group, so the row only counts when the target status is "-".
        !only_elb_errors || target_status_code == NULL_FIELD
      else
        check_target && status_code_re.match?(target_status_code)
      end
    next unless matched

    found_count += 1
    yield row if block_given?
  end

  [found_count, total_count]
end
# Run find_errors over every file, streaming matching rows to stdout and
# printing a summary to stderr once all files have been read.
#
# @param filenames [Array<String>] paths of the log files to scan
# @param find_errors_opts [Hash] keyword options forwarded to find_errors
# @return [Array<String>] the files in which at least one error was found
def process_files(filenames, find_errors_opts: {})
  files_with_errors = []
  files_without_errors = []
  sum_found_count = 0
  sum_total_count = 0

  filenames.each do |file|
    # Each row is printed as its parsed field values joined by single spaces,
    # so double quotes around quoted input fields are not reproduced.
    found_count, total_count = find_errors(file, **find_errors_opts) do |row|
      puts row.join(' ')
    end

    sum_found_count += found_count
    sum_total_count += total_count

    bucket = found_count.positive? ? files_with_errors : files_without_errors
    bucket << file
  end

  # Guard against dividing by zero when no rows were read at all.
  found_pct =
    if sum_total_count.positive?
      (100.0 * sum_found_count / sum_total_count).round
    else
      0
    end

  STDERR.puts('Summary:')
  STDERR.puts("#{sum_found_count} errors among #{sum_total_count} requests (#{found_pct}%)")
  STDERR.puts('Files with no errors:')
  files_without_errors.each { |f| STDERR.puts " #{f}" }
  STDERR.puts('Files with errors:')
  files_with_errors.each { |f| STDERR.puts " #{f}" }

  files_with_errors
end
# Command line entry point: parse options from ARGV, then scan the named log
# files with process_files.
#
# Exits with status 1 (after printing the problem and the usage text to
# stderr) when no LOG_FILE is given, when an option is unknown or malformed,
# when --regex is not a valid pattern, or when --elb-errors and
# --target-errors are combined.
def main
  options = { find_errors_opts: {} }
  basename = File.basename($PROGRAM_NAME)

  optparse = OptionParser.new do |opts|
    opts.banner = <<~EOM
      usage: #{basename} [OPTIONS] LOG_FILE...
      Look for HTTP 5xx errors in specified uncompressed ALB LOG_FILEs.
      By default, look for 5xx errors in either elb_status_code or
      target_status_code. Use options to filter this more narrowly.
      Stream each error log line found to stdout.
      When finished, print a summary to stderr showing which files contain errors and
      the overall error rate.
      https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-access-logs.html
      Parsed log file fields:
      #{ALB_FIELDS.join(' ')}
      This script only handles uncompressed log files, so you will probably need to
      run gunzip *.log.gz first.
      For example:
      # Find HTTP 5xx errors from any source
      #{basename} *.log
      # Find HTTP 500 errors served by ALB only (like WAF issues)
      #{basename} --elb-errors --500 *.log
      # Find HTTP 5xx errors served by backend Target Group only
      #{basename} --target-errors *.log
      Options:
    EOM

    opts.on('-h', '--help', 'Display this message') do
      STDERR.puts opts
      exit
    end

    opts.on('-e', '--elb-errors', 'Find only errors in elb_status_code') do
      options[:find_errors_opts][:only_elb_errors] = true
    end

    opts.on('-t', '--target-errors',
            'Find only errors in target_status_code') do
      options[:find_errors_opts][:only_target_errors] = true
    end

    opts.on('--500', 'Find only HTTP 500 errors, not 5xx') do
      options[:find_errors_opts][:status_code_re] = RE_500
    end

    opts.on('--regex PAT',
            'Find HTTP status codes matching PAT, not default "\A5"') do |pat|
      begin
        options[:find_errors_opts][:status_code_re] = Regexp.new(pat)
      rescue RegexpError => e
        # Report a bad pattern like any other malformed option argument.
        raise OptionParser::InvalidArgument, e.message
      end
    end
  end

  begin
    args = optparse.parse!
  rescue OptionParser::ParseError => e
    STDERR.puts "#{basename}: #{e.message}"
    STDERR.puts optparse
    exit 1
  end

  if args.empty?
    STDERR.puts optparse
    exit 1
  end

  find_errors_opts = options.fetch(:find_errors_opts)

  # find_errors rejects this combination with ArgumentError; catch it here so
  # the user sees a usage error instead of a backtrace.
  if find_errors_opts[:only_elb_errors] && find_errors_opts[:only_target_errors]
    STDERR.puts "#{basename}: --elb-errors and --target-errors cannot be combined"
    STDERR.puts optparse
    exit 1
  end

  process_files(args, find_errors_opts: find_errors_opts)
end
# Run main only when this file is executed directly, not when it is loaded
# by another script.
main if $PROGRAM_NAME == __FILE__
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment