Last active
November 10, 2021 00:00
-
-
Save mttjohnson/a989f43f9879a96d8b2e556107405e46 to your computer and use it in GitHub Desktop.
Parsing nginx web request access logs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Rotated (yesterday's) and current access logs.
# Count the guest-cart place-order payment submissions, split by outcome:
# first the ones rejected with HTTP 400, then the ones answered with a
# non-4xx "x00" status (200, 500, ...).
# The rotated logs are streamed with `find -exec cat {} +`: the NUL separators
# written by `-print0` do not survive a $(...) substitution, so the earlier
# `cat $(find ... -print0)` form could not hand the file names on intact.
{ find . -regex '.*/www-prod_backend-access\.log-[0-9]+' -exec cat {} + ; cat www-prod_backend-access.log; } \
  | grep -Ec 'POST \/rest\/[A-Za-z0-9_]+\/V1\/guest-carts\/[A-Za-z0-9]*\/payment-information HTTP\/[1-2]\.[0-1]" 400'

# NOTE: `[^4]00` only matches statuses of the form x00 with x != 4, so
# responses such as 201 or 302 are not included in this second count.
{ find . -regex '.*/www-prod_backend-access\.log-[0-9]+' -exec cat {} + ; cat www-prod_backend-access.log; } \
  | grep -Ec 'POST \/rest\/[A-Za-z0-9_]+\/V1\/guest-carts\/[A-Za-z0-9]*\/payment-information HTTP\/[1-2]\.[0-1]" [^4]00'
# Extract the client IPs of the failed (HTTP 400) payment requests and count
# how many failures came from each one.
# The IP is taken from the quoted `"<client ip>, 127.0.0.1"` field. Lines
# without that field are skipped, so the result contains only IP addresses
# rather than whole log lines.
# `find -exec cat {} +` is used because NUL-separated `-print0` output cannot
# be passed through a $(...) substitution.
{ find . -regex '.*/www-prod_backend-access\.log-[0-9]+' -exec cat {} + ; cat www-prod_backend-access.log; } \
  | grep -E 'POST \/rest\/[A-Za-z0-9_]+\/V1\/guest-carts\/[A-Za-z0-9]*\/payment-information HTTP\/[1-2]\.[0-1]" 400' \
  | perl -ne 'print "$1\n" if /.*"(\d+\.\d+\.\d+\.\d+), 127\.0\.0\.1"/' \
  | sort | uniq --count
# Extract the dates of the failed (HTTP 400) payment requests from the
# compressed August 2021 logs and count the failures per day.
# The substitution reduces each line to the part of the bracketed timestamp
# that precedes its first colon, i.e. the date without the time of day.
zcat www-prod_backend-access.log-202108*.gz \
  | grep -E 'POST \/rest\/[A-Za-z0-9_]+\/V1\/guest-carts\/[A-Za-z0-9]*\/payment-information HTTP\/[1-2]\.[0-1]" 400' \
  | perl -p -e 's/\d+\.\d+\.\d+\.\d+.+?\[(.+?)\:.+\].+/$1/' \
  | sort | uniq --count
# Count the requests per client IP (first space-separated field of each log
# line) and list the IPs from most to fewest requests.
cut -d ' ' -f 1 access.log \
  | sort | uniq --count | sort -nr
# Count, per client IP, the savePayment POSTs whose line ends in the
# `"-" "Mozilla/5.0" "-"` field sequence matched below.
# The dot in `5\.0` is escaped so it only matches a literal dot.
grep -Ei 'POST /checkout/onepage/savePayment/.+ "-" "Mozilla/5\.0" "-"' \
    /var/log/nginx/www.example.com-access.log-20190313 \
  | cut -d ' ' -f 1 \
  | sort | uniq --count
# Write the de-duplicated list of client IPs that sent the matching
# savePayment POSTs to bad_payment_ips.txt, one IP per line.
# The dot in `5\.0` is escaped so it only matches a literal dot.
grep -Ei 'POST /checkout/onepage/savePayment/.+ "-" "Mozilla/5\.0" "-"' \
    /var/log/nginx/www.example.com-access.log-20190313 \
  | cut -d ' ' -f 1 \
  | sort -u > bad_payment_ips.txt
# Append every request made by an IP listed in bad_payment_ips.txt to
# bad_ips_requests.txt.
# A single awk pass loads the IP list into a lookup table and then keeps the
# log lines whose first field is exactly one of those IPs. Compared with
# grepping once per IP this
#   - compares the address literally (a regex dot matches any character, and
#     an unanchored 1.2.3.4 would also match 11.2.3.45),
#   - only looks at the client-address field rather than the whole line,
#   - reads the log once instead of once per IP.
# Output keeps the log's original line order. `>>` appends, so remove
# bad_ips_requests.txt first when re-running.
awk 'NR == FNR { bad[$1]; next } $1 in bad' \
    bad_payment_ips.txt \
    /var/log/nginx/www.example.com-access.log-20190313 \
  >> bad_ips_requests.txt
# Count the collected requests (one log line per request).
wc -l < bad_ips_requests.txt
# Show what else the flagged IPs requested: print every collected request
# except the savePayment and saveOrder form posts.
# NOTE: this reads ~/bad_ips_requests.txt, so it presumably expects the
# collection step to have been run from the home directory - confirm.
grep -Eiv 'POST /checkout/onepage/(savePayment/|saveOrder/form_key/)' ~/bad_ips_requests.txt
# List the distinct values of the `_kx` query-string parameter with the number
# of requests carrying each value, most frequent first.
# The substitution keeps the text after the first `_kx=` up to the next
# `"`, `&` or space.
grep -Ei '_kx' access.log \
  | perl -p -e 's/.+?_kx=(.+?)["& ].+/$1/' \
  | sort | uniq --count | sort -nr
# Requests per minute for lines containing `_kx`.
# The substitution reduces each line to its timestamp truncated to the minute
# (day/month/year:hour:minute); the second grep keeps only 01/Oct/2021 between
# 15:00 and 17:59 before the per-minute counts are produced.
grep -Ei '_kx' access.log \
  | perl -p -e 's/\d+\.\d+\.\d+\.\d+.+?\[(.+?\:\d+\:\d+)\:\d+.+\].+/$1/' \
  | grep -Ei '01/Oct/2021:1[567]' \
  | sort | uniq --count
# Complex parsing of log files with Perl
# Nginx Access Log RegEx
# (?<remote_addr>[^\s]+)\s[^\s]+\s[^\s]+\s\[(?<time_local>.+?\:.+?)\]\s"(?<method>\w+)\s(?<url_path>[\w\/\=\%\&\_\-\.\+\[\]\(\)\!\,]+)(?<url_query_string>\??[\w\/\=\%\&\_\-\.\+\[\]\(\)\!\,]*?)\s(?<protocol>[\w\/\.]+?)"\s(?<status>\d+)\s(?<body_bytes_sent>[\d\.]+)\s(?<request_time>[\d\.]+)\s.+
# Labels for Match Groups
# remote_addr: $1 $+{remote_addr}
# time_local: $2 $+{time_local}
# method: $3 $+{method}
# url_path: $4 $+{url_path}
# url_query_string: $5 $+{url_query_string}
# protocol: $6 $+{protocol}
# status: $7 $+{status}
# body_bytes_sent: $8 $+{body_bytes_sent}
# request_time: $9 $+{request_time}
#
# Template: every field is split into a named variable so the filter at the
# bottom can be edited freely. As written it prints the requests logged at
# 24/Aug/2021 10:00 whose path contains neither /static nor /media.
# Lines that do not fit the pattern are skipped explicitly, so the filter
# never runs against undefined or left-over captures.
# The regex is the one shown above, laid out with /x (whitespace between the
# tokens is ignored; none of the character classes contains a space).
perl -lne '
    next unless m/
        (?<remote_addr>[^\s]+) \s [^\s]+ \s [^\s]+ \s
        \[ (?<time_local>.+?\:.+?) \] \s
        " (?<method>\w+) \s
        (?<url_path>[\w\/\=\%\&\_\-\.\+\[\]\(\)\!\,]+)
        (?<url_query_string>\??[\w\/\=\%\&\_\-\.\+\[\]\(\)\!\,]*?) \s
        (?<protocol>[\w\/\.]+?) " \s
        (?<status>\d+) \s
        (?<body_bytes_sent>[\d\.]+) \s
        (?<request_time>[\d\.]+) \s .+
    /x;

    my $remote_addr      = $+{remote_addr};
    my $time_local       = $+{time_local};
    my $method           = $+{method};
    my $url_path         = $+{url_path};
    my $url_query_string = $+{url_query_string};
    my $protocol         = $+{protocol};
    my $status           = $+{status};
    my $body_bytes_sent  = $+{body_bytes_sent};
    my $request_time     = $+{request_time};

    print $_
        if (
            $time_local =~ m{24/Aug/2021:10:00}
            and $url_path !~ m{/static}
            and $url_path !~ m{/media}
        );
' access.log
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment