Created
November 24, 2012 00:54
-
-
Save svyatov/4137877 to your computer and use it in GitHub Desktop.
nginx log parser: reports the top 10 client IPs by traffic, plus some additional info per IP (page hits, URL hits).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Matches one access-log line and captures the client IP, the requested URL
# and the response size in bytes. Only GET requests match.
# Line example: [23/Nov/2012:06:26:53 +0400] 123.123.123.123 "GET /images/socializ/google-buzz.png HTTP/1.1" 200 2283 "-" www.domain.ru "http://www.domain.ru/page.html" "Opera/9.80 (J2ME/MIDP; Opera Mini/5.1.24009/28.3126; U; ru) Presto/2.8.119 Version/11.10"
LOG_LINE_PATTERN = /\A\[.+?\]\s(?<ip>.+?)\s"GET (?<url>.+?)\s.+?"\s[0-9]+\s(?<bytes>[0-9]+)\s/

# Captures a trailing 3-4 letter lowercase file extension of a URL, if any.
URL_EXTENSION_PATTERN = /\.([a-z]{3,4})\z/

# One report row: IP, traffic in Gb, "html" hits, total matched hits, and the
# per-extension hit counts for everything that is not "html".
REPORT_FORMAT = '%15s => %5.2f Gb [pages hits: %i; files hits: %i, extensions: %s]'

BYTES_PER_GB = 1024**3

# Aggregates per-IP statistics from an enumerable of access-log lines.
#
# Lines that do not match LOG_LINE_PATTERN are skipped, so they are never
# tallied under a nil IP.
#
# @param lines [Enumerable<String>] raw log lines
# @return [Hash] with keys:
#   :bytes      => { ip => total bytes sent }
#   :hits       => { ip => number of matched requests }
#   :pages      => { ip => number of requests whose URL ends in ".html" }
#   :extensions => { ip => { extension => hits } } for non-"html" extensions;
#                  an IP with no such requests has no entry
def collect_traffic_stats(lines)
  stats = { bytes: Hash.new(0), hits: Hash.new(0), pages: Hash.new(0), extensions: {} }

  lines.each do |line|
    request = LOG_LINE_PATTERN.match(line)
    next unless request

    ip = request[:ip]
    stats[:bytes][ip] += request[:bytes].to_i
    stats[:hits][ip] += 1

    extension = request[:url][URL_EXTENSION_PATTERN, 1]
    if extension == 'html'
      stats[:pages][ip] += 1
    elsif extension
      (stats[:extensions][ip] ||= Hash.new(0))[extension] += 1
    end
  end

  stats
end

# Builds the report rows for the IPs with the most traffic, heaviest first.
#
# @param stats [Hash] result of collect_traffic_stats
# @param limit [Integer] maximum number of rows to return
# @return [Array<String>] formatted rows
def top_consumer_report(stats, limit = 10)
  heaviest = stats[:bytes].max_by(limit) { |_ip, bytes| bytes }

  heaviest.map do |ip, bytes|
    format(REPORT_FORMAT,
           ip,
           bytes.to_f / BYTES_PER_GB, # converting bytes to Gb
           stats[:pages][ip],
           stats[:hits][ip], # every matched request, labelled "files hits" in the output
           stats[:extensions][ip]) # nil (printed as an empty string) when the IP has no entry
  end
end

if __FILE__ == $PROGRAM_NAME
  log_path = ARGV.shift

  if log_path
    # File.foreach without a block returns an enumerator, so the log is
    # streamed line by line instead of being loaded into memory.
    puts top_consumer_report(collect_traffic_stats(File.foreach(log_path)))
  else
    warn "Usage: ruby #{File.basename(__FILE__)} nginx-access.log"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage:
ruby analyse_nginx_log.rb nginx-access.log
Output example:
3.2 million lines are parsed in 55 seconds. Any suggestions on how to improve this?