Created
June 30, 2010 23:09
-
-
Save georgeguimaraes/459340 to your computer and use it in GitHub Desktop.
One-liners and scripts for working with Apache logs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# original from http://vafer.org/blog/20081121232846 | |
# changed to output requests per second | |
#!/usr/bin/ruby | |
# MIT License | |
# based on http://topfunky.net/svn/plugins/mint/lib/log_parser.rb | |
# from Jan Wikholm | |
require 'date' | |
# Translates an Apache LogFormat string (e.g. '%h %l %u %t \"%r\" %>s %b')
# into an ordered list of field names (+format_symbols+) and a regular
# expression (+format_regex+) with one capture group per field, in the
# same order.
class LogFormat
  attr_reader :name, :format, :format_symbols, :format_regex

  # Supported directive letters, each mapped to [field name, pattern].
  # Note that 'h' only matches dotted-quad IPv4 addresses.
  DIRECTIVES = {
    'h' => [:ip, /\d+\.\d+\.\d+\.\d+/],
    'l' => [:auth, /.*?/],
    'u' => [:username, /.*?/],
    't' => [:datetime, /\[.*?\]/],
    'r' => [:request, /.*?/],
    's' => [:status, /\d+/],
    'b' => [:bytecount, /-|\d+/],
    'v' => [:domain, /.*?/],
    'i' => [:header_lines, /.*?/],
    'e' => [:errorlevel, /\[.*?\]/],
  }.freeze

  # name   - identifier for this format (stored as given).
  # format - the LogFormat string to compile.
  def initialize(name, format)
    @name = name
    @format = format
    parse_format(format)
  end

  # Scans +format+ for %-directives and sets @format_symbols and
  # @format_regex. Only whitespace, backslashes and double quotes that
  # directly follow a directive are carried over into the regex; any other
  # literal text in the format string is not.
  def parse_format(format)
    # The character class must be the bare directive letters. Interpolating
    # an Array here would also admit '"' and '|' as directive letters.
    format_directive = /%(.*?)(\{.*?\})?([#{DIRECTIVES.keys.join}])([\s\\"]*)/
    symbols = []
    pieces = []
    format.scan(format_directive) do |condition, subdirective, directive_char, ignored|
      symbol, match_regex = process_directive(directive_char, subdirective, condition)
      # Block form so the replacement is the two literal characters \s.
      separator = ignored.to_s.gsub(/\s/) { '\s' }
      symbols << symbol
      pieces << "(#{match_regex})#{separator}"
    end
    @format_symbols = symbols
    @format_regex = /^#{pieces.join}/
  end

  # Returns [field name, regex source] for a single directive.
  #
  # directive_char - one of the DIRECTIVES keys.
  # subdirective   - the "{...}" part including braces, or nil.
  # condition      - text between '%' and the directive (e.g. ">"); unused,
  #                  kept so the method signature stays the same.
  #
  # Raises ArgumentError for a directive letter that is not in DIRECTIVES.
  def process_directive(directive_char, subdirective, condition)
    symbol, pattern = DIRECTIVES[directive_char]
    raise ArgumentError, "unknown log directive: #{directive_char.inspect}" if pattern.nil?

    # %{Header-Name}i is named after the header (:header_name); a bare %i
    # has no header to name it after and keeps the generic field name.
    if directive_char == 'i' && subdirective
      symbol = subdirective[1...-1].downcase.tr('-', '_').to_sym
    end
    [symbol, pattern.source]
  end
end
# Recognises which of several known Apache log formats a line uses and
# splits the line into a Hash of named fields.
class LogParser
  # Format name => Apache LogFormat string.
  LOG_FORMATS = {
    :common => '%h %l %u %t \"%r\" %>s %b',
    :common_with_virtual => '%v %h %l %u %t \"%r\" %>s %b',
    :combined => '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_virtual => '%v %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
    :combined_with_cookies => '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" \"%{Cookies}i\"'
  }.freeze

  attr_reader :known_formats

  def initialize
    initialize_known_formats
  end

  # Builds @known_formats: one LogFormat per LOG_FORMATS entry, keyed by name.
  def initialize_known_formats
    @known_formats = {}
    LOG_FORMATS.each do |name, format|
      @known_formats[name] = LogFormat.new(name, format)
    end
  end

  # Returns the name of the first known format whose regex matches +line+,
  # trying formats with the longest regex source first, or :unknown when
  # none match.
  def check_format(line)
    key, _match = find_format(line)
    key || :unknown
  end

  # Parses one log line into a Hash keyed by the matched format's field
  # symbols. In addition:
  #   :datetime - has its surrounding brackets removed when present
  #   :domain   - falls back to the value of :ip when the format has none
  #   :format   - the name of the matched format
  #
  # Raises ArgumentError when no known format matches the line.
  def parse_line(line)
    key, match = find_format(line)
    @format = key || :unknown
    raise ArgumentError, "unrecognized log line: #{line.inspect}" if match.nil?

    parsed_data = {}
    # Capture group i + 1 belongs to the i-th field symbol.
    @known_formats[@format].format_symbols.each_with_index do |symbol, i|
      parsed_data[symbol] = match[i + 1]
    end
    parsed_data[:datetime] = parsed_data[:datetime][1...-1] if parsed_data[:datetime]
    parsed_data[:domain] = parsed_data[:ip] unless parsed_data[:domain]
    parsed_data[:format] = @format
    parsed_data
  end

  private

  # Returns [format name, MatchData] for the first matching format, trying
  # the longest regex source first, or nil when nothing matches. Shared by
  # check_format and parse_line so each line is matched only once.
  def find_format(line)
    candidates = @known_formats.sort_by { |_key, log_format| log_format.format_regex.source.size }.reverse
    candidates.each do |key, log_format|
      match = log_format.format_regex.match(line)
      return [key, match] if match
    end
    nil
  end
end
# Reads Apache access-log lines from STDIN (assumed to be in chronological
# order) and prints "<epoch seconds> <request count>" for every second
# that has at least one request.
parser = LogParser.new
current_key = 0 # epoch second currently being counted
count = 0       # requests seen so far for current_key

while (line = STDIN.gets)
  begin
    parsed_data = parser.parse_line(line)
  rescue ArgumentError
    # One unrecognised line should not abort the whole run.
    warn "skipping unparseable line: #{line.chomp}"
    next
  end

  stamp = %r{(\d{2})/(\w{3})/(\d{4}):(\d{2}):(\d{2}):(\d{2})}.match(parsed_data[:datetime].to_s)
  unless stamp
    warn "skipping line without a timestamp: #{line.chomp}"
    next
  end
  day, month, year, hour, minute, second = stamp.captures

  # Time.mktime accepts three-letter English month names and interprets the
  # fields in the local time zone; the UTC offset in the log is not used.
  key = Time.mktime(year, month, day, hour, minute, second).to_i

  if key != current_key
    # Report the second that just ended (current_key), not the new one.
    printf "%d %d\n", current_key, count if count > 0
    count = 0
    current_key = key
  end
  count += 1
end

# Flush the final second, which the loop never sees a successor for.
printf "%d %d\n", current_key, count if count > 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Filters out requests whose path contains a '.' followed by a letter,
# usually asset requests (favicon.ico, .jpg, .gif, .css, .js).
# [^ ]+ keeps the match inside the request path, so a dotted referer or
# user agent later on the line cannot cause a page request to be dropped.
grep -Ev 'GET /[^ ]+\.[[:alpha:]]' access.log > access-rails.log
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Not very effective, but it did the job for me in one specific case.