Last active
February 22, 2021 08:19
-
-
Save arika/61a79110d4677683d05989e9e2ed2d66 to your computer and use it in GitHub Desktop.
Railsのログファイルから条件指定して抽出する http://aylog.tumblr.com/post/170672481465/rails%E3%81%AE%E3%83%AD%E3%82%B0%E3%82%92%E6%A4%9C%E7%B4%A2%E3%81%99%E3%82%8B
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# frozen_string_literal: true | |
require 'time' | |
require 'optparse' | |
require 'ostruct' | |
require 'fileutils' | |
class Index | |
reqid_regexp = /\h{8}-\h{4}-\h{4}-\h{4}-\h{12}/ | |
LOG_REGEXP = /\A., \[(?<time>\S+) #(?<pid>\d+)\] *(?<severity>\S+) -- :(?: \[(?<reqid>#{reqid_regexp})\])? (?<message>.*)/.freeze | |
ANSI_ESCAPE_SEQ_REGEXP = /\e\[(?:\d{1,2}(?:;\d{1,2})?)?[mK]/.freeze | |
def initialize(io) | |
@io = io | |
end | |
def select(selector) | |
buff = {} | |
prev_time = nil | |
found = false | |
@io.each_line do |line| | |
m = LOG_REGEXP.match(line) | |
next unless m | |
pid = m[:pid] | |
reqid = m[:reqid] | |
message = m[:message] | |
time = Time.parse(m[:time]) | |
log = { time: time, severity: m[:severity], message: message } | |
ident = reqid || pid | |
data = buff[ident] if buff.key?(ident) | |
if /\AStarted (?<http_method>\S+) "(?<path>[^"]*)" for (?<client>\S+)/ =~ message | |
buff.delete(ident) | |
log[:interval] = 0.0 | |
prev_time = time | |
data = { | |
id: reqid || time.strftime("#{pid}-%Y%m%d-%H%M%S-%6N"), | |
begin_time: time, | |
pid: pid, | |
request_id: reqid, | |
http_method: http_method, | |
path: path, | |
client: client, | |
logs: [log], | |
orig_logs: [line] | |
} | |
buff[ident] = data | |
next | |
end | |
next unless data | |
message.gsub!(ANSI_ESCAPE_SEQ_REGEXP, '') | |
log[:interval] = time - prev_time | |
prev_time = time | |
if /\AProcessing by (?<controller>[^\s#]+)#(?<action>\S+)/ =~ message | |
data[:controller] = controller | |
data[:action] = action | |
data[:logs] << log | |
data[:orig_logs] << line | |
buff.delete(ident) unless selector.pre_filter(data) | |
elsif /\A Parameters: (?<params>.*)/ =~ message | |
data[:parameters] = params | |
data[:logs] << log | |
data[:orig_logs] << line | |
elsif /\ACompleted (?<http_status>\d+) .* in (?<duration>\d+)ms/ =~ message | |
data[:http_status] = http_status | |
data[:duration] = duration.to_i | |
data[:end_time] = time | |
data[:logs] << log | |
data[:orig_logs] << line | |
selector.run(data) do |i| | |
yield(i) | |
found = true | |
end | |
buff.delete(ident) | |
else | |
data[:logs] << log | |
data[:orig_logs] << line | |
end | |
end | |
found | |
end | |
end | |
module Filter | |
class Base | |
def apply(obj) | |
obj | |
end | |
end | |
class RequestId < Base | |
def initialize(reqids) | |
@request_ids = reqids | |
end | |
def apply(data) | |
@request_ids.any? do |request_id| | |
data[:request_id] == request_id | |
end | |
end | |
end | |
class ControllerAction < Base | |
def initialize(names) | |
@controller_actions = controller_actions(names) | |
end | |
def apply(data) | |
@controller_actions.any? do |controller, action| | |
data[:controller] == controller && | |
(action.nil? || data[:action] == action) | |
end | |
end | |
private | |
def controller_actions(names) | |
names.map do |name| | |
controller, action = name.split(/#/, 2) | |
controller = classify(controller) << 'Controller' unless controller.match?(/Controller\z/) | |
[controller, action] | |
end | |
end | |
def classify(name) | |
name.scan(%r{(?:/|[^_/]+)}) | |
.map { |seg| seg == '/' ? '::' : seg.capitalize } | |
.join | |
end | |
end | |
class HttpMethod < Base | |
def initialize(http_method) | |
@http_method = http_method.upcase | |
end | |
def apply(data) | |
data[:http_method] == @http_method | |
end | |
end | |
class HttpStatus < Base | |
def initialize(pattern) | |
m = /\A([,\d]+)?(?:!([,\d]+))?\z/.match(pattern) | |
raise ArgumentError unless m | |
@includes = m[1] ? m[1].split(/,/) : [] | |
@excludes = m[2] ? m[2].split(/,/) : [] | |
end | |
def apply(data) | |
http_status = data[:http_status] | |
@excludes.none? { |exc| http_status.index(exc)&.zero? } && | |
(@includes.empty? || @includes.any? { |inc| http_status.index(inc)&.zero? }) | |
end | |
end | |
module RangePattern | |
private | |
def parse_range_pattern(pattern) | |
if /\A(?<range_begin>.*?[^.])?\.\.(?<exclude_end>\.)?(?<range_end>[^.].*)?\z/ =~ pattern | |
@range_begin = yield(range_begin) if range_begin | |
@range_end = yield(range_end) if range_end | |
@exclude_end = !exclude_end.nil? | |
elsif /\A(?<range_begin>.+),(?<delta>[^,]+)\z/ =~ pattern | |
delta = delta.to_f | |
@range_begin = yield(range_begin) | |
@range_end = @range_begin + delta | |
@range_begin -= delta | |
@exclude_end = true | |
else | |
raise ArgumentError | |
end | |
end | |
def cover?(value) | |
if @range_begin && @range_end && @exclude_end | |
@range_begin <= value && value < @range_end | |
elsif @range_begin && @range_end | |
@range_begin <= value && value <= @range_end | |
elsif @range_begin | |
@range_begin <= value | |
elsif @range_end && @exclude_end | |
value < @range_end | |
elsif @range_end | |
value <= @range_end | |
end | |
end | |
end | |
class TimeRange < Base | |
include RangePattern | |
def initialize(pattern) | |
parse_range_pattern(pattern) { |time_str| Time.parse(time_str) } | |
end | |
def apply(data) | |
cover?(data[:begin_time]) || cover?(data[:end_time]) | |
end | |
end | |
class DurationRange < Base | |
include RangePattern | |
def initialize(pattern) | |
parse_range_pattern(pattern, &:to_i) | |
end | |
def apply(data) | |
cover?(data[:duration]) | |
end | |
end | |
class ParamsRegexp < Base | |
def initialize(regexp) | |
@regexp = regexp | |
end | |
def apply(data) | |
@regexp.match?(data[:parameters]) | |
end | |
end | |
class LogsRegexp < Base | |
def initialize(regexp) | |
@regexp = regexp | |
end | |
def apply(data) | |
data[:logs].any? { |log| @regexp.match?(log[:message]) } | |
end | |
end | |
end | |
class Selector | |
class << self | |
def filter_types | |
@filter_types ||= { | |
request_ids: [Filter::RequestId, :pre_filter], | |
controller_actions: [Filter::ControllerAction, :pre_filter], | |
http_method: [Filter::HttpMethod, :pre_filter], | |
http_status: [Filter::HttpStatus, :filter], | |
time_range: [Filter::TimeRange, :filter], | |
duration_range: [Filter::DurationRange, :filter], | |
params_regexp: [Filter::ParamsRegexp, :filter], | |
logs_regexp: [Filter::LogsRegexp, :filter], | |
} | |
end | |
end | |
def initialize | |
@pre_filters = [] | |
@filters = [] | |
end | |
def pre_filter(data) | |
apply_filters(data, @pre_filters) | |
end | |
def run(data, &block) | |
apply_filters(data, @filters, &block) | |
end | |
def add_filter(name, option) | |
klass, type = self.class.filter_types.fetch(name) | |
filter = klass.new(option) | |
if type == :pre_filter | |
@pre_filters << filter | |
else | |
@filters << filter | |
end | |
end | |
private | |
def apply_filters(data, filters) | |
if filters.empty? | |
result = true | |
else | |
result = filters.all? { |filter| filter.apply(data) } | |
end | |
return yield(data) if result && block_given? | |
result | |
end | |
end | |
class Printer | |
module Driver | |
class Base | |
def initialize(output_directory:, include_debug:) | |
@output_directory = output_directory | |
@include_debug = include_debug | |
end | |
def print(data) | |
with_output(data) do |output| | |
print_data(output, data) | |
end | |
end | |
private | |
def suffix | |
self.class::SUFFIX | |
end | |
def print_data(_output, _data) | |
raise NotImplementedError | |
end | |
def with_output(data, &block) | |
return block.call($stdout) unless @output_directory | |
file = data[:id] | |
sub_dir = file[-2, 2] | |
dir = "#{@output_directory}/#{sub_dir}" | |
FileUtils.mkdir_p(dir) | |
File.open("#{dir}/#{file}#{suffix}", File::CREAT | File::TRUNC | File::WRONLY, &block) | |
end | |
def select_logs(data) | |
logs = [] | |
data[:logs].each do |log| | |
next unless @include_debug || log[:severity] != 'DEBUG' | |
if block_given? | |
logs = yield(log) | |
else | |
logs << log | |
end | |
end | |
logs | |
end | |
end | |
class Null < Base | |
private | |
def print_data(*); end | |
end | |
class Text < Base | |
DATETIME_FORMAT = '%FT%T.%6N' | |
SUFFIX = '.txt' | |
private | |
def print_data(output, data) | |
print_header(output, data) | |
print_body(output, data) | |
output.puts unless @output_directory | |
end | |
def print_header(output, data) | |
output.puts "time: #{data[:begin_time].strftime(DATETIME_FORMAT)} .. #{data[:end_time].strftime(DATETIME_FORMAT)}" | |
output.puts "request_id: #{data[:request_id]}" if data[:request_id] | |
output.print <<~END_OF_HEADER | |
pid: #{data[:pid]} | |
status: #{data[:http_status]} | |
duration: #{data[:duration]}ms | |
END_OF_HEADER | |
end | |
def print_body(output, data) | |
select_logs(data) do |log| | |
output.printf "[%<interval>8.3f] %<message>s\n", interval: log[:interval] * 1_000, message: log[:message] | |
end | |
end | |
end | |
class Json < Base | |
DATETIME_FORMAT = '%FT%T.%6N%:z' | |
SUFFIX = '.json' | |
private | |
def print_data(output, data) | |
json = data.slice( | |
:request_id, :http_status, :http_method, :controller, :action, | |
:duration, :path, :parameters, :client | |
) | |
json[:begin_time] = data[:begin_time].strftime(DATETIME_FORMAT) | |
json[:end_time] = data[:end_time].strftime(DATETIME_FORMAT) | |
json[:pid] = data[:pid].to_i | |
json[:logs] = [] | |
select_logs(data) do |log| | |
json[:logs] << log.merge(time: log[:time].strftime(DATETIME_FORMAT)) | |
end | |
output.puts JSON.fast_generate(json) | |
end | |
end | |
class Raw < Base | |
SUFFIX = '.log' | |
private | |
def print_data(output, data) | |
if data.key?(:orig_logs) | |
output.puts data[:orig_logs] | |
return | |
end | |
data[:logs].each do |log| | |
output.printf( | |
"%<sev>s, [%<time>s #%<pid>d] %<sevirity>5s -- : %<reqid>s%<message>s\n", | |
sev: log[:severity][0], | |
sevirity: log[:severity], | |
time: log[:time].strftime('%FT%T.%6N'), | |
pid: data[:pid], | |
reqid: data[:request_id] ? "[#{data[:request_id]}] " : '', | |
message: log[:message] | |
) | |
end | |
end | |
end | |
class Groonga < Base | |
def initialize(*) | |
super | |
raise ArgumentError, 'output directory is required' unless @output_directory | |
FileUtils.mkdir_p(@output_directory) | |
@groonga_index = GroongaIndex.new(@output_directory, create: true) | |
end | |
def print(data) | |
data[:logs] = select_logs(data) | |
@groonga_index.store(data) | |
end | |
end | |
end | |
DRIVERS = { | |
text: Driver::Text, | |
json: Driver::Json, | |
raw: Driver::Raw, | |
null: Driver::Null, | |
groonga: Driver::Groonga, | |
}.freeze | |
attr_accessor :output_directory, :include_debug | |
def initialize | |
@output_directory = nil | |
@include_debug = true | |
@driver = nil | |
self.driver_class = :text | |
end | |
def driver_class=(type) | |
@driver_class = drivers[type] | |
raise ArgumentError unless @driver_class | |
end | |
def print(data) | |
@driver ||= initialize_driver | |
@driver.print(data) | |
end | |
private | |
def initialize_driver | |
@driver_class.new(output_directory: output_directory, include_debug: @include_debug) | |
end | |
def drivers | |
self.class::DRIVERS | |
end | |
end | |
class CommandlineOption | |
class << self | |
def parse(selector:, printer:) | |
options = new(selector: selector, printer: printer) | |
OptionParser.new do |parser| | |
options.define_options(parser) | |
parser.parse! | |
end | |
options | |
end | |
end | |
attr_accessor :debug | |
def initialize(selector:, printer:) | |
@selector = selector | |
@printer = printer | |
@debug = false | |
end | |
def define_options(parser) | |
define_banner(parser) | |
define_filter_options(parser) | |
define_printer_options(parser) | |
define_common_options(parser) | |
end | |
private | |
def define_banner(parser) | |
parser.banner = "usage: #{$0} [options] [log-files...]" | |
end | |
def define_filter_options(parser) | |
parser.separator '' | |
parser.separator 'Filter options:' | |
define_request_id_option(parser) | |
define_controller_action_option(parser) | |
define_http_method_option(parser) | |
define_http_status_option(parser) | |
define_duration_range_option(parser) | |
define_time_range_option(parser) | |
define_params_regexp_option(parser) | |
define_logs_regexp_option(parser) | |
end | |
def define_printer_options(parser) | |
parser.separator '' | |
parser.separator 'Printer options:' | |
define_output_option(parser) | |
define_hide_debug_log_option(parser) | |
define_raw_printer_option(parser) | |
define_json_printer_option(parser) | |
define_groonga_printer_option(parser) | |
define_null_printer_option(parser) | |
end | |
def define_common_options(parser) | |
parser.separator '' | |
parser.separator 'Common options:' | |
parser.on_tail('--debug', 'Enable debug print') do | |
self.debug = true | |
end | |
parser.on_tail('-h', '--help', 'Show help') do | |
puts parser | |
exit | |
end | |
end | |
def define_request_id_option(parser) | |
parser.on( | |
'-I', '--request-ids=IDS', Array, | |
'Filter by request-id', | |
) do |reqids| | |
@selector.add_filter(:request_ids, reqids) | |
end | |
end | |
def define_controller_action_option(parser) | |
parser.on( | |
'-a', '--action-names=NAMES', Array, | |
'Filter by controller and action names', | |
'ex: "FooController#index,BarController,baz#show,..."' | |
) do |names| | |
@selector.add_filter(:controller_actions, names) | |
end | |
end | |
def define_http_status_option(parser) | |
parser.on( | |
'-s', '--statuses=STATUSES', String, | |
'Filter by statuses', | |
'ex: "3,20,!201,..."' | |
) do |pattern| | |
begin | |
@selector.add_filter(:http_status, pattern) | |
rescue ArgumentError | |
raise OptionParser::InvalidArgument, | |
'expects statuse pattern (ex: "200,302", "3,20,!201,304", or "!4,5")' | |
end | |
end | |
end | |
def define_http_method_option(parser) | |
parser.on( | |
'-m', '--method=METHOD', String, | |
'Filter by HTTP method name' | |
) do |http_method| | |
@selector.add_filter(:http_method, http_method) | |
end | |
end | |
def define_time_range_option(parser) | |
parser.on( | |
'-t', '--time-range=TIME_RANGE', String, | |
'Filter by time range', | |
'ex: "2018-01-02 12:00..2018-02-01 12:00", "1/2 12:00...2/2 12:00", or "3/5,60"' | |
) do |pattern| | |
begin | |
@selector.add_filter(:time_range, pattern) | |
rescue ArgumentError | |
raise OptionParser::InvalidArgument, | |
'expects time range format (ex: "time1..time2", "time1...time2", or "time,delta")' | |
end | |
end | |
end | |
def define_duration_range_option(parser) | |
parser.on( | |
'-d', '--duration-range=DUR_RANGE', String, | |
'Filter by duration range [ms]', | |
'ex: "10..200", "10...200", or "300,10"' | |
) do |pattern| | |
begin | |
@selector.add_filter(:duration_range, pattern) | |
rescue ArgumentError | |
raise OptionParser::InvalidArgument, | |
'expects duration range format (ex: "ms1..ms2", "ms1...ms2", or "ms,delta")' | |
end | |
end | |
end | |
def define_params_regexp_option(parser) | |
parser.on( | |
'-P', '--params-regexp=REGEXP', Regexp, | |
'Filter by parameters', | |
%q(ex: '"foo"=>"ba[rz]"') | |
) do |regexp| | |
@selector.add_filter(:params_regexp, regexp) | |
end | |
end | |
def define_logs_regexp_option(parser) | |
parser.on( | |
'-L', '--logs-regexp=REGEXP', Regexp, | |
'Filter by log messages', | |
%q(ex: '"^ Rendering .*\.json"') | |
) do |regexp| | |
@selector.add_filter(:logs_regexp, regexp) | |
end | |
end | |
def define_output_option(parser) | |
parser.on( | |
'-o', '--output=DIR', | |
'Output to directory' | |
) do |dir| | |
@printer.output_directory = dir | |
end | |
end | |
def define_hide_debug_log_option(parser) | |
parser.on( | |
'-D', '--[no-]hide-debug-logs', | |
"Hide DEBUG logs" | |
) do |value| | |
@printer.include_debug = !value | |
end | |
end | |
def define_json_printer_option(parser) | |
parser.on( | |
'-J', '--json', | |
'Output in JSON' | |
) do | |
begin | |
require 'json' | |
@printer.driver_class = :json | |
rescue LoadError | |
raise OptionParser::InvalidArgument, 'requires JSON library (try "gem i json")' | |
end | |
end | |
end | |
def define_groonga_printer_option(parser) | |
parser.on( | |
'-G', '--groonga', | |
'Output to Groonga database' | |
) do | |
begin | |
require 'rroonga' | |
rescue LoadError => e | |
raise OptionParser::InvalidArgument, | |
"requires Groonga library (#{e.message}; try \"gem i rroonga\")" | |
end | |
begin | |
require_relative 'groonga_rails_log' | |
rescue LoadError => e | |
raise OptionParser::InvalidArgument, "requires groonga_rails_log.rb (#{e.message})" | |
end | |
@printer.driver_class = :groonga | |
end | |
end | |
def define_raw_printer_option(parser) | |
parser.on( | |
'-R', '--raw', | |
'Output in raw format' | |
) do | |
@printer.driver_class = :raw | |
end | |
end | |
def define_null_printer_option(parser) | |
parser.on( | |
'-N', '--no-output', | |
'No output' | |
) do | |
@printer.driver_class = :null | |
end | |
end | |
end | |
if $0 == __FILE__ | |
count = 0 | |
counter = lambda do | |
loop do | |
sleep 1 | |
print "\r#{count}" | |
end | |
ensure | |
puts "\r#{count}" | |
end | |
begin | |
selector = Selector.new | |
printer = Printer.new | |
options = CommandlineOption.parse(selector: selector, printer: printer) | |
index = Index.new(ARGF) | |
if $stdout.tty? && printer.output_directory | |
counter_th = Thread.new { counter.call } | |
end | |
index.select(selector) do |data| | |
count += 1 | |
printer.print(data) | |
end | |
rescue Errno::EPIPE, Interrupt | |
# noop | |
rescue StandardError => e | |
raise if options&.debug | |
abort e.message | |
ensure | |
counter_th&.kill | |
end | |
exit(count.zero? ? 1 : 0) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
groonga_rails_log.rb
https://gist.github.com/arika/56d58e30eb79b39f0cb6ef3bbc5e2346