Last active
January 1, 2016 04:29
-
-
Save mikeda/8092351 to your computer and use it in GitHub Desktop.
bot検出用に作ってるプラグイン
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Detects keys whose record count within a fixed time window exceeds a threshold.
# config:
#   <store>
#     type bot_detector
#     tag bot_count
#     count_interval 300
#     count_key uid
#     threshold 10000
#     limit 10
#   </store>
# input:
#   { "uid":"aaa", ...}
#   { "uid":"bbb", ...}
#   { "uid":"aaa", ...}
#   ...
# output:
#   2013-12-23T15:01:05+09:00 bot_count { "aaa":10232, ...}
module Fluent
  # Output plugin that counts incoming records per value of +count_key+ and,
  # once every +count_interval+ seconds, emits a single record of the form
  # { key => count, ... } under +tag+ containing the keys whose count is
  # strictly greater than +threshold+.
  #
  # Configuration parameters:
  #   tag            - tag of the emitted summary record (required)
  #   count_interval - length of one counting window, in seconds (required)
  #   count_key      - record field whose value is counted (required)
  #   threshold      - only keys with count > threshold are reported (required)
  #   limit          - optional; report at most this many keys (highest counts)
  class BotDetector < Output
    Fluent::Plugin.register_output('bot_detector', self)

    config_param :tag, :string, :default => nil
    config_param :count_interval, :time, :default => nil
    config_param :count_key, :string, :default => nil
    config_param :threshold, :integer, :default => nil
    config_param :limit, :integer, :default => nil

    # Validates the configuration.
    #
    # Raises Fluent::ConfigError when tag, count_interval, count_key or
    # threshold is missing. +limit+ stays optional: flush_emit only truncates
    # the result when a limit is configured.
    def configure(conf)
      super
      raise Fluent::ConfigError, "tag must be specified" unless @tag
      raise Fluent::ConfigError, "count_interval must be specified" unless @count_interval
      raise Fluent::ConfigError, "count_key must be specified" unless @count_key
      raise Fluent::ConfigError, "threshold must be specified" unless @threshold
    end

    # Resets the counters and starts the background thread that periodically
    # flushes them.
    def start
      super
      @counts = {}
      # Guards @counts, which is written by emit and swapped by the watcher
      # thread in flush_emit.
      @mutex = Mutex.new
      @watcher = Thread.new(&method(:watch))
    end

    # Stops the watcher thread so it does not outlive the plugin. Counts
    # accumulated since the last flush are not emitted.
    def shutdown
      super
      return unless @watcher
      @watcher.terminate
      @watcher.join
    end

    # Body of the watcher thread: polls twice a second and calls flush_emit
    # whenever at least count_interval seconds have passed since the last
    # flush. Errors are logged as warnings and the loop keeps running.
    def watch
      @last_checked ||= Fluent::Engine.now
      loop do
        sleep 0.5
        begin
          now = Fluent::Engine.now
          if now - @last_checked >= @count_interval
            flush_emit
            # Use the time sampled before the flush so that the time spent
            # flushing counts toward the next window.
            @last_checked = now
          end
        rescue => e
          $log.warn "#{e.class} #{e.message} #{e.backtrace.first}"
        end
      end
    end

    # Swaps out the current counters and emits the keys whose count is
    # greater than the threshold as one record. When +limit+ is set, only the
    # +limit+ keys with the highest counts are kept, listed from highest to
    # lowest count. A record is emitted even when no key qualifies.
    def flush_emit
      time = Fluent::Engine.now
      flushed_counts = @mutex.synchronize do
        current = @counts
        @counts = {}
        current
      end

      output = flushed_counts.select { |_key, count| count > @threshold }
      if @limit
        # One sort replaces the per-key Array#include? scan.
        output = Hash[output.sort_by { |_key, count| -count }.first(@limit)]
      end

      Fluent::Engine.emit(@tag, time, output)
    end

    # Counts every record of the event stream by its +count_key+ value.
    # Records where that field is missing (nil or false) are ignored.
    def emit(tag, es, chain)
      # Hold the lock for the whole batch so a concurrent flush cannot swap
      # @counts between the read and the write of a counter.
      @mutex.synchronize do
        es.each do |_time, record|
          key = record[@count_key]
          next unless key
          @counts[key] = (@counts[key] || 0) + 1
        end
      end
      chain.next
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment