Skip to content

Instantly share code, notes, and snippets.

@mikezter
Created September 10, 2010 09:01
Show Gist options
  • Save mikezter/573342 to your computer and use it in GitHub Desktop.
Save mikezter/573342 to your computer and use it in GitHub Desktop.
Very simple logging of bot crawls in your Rails app. Drops to imedo.de
# drops to imedo.de
# Base controller: wraps actions in a timing filter that reports to Crawling.
class ApplicationController < ActionController::Base
  # The filter is skipped whenever logged_in? returns a truthy value.
  around_filter :log_crawlers, :unless => :logged_in?

  # Around filter: measures the wall-clock seconds spent inside the yielded
  # block and passes the current request together with that figure to
  # Crawling.log.
  def log_crawlers
    started_at = Time.now
    yield
    elapsed = Time.now - started_at
    Crawling.log(request, elapsed)
  end
end
# drops to imedo.de
# Aggregated request-timing figures for crawler ("bot") traffic. Records are
# looked up (or created) by URL and bot name, and every logged hit updates the
# min/max/sum/last duration and the hit counter of that record.
class Crawling < ActiveRecord::Base
  # Records one request if its user agent belongs to a known bot.
  #
  # request  - object responding to #user_agent and #request_uri.
  # duration - elapsed time of the request (numeric, same unit as stored).
  #
  # Returns nil when the user agent is not a recognised bot; otherwise the
  # result of the transaction block, i.e. the value of #save.
  def self.log(request, duration)
    bot = determine_bot(request.user_agent)
    return unless bot

    transaction do
      crawling = Crawling.find_or_create_by_url_and_bot(request.request_uri, bot)

      # The crawlings columns default to 0 rather than NULL, so a fresh record
      # reports min_duration == 0 and "duration < min" could never be true.
      # A zero hit counter therefore means "no sample yet" and the first
      # duration seeds both extremes.
      first_hit = crawling.times.to_i.zero?

      if first_hit || crawling.min_duration.nil? || duration < crawling.min_duration
        crawling.min_duration = duration
      end
      if first_hit || crawling.max_duration.nil? || duration > crawling.max_duration
        crawling.max_duration = duration
      end

      crawling.sum_duration = (crawling.sum_duration || 0.0) + duration
      crawling.last_duration = duration
      # Guard the counter against nil the same way sum_duration is guarded.
      crawling.times = crawling.times.to_i + 1

      # Plain #save on purpose: a failed statistics write must not raise into
      # the request that triggered it.
      crawling.save
    end
  end

  # Returns all records stored for the given URL (one per bot that hit it).
  def self.stats_for(url)
    Crawling.find_all_by_url(url)
  end

  # Mean duration over all logged hits.
  #
  # Returns 0.0 when no hit has been counted yet, instead of the NaN/Infinity
  # a float division by zero would produce.
  def avg_duration
    return 0.0 if times.to_i.zero?

    sum_duration / times.to_f
  end

  # Maps a user-agent string to the bot name stored in the +bot+ column.
  #
  # NOTE: this class method is public. The +protected+ keyword that used to
  # precede it only affects instance methods, so it never restricted access;
  # it was dropped to avoid suggesting otherwise.
  #
  # user_agent - String or nil.
  #
  # Returns the bot name, or nil when no pattern matches (including nil input).
  def self.determine_bot(user_agent)
    case user_agent
    when /googlebot/i  then 'GoogleBot'
    when /msnbot/i     then 'MSNBot'
    when /alexa/i      then 'Alexa Crawler'
    when /yahoo/i      then 'Yahoo! Slurp'
    when /ask jeeves/i then 'Ask Jeeves/Teoma'
    end
  end
end
# drops to imedo.de
# Migration for the crawlings table, which holds url/bot pairs together with
# their duration figures and a hit counter.
class CreateCrawlings < ActiveRecord::Migration
  # Creates the table and a composite index over (url, bot).
  def self.up
    create_table :crawlings do |table|
      table.string :url, :bot
      # Every duration figure is a non-null float that starts out at 0.
      table.float :min_duration, :max_duration, :sum_duration, :last_duration,
                  :null => false, :default => 0
      table.integer :times, :null => false, :default => 0
      table.timestamps
    end

    add_index :crawlings, [:url, :bot]
  end

  # Drops the table again.
  def self.down
    drop_table :crawlings
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment