adamloving · April 1, 2013 21:15
diff --git a/useragent.rb b/useragent.rb
 # h = {}; Event.all.each { |e| h[e.useragent] = (h[e.useragent] || 0) + 1;  }
 # h.keys.each { |k| puts "#{h[k]},#{k}" }
 # Classifies HTTP User-Agent strings against a table of known bots, crawlers
 # and HTTP client libraries.
 #
 # Matching is a plain, case-sensitive substring test; the first table entry
 # whose :match text occurs in the user agent wins, so entry order matters.
 class UserAgent
   # gplus => "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0"
   #
   # Each entry is a Hash with:
   #   :name  - label for the client
   #   :match - case-sensitive substring looked for in the user agent
   #   :url   - reference URL for the client ('' when none is recorded)
   #
   # Only the Array itself is frozen; the entry Hashes are left unfrozen because
   # is_bot? hands them out to callers.
   BOTS = [
     { :name => 'AppEngine',             :match => 'AppEngine',            :url => 'http://code.google.com/appengine'},
     { :name => 'Embedly',               :match => 'Embedly',              :url => 'http://support.embed.ly/'},
     { :name => 'news.me',               :match => 'news.me',              :url => ''},
     { :name => 'Voyager',               :match => 'Voyager/1.0',          :url => ''},
     { :name => 'bitlybot',              :match => 'bitlybot',             :url => ''},
     { :name => 'InAGist',               :match => 'InAGist',              :url => 'http://inagist.com'},
     { :name => 'Crowsnest',             :match => 'Crowsnest',            :url => 'http://www.crowsnest.tv'},
     { :name => 'Apache-HttpClient',     :match => 'Apache-HttpClient',    :url => ''},
     { :name => 'RockMeltEmbedService',  :match => 'RockMeltEmbedService', :url => ''},
     { :name => 'ShowyouBot',            :match => 'ShowyouBot',           :url => 'http://showyou.com/crawler'},
     { :name => 'Jakarta Commons',       :match => 'Jakarta Commons',      :url => ''},
     { :name => 'Java',                  :match => 'Java',                 :url => ''},
     { :name => 'CURL',                  :match => 'CURL',                 :url => ''},
     { :name => 'PycURL',                :match => 'PycURL',               :url => ''},
     { :name => 'percbotspider',         :match => 'percbotspider',        :url => ''},
     { :name => 'strawberryj.am',        :match => 'strawberryj.am',       :url => 'http://strawberryj.am'},
     { :name => 'Flipboard',             :match => 'FlipboardProxy',       :url => 'http://flipboard.com/browserproxy'},
     { :name => 'Google Feedfetcher',    :match => 'Feedfetcher',          :url => 'http://www.google.com/feedfetcher.html'},
     { :name => 'Google Plus',           :match => '20110814',             :url => 'http://plus.google.com'},
     { :name => 'EventMachine',          :match => 'EventMachine',         :url => 'http://rubyeventmachine.com/' },
     { :name => 'Python urllib',         :match => 'urllib',               :url => 'http://docs.python.org/library/urllib.html' },
     { :name => 'Gnip',                  :match => 'gnip.com',             :url => 'http://gnip.com' },
     { :name => 'Paper.li',              :match => 'PaperLiBot',           :url => 'http://paper.li' },
     { :name => 'TweetMeme',             :match => 'TweetmemeBot',         :url => 'http://tweetmeme.com' },
     { :name => 'Facebook',              :match => 'facebookexternalhit',  :url => 'http://www.facebook.com' },
     { :name => 'LinkedIn',              :match => 'LinkedInBot',          :url => 'http://linkedin.com' },
     { :name => 'Topsy',                 :match => 'Butterfly',            :url => 'http://topsy.com' },
     { :name => 'Summify',               :match => 'Summify',              :url => 'http://summify.com' },
     { :name => 'Twitterbot',            :match => 'Twitterbot',           :url => 'http://twitter.com' },
     { :name => 'Echo',                  :match => 'JS-Kit',               :url => 'http://aboutecho.com' },
     { :name => 'Yandex',                :match => 'YandexBot',            :url => 'http://yandex.com' },
     { :name => 'MetaURI',               :match => 'MetaURI',              :url => 'http://metauri.com'},
     # NOTE(review): never returned - any string containing 'bitlybot ' also
     # contains 'bitlybot', which is matched by the earlier entry above.
     { :name => 'Bitly',                 :match => 'bitlybot ',            :url => 'http://bitly.com' },
     { :name => 'TweetedTimes',          :match => 'TweetedTimes',         :url => 'http://tweetedtimes.com' },
     { :name => 'Google',                :match => 'Googlebot',            :url => 'http://google.com' },
     { :name => 'Evri',                  :match => 'Evrinid',              :url => 'http://evri.com' },
     { :name => 'LongURL',               :match => 'LongURL',              :url => 'http://longurl.org' },
     { :name => 'Topixtream',            :match => 'Castabot',             :url => 'http://topixtream.com' },
     { :name => 'urlresolver',           :match => 'urlresolver',          :url => 'http://ant.apache.org/ivy/history/latest-milestone/resolver/url.html' },
     { :name => 'cURL',                  :match => 'curl',                 :url => 'http://curl.haxx.se' },
     { :name => 'NineConnections.com',   :match => 'Kimengi',              :url => 'http://NineConnections.com'},
     { :name => 'Status.net (Laconica)', :match => 'Laconica',             :url => 'http://status.net/' },
     { :name => 'Showyou.com',           :match => 'Showyoubot',           :url => 'http://showyou.com' },
     { :name => 'PostPost',              :match => 'PostPost',             :url => 'http://postpost.com' },
     { :name => 'OGS Critter',           :match => 'OGS Critter',          :url => 'http://google.com/?OGS+Critter' },
     { :name => 'postrank',              :match => 'PostRank',             :url => 'http://postrank.com' },
     { :name => 'percbotspider',         :match => 'percobotspider',       :url => 'http://google.com/?percobotspider'},
     # NOTE(review): 'EQUENTIA-BOT' looks like a misspelling of the 'EQENTIA-BOT'
     # entry further down - confirm before removing either.
     { :name => 'Equentia',              :match => 'EQUENTIA-BOT',         :url => 'http://eqentia.com' },
     { :name => 'yolinkBot',             :match => 'yolinkBot',            :url => ''},
     { :name => 'NING',                  :match => 'NING',                 :url => ''},
     { :name => 'Instapaper',            :match => 'instapaper',           :url => ''},
     { :name => 'Blekkobot',             :match => 'blekko',               :url => 'http://blekko.com/about/blekkobot'},
     { :name => 'Baiduspider',           :match => 'Baiduspider',          :url => 'http://www.baidu.com/search/spider.html'},
     { :name => 'Ruby',                  :match => 'Ruby',                 :url => ''},
     { :name => 'Diffbot',               :match => 'diffbot',              :url => 'http://diffbot.com'},
     { :name => 'FriendFeedBot',         :match => 'friendfeed',           :url => 'http://friendfeed.com/about/bot'},
     { :name => 'Vancouver',             :match => 'Vancouver',            :url => ''},
     { :name => 'ScribdReader',          :match => 'ScribdReader',         :url => 'http://www.float.com'},
     { :name => 'HiveAnalyzer',          :match => 'HiveAnalyzer',         :url => 'http://www.businessinsider.com'},
     { :name => 'Yeti',                  :match => 'Yeti',                 :url => 'http://help.naver.com/robots/'},
     { :name => 'MFE_expand',            :match => 'MFE_expand',           :url => ''},
     { :name => 'intigi',                :match => 'intigi',               :url => 'http://intigi.com'},
     { :name => 'Trove',                 :match => 'Trove',                :url => ''},
     { :name => 'EQENTIA-BOT',           :match => 'EQENTIA-BOT',          :url => ''},
     { :name => 'JS-Kit',                :match => 'js-kit',               :url => 'http://js-kit.com/'},
     # Generic catch-all; kept near the end so the more specific *spider
     # entries above are reported first.
     { :name => 'spider',                :match => 'spider',               :url => ''},
     { :name => 'bingbot',               :match => 'bingbot',              :url => 'http://www.bing.com/bingbot.htm'},
     { :name => 'coccoc',                :match => 'coccoc',               :url => ''}
   ].freeze

   # Looks up the first BOTS entry whose :match text occurs in the user agent.
   #
   # The argument is coerced with to_s (like the other predicates in this
   # class), so nil and non-String values are accepted instead of raising.
   #
   # @param useragent [String, nil] raw User-Agent header value
   # @return [Hash, false, nil] the matching BOTS entry; false when nothing
   #   matches; nil when the user agent is nil, empty or whitespace-only
   def self.is_bot?(useragent)
     ua = useragent.to_s
     return if ua.strip.empty?

     BOTS.find { |bot| ua.include?(bot[:match]) } || false
   end

   # @param useragent [String, nil] raw User-Agent header value
   # @return [Boolean] true when the user agent contains 'facebookexternalhit'
   def self.is_facebook_bot?(useragent)
     useragent.to_s.include?('facebookexternalhit')
   end

   # @param useragent [String, nil] raw User-Agent header value
   # @return [Boolean] true when the user agent contains 'LinkedInBot'
   def self.is_linkedin_bot?(useragent)
     useragent.to_s.include?('LinkedInBot')
   end

   # Detects the user agent noted at the top of the class as "gplus", which is
   # recognised only by its Gecko build / Firefox version signature.
   #
   # @param useragent [String, nil] raw User-Agent header value
   # @return [Boolean] true when the user agent contains
   #   'Gecko/20110814 Firefox/6.0'
   def self.is_gplus_bot?(useragent)
     useragent.to_s.include?('Gecko/20110814 Firefox/6.0')
   end

   # @param useragent [String, nil] raw User-Agent header value
   # @return [Boolean] true when the user agent is the Facebook, LinkedIn or
   #   gplus fetcher as detected by the predicates above
   def self.needs_cloaking?(useragent)
     is_facebook_bot?(useragent) || is_linkedin_bot?(useragent) || is_gplus_bot?(useragent)
   end
 end
	# h = {}; Event.all.each { |e| h[e.useragent] = (h[e.useragent] || 0) + 1; }
	# h.keys.each { |k| puts "#{h[k]},#{k}" }
	# Classifies HTTP User-Agent strings against a table of known bots, crawlers
	# and HTTP client libraries.
	#
	# Matching is a plain, case-sensitive substring test; the first table entry
	# whose :match text occurs in the user agent wins, so entry order matters.
	class UserAgent
	  # gplus => "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0"
	  #
	  # Each entry is a Hash with:
	  #   :name  - label for the client
	  #   :match - case-sensitive substring looked for in the user agent
	  #   :url   - reference URL for the client ('' when none is recorded)
	  #
	  # Only the Array itself is frozen; the entry Hashes are left unfrozen because
	  # is_bot? hands them out to callers.
	  BOTS = [
	    { :name => 'AppEngine',             :match => 'AppEngine',            :url => 'http://code.google.com/appengine'},
	    { :name => 'Embedly',               :match => 'Embedly',              :url => 'http://support.embed.ly/'},
	    { :name => 'news.me',               :match => 'news.me',              :url => ''},
	    { :name => 'Voyager',               :match => 'Voyager/1.0',          :url => ''},
	    { :name => 'bitlybot',              :match => 'bitlybot',             :url => ''},
	    { :name => 'InAGist',               :match => 'InAGist',              :url => 'http://inagist.com'},
	    { :name => 'Crowsnest',             :match => 'Crowsnest',            :url => 'http://www.crowsnest.tv'},
	    { :name => 'Apache-HttpClient',     :match => 'Apache-HttpClient',    :url => ''},
	    { :name => 'RockMeltEmbedService',  :match => 'RockMeltEmbedService', :url => ''},
	    { :name => 'ShowyouBot',            :match => 'ShowyouBot',           :url => 'http://showyou.com/crawler'},
	    { :name => 'Jakarta Commons',       :match => 'Jakarta Commons',      :url => ''},
	    { :name => 'Java',                  :match => 'Java',                 :url => ''},
	    { :name => 'CURL',                  :match => 'CURL',                 :url => ''},
	    { :name => 'PycURL',                :match => 'PycURL',               :url => ''},
	    { :name => 'percbotspider',         :match => 'percbotspider',        :url => ''},
	    { :name => 'strawberryj.am',        :match => 'strawberryj.am',       :url => 'http://strawberryj.am'},
	    { :name => 'Flipboard',             :match => 'FlipboardProxy',       :url => 'http://flipboard.com/browserproxy'},
	    { :name => 'Google Feedfetcher',    :match => 'Feedfetcher',          :url => 'http://www.google.com/feedfetcher.html'},
	    { :name => 'Google Plus',           :match => '20110814',             :url => 'http://plus.google.com'},
	    { :name => 'EventMachine',          :match => 'EventMachine',         :url => 'http://rubyeventmachine.com/' },
	    { :name => 'Python urllib',         :match => 'urllib',               :url => 'http://docs.python.org/library/urllib.html' },
	    { :name => 'Gnip',                  :match => 'gnip.com',             :url => 'http://gnip.com' },
	    { :name => 'Paper.li',              :match => 'PaperLiBot',           :url => 'http://paper.li' },
	    { :name => 'TweetMeme',             :match => 'TweetmemeBot',         :url => 'http://tweetmeme.com' },
	    { :name => 'Facebook',              :match => 'facebookexternalhit',  :url => 'http://www.facebook.com' },
	    { :name => 'LinkedIn',              :match => 'LinkedInBot',          :url => 'http://linkedin.com' },
	    { :name => 'Topsy',                 :match => 'Butterfly',            :url => 'http://topsy.com' },
	    { :name => 'Summify',               :match => 'Summify',              :url => 'http://summify.com' },
	    { :name => 'Twitterbot',            :match => 'Twitterbot',           :url => 'http://twitter.com' },
	    { :name => 'Echo',                  :match => 'JS-Kit',               :url => 'http://aboutecho.com' },
	    { :name => 'Yandex',                :match => 'YandexBot',            :url => 'http://yandex.com' },
	    { :name => 'MetaURI',               :match => 'MetaURI',              :url => 'http://metauri.com'},
	    # NOTE(review): never returned - any string containing 'bitlybot ' also
	    # contains 'bitlybot', which is matched by the earlier entry above.
	    { :name => 'Bitly',                 :match => 'bitlybot ',            :url => 'http://bitly.com' },
	    { :name => 'TweetedTimes',          :match => 'TweetedTimes',         :url => 'http://tweetedtimes.com' },
	    { :name => 'Google',                :match => 'Googlebot',            :url => 'http://google.com' },
	    { :name => 'Evri',                  :match => 'Evrinid',              :url => 'http://evri.com' },
	    { :name => 'LongURL',               :match => 'LongURL',              :url => 'http://longurl.org' },
	    { :name => 'Topixtream',            :match => 'Castabot',             :url => 'http://topixtream.com' },
	    { :name => 'urlresolver',           :match => 'urlresolver',          :url => 'http://ant.apache.org/ivy/history/latest-milestone/resolver/url.html' },
	    { :name => 'cURL',                  :match => 'curl',                 :url => 'http://curl.haxx.se' },
	    { :name => 'NineConnections.com',   :match => 'Kimengi',              :url => 'http://NineConnections.com'},
	    { :name => 'Status.net (Laconica)', :match => 'Laconica',             :url => 'http://status.net/' },
	    { :name => 'Showyou.com',           :match => 'Showyoubot',           :url => 'http://showyou.com' },
	    { :name => 'PostPost',              :match => 'PostPost',             :url => 'http://postpost.com' },
	    { :name => 'OGS Critter',           :match => 'OGS Critter',          :url => 'http://google.com/?OGS+Critter' },
	    { :name => 'postrank',              :match => 'PostRank',             :url => 'http://postrank.com' },
	    { :name => 'percbotspider',         :match => 'percobotspider',       :url => 'http://google.com/?percobotspider'},
	    # NOTE(review): 'EQUENTIA-BOT' looks like a misspelling of the 'EQENTIA-BOT'
	    # entry further down - confirm before removing either.
	    { :name => 'Equentia',              :match => 'EQUENTIA-BOT',         :url => 'http://eqentia.com' },
	    { :name => 'yolinkBot',             :match => 'yolinkBot',            :url => ''},
	    { :name => 'NING',                  :match => 'NING',                 :url => ''},
	    { :name => 'Instapaper',            :match => 'instapaper',           :url => ''},
	    { :name => 'Blekkobot',             :match => 'blekko',               :url => 'http://blekko.com/about/blekkobot'},
	    { :name => 'Baiduspider',           :match => 'Baiduspider',          :url => 'http://www.baidu.com/search/spider.html'},
	    { :name => 'Ruby',                  :match => 'Ruby',                 :url => ''},
	    { :name => 'Diffbot',               :match => 'diffbot',              :url => 'http://diffbot.com'},
	    { :name => 'FriendFeedBot',         :match => 'friendfeed',           :url => 'http://friendfeed.com/about/bot'},
	    { :name => 'Vancouver',             :match => 'Vancouver',            :url => ''},
	    { :name => 'ScribdReader',          :match => 'ScribdReader',         :url => 'http://www.float.com'},
	    { :name => 'HiveAnalyzer',          :match => 'HiveAnalyzer',         :url => 'http://www.businessinsider.com'},
	    { :name => 'Yeti',                  :match => 'Yeti',                 :url => 'http://help.naver.com/robots/'},
	    { :name => 'MFE_expand',            :match => 'MFE_expand',           :url => ''},
	    { :name => 'intigi',                :match => 'intigi',               :url => 'http://intigi.com'},
	    { :name => 'Trove',                 :match => 'Trove',                :url => ''},
	    { :name => 'EQENTIA-BOT',           :match => 'EQENTIA-BOT',          :url => ''},
	    { :name => 'JS-Kit',                :match => 'js-kit',               :url => 'http://js-kit.com/'},
	    # Generic catch-all; kept near the end so the more specific *spider
	    # entries above are reported first.
	    { :name => 'spider',                :match => 'spider',               :url => ''},
	    { :name => 'bingbot',               :match => 'bingbot',              :url => 'http://www.bing.com/bingbot.htm'},
	    { :name => 'coccoc',                :match => 'coccoc',               :url => ''}
	  ].freeze

	  # Looks up the first BOTS entry whose :match text occurs in the user agent.
	  #
	  # The argument is coerced with to_s (like the other predicates in this
	  # class), so nil and non-String values are accepted instead of raising.
	  #
	  # @param useragent [String, nil] raw User-Agent header value
	  # @return [Hash, false, nil] the matching BOTS entry; false when nothing
	  #   matches; nil when the user agent is nil, empty or whitespace-only
	  def self.is_bot?(useragent)
	    ua = useragent.to_s
	    return if ua.strip.empty?

	    BOTS.find { |bot| ua.include?(bot[:match]) } || false
	  end

	  # @param useragent [String, nil] raw User-Agent header value
	  # @return [Boolean] true when the user agent contains 'facebookexternalhit'
	  def self.is_facebook_bot?(useragent)
	    useragent.to_s.include?('facebookexternalhit')
	  end

	  # @param useragent [String, nil] raw User-Agent header value
	  # @return [Boolean] true when the user agent contains 'LinkedInBot'
	  def self.is_linkedin_bot?(useragent)
	    useragent.to_s.include?('LinkedInBot')
	  end

	  # Detects the user agent noted at the top of the class as "gplus", which is
	  # recognised only by its Gecko build / Firefox version signature.
	  #
	  # @param useragent [String, nil] raw User-Agent header value
	  # @return [Boolean] true when the user agent contains
	  #   'Gecko/20110814 Firefox/6.0'
	  def self.is_gplus_bot?(useragent)
	    useragent.to_s.include?('Gecko/20110814 Firefox/6.0')
	  end

	  # @param useragent [String, nil] raw User-Agent header value
	  # @return [Boolean] true when the user agent is the Facebook, LinkedIn or
	  #   gplus fetcher as detected by the predicates above
	  def self.needs_cloaking?(useragent)
	    is_facebook_bot?(useragent) || is_linkedin_bot?(useragent) || is_gplus_bot?(useragent)
	  end
	end