Last active
May 16, 2016 12:50
-
-
Save gmcmillan/21f8e9ecb45cfeb1cce2 to your computer and use it in GitHub Desktop.
Ruby Class to Query Google Safe Browsing API in Batches
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Note: the Safe Browsing API limits each POST request to 500 domains, which is why this code splits the domain list into batches.
require 'net/https' | |
require 'open3' | |
# Queries the Google Safe Browsing Lookup API for a list of domains, in
# batches, and records the domains that were reported as unsafe together with
# the local account that owns each of them.
class SafeBrowsing
  # Maximum number of domains sent in a single POST request.
  BATCH_SIZE = 500

  attr_reader :opt
  attr_accessor :results

  # Public: Build a checker.
  #
  # opt - Hash of options (default: {}):
  #       :client - Value sent as the `client` query parameter.
  #       :key    - Value sent as the `key` query parameter.
  def initialize(opt={})
    @opt = opt
    @results = {}
  end

  # Public: Accept Array of domains, split it into batches of
  # 500 and use check_batch method to query Safe Browsing API.
  #
  # domains - Array of domain names to check
  #
  # Example:
  #
  #   check(['domain.com', 'domain2.com'])
  #   # => {"domain.com"=>{:type=>"malware", :owner=>"dom"}}
  #
  # Returns Hash of infected domains. The Hash is the `results` accessor, so
  # entries accumulate across successive calls on the same instance.
  def check(domains)
    domains.each_slice(BATCH_SIZE) do |slice|
      check_batch(slice).each_value do |entry|
        next if entry[:safe]

        # The owner lookup spawns a process, so it is only done for flagged domains.
        results[entry[:domain]] = {
          type: entry[:type],
          owner: find_domain_owner(entry[:domain])
        }
      end
    end
    results
  end

  private

  # Private: Take domains Array and run Safe Browsing API check
  # against each domain then return Hash of results.
  #
  # domains - Array of domains (maximum of 500)
  #
  # Example:
  #
  #   check_batch(['domain.com', 'domain2.com'])
  #   # => {0=>{:domain=>"domain.com", :safe=>true, :type=>"ok"},
  #   #     1=>{:domain=>"domain2.com", :safe=>true, :type=>"ok"}}
  #
  # Returns Hash keyed by the position of each domain in the batch.
  # Raises RuntimeError if the API answered with fewer lines than domains.
  def check_batch(domains)
    verdicts = safe_api(domains)

    # Verdicts are matched to domains by position, so a short response cannot
    # be interpreted; fail loudly instead of calling a method on nil.
    if verdicts.size < domains.size
      raise "Error: Safe Browsing API returned #{verdicts.size} results for #{domains.size} domains"
    end

    entries = domains.each_with_index.map do |domain, n|
      verdict = verdicts[n]
      {domain: domain, safe: verdict.include?('ok'), type: verdict}
    end
    index_array(entries)
  end

  # Private: Take Array and convert it to a Hash and index it by making the key
  # an Integer.
  #
  # array - The Array that we will be indexing
  #
  # Example:
  #
  #   index_array(['zero', 'one', 'two'])
  #   # => {0=>"zero", 1=>"one", 2=>"two"}
  #
  # Returns Hash
  def index_array(array)
    Hash[array.each_with_index.map { |value, position| [position, value] }]
  end

  # Private: Take a String and find the owner (username) of the account on
  # the server.
  #
  # domain - String of the domain name
  #
  # Example:
  #
  #   find_domain_owner("domain.com")
  #   # => "username"
  #
  # Returns String
  def find_domain_owner(domain)
    # The domain is passed as its own argument so it is never parsed by a shell.
    run_command('/scripts/whoowns', domain.to_s)[:stdout].chomp
  end

  # Private: Run a system command using the Open3 library and return a Hash
  # containing the stdout, stderr, and exit status.
  #
  # cmd - The system command you want to run: either a single String, or the
  #       program followed by its arguments as separate Strings. The second
  #       form hands the arguments to the program without shell
  #       interpretation.
  #
  # Examples
  #
  #   run_command('whoami')
  #   # => {:stdout=>"root\n", :stderr=>"", :exitstatus=>true}
  #
  #   run_command('/scripts/whoowns', 'domain.com')
  #   # => {:stdout=>"username\n", :stderr=>"", :exitstatus=>true}
  #
  # Returns Hash of stdout, stderr, and exit status of the command run
  # (:exitstatus is true only when the command exited with status 0).
  def run_command(*cmd)
    stdout, stderr, status = Open3.capture3(*cmd)
    {
      :stdout => stdout,
      :stderr => stderr,
      :exitstatus => status.exitstatus == 0
    }
  end

  # Private: Take Array of no more than 500 domains and send POST request
  # to Google Safe Browsing API and return results as Array.
  #
  # domains - Array of domains to check
  #
  # Example:
  #
  #   safe_api(['domain1.com', 'domain2.com'])
  #   # => ["malware", "ok"]
  #
  # Returns Array with one verdict String per domain (empty Array for an
  # empty input, in which case no request is sent).
  # Raises RuntimeError if more than 500 domains are given or the API answers
  # with any status other than 200 or 204.
  def safe_api(domains)
    raise "Error: Domains Array contained more than 500 domains" if domains.size > BATCH_SIZE
    return [] if domains.empty?

    http = Net::HTTP.new('sb-ssl.google.com', 443)
    http.use_ssl = true

    # Encode the credentials so reserved characters cannot corrupt the query string.
    query = URI.encode_www_form(
      client: opt[:client].to_s,
      key: opt[:key].to_s,
      appver: '0.1',
      pver: '3.1'
    )
    req = Net::HTTP::Post.new("/safebrowsing/api/lookup?#{query}")
    req.body = form_request_body(domains)
    resp = http.request(req)

    case resp.code.to_i
    when 200
      # one or more domains in the list has malware or phishing content
      resp.body.split("\n")
    when 204
      # google returns a 204 and empty body if all domains were clean, so we need to simulate
      # the correct number of "ok" lines
      Array.new(domains.size) { 'ok' }
    when 400
      raise "400 - Bad Request: The HTTP request was not correctly formed."
    when 401
      raise "401 - Not Authorized: The API key is not authorized."
    when 503
      raise "503 - Service Unavailable: The server cannot handle the request."
    else
      # Any other status would otherwise fall through as nil and fail later
      # with an unrelated NoMethodError.
      raise "#{resp.code} - Unexpected response from the Safe Browsing API."
    end
  end

  # Private: Take Array of domain names and re-map them to include
  # the http:// prefix (Google API Requirement) and add the
  # total number of domains in the request as the first element
  # of the Array, then convert to String.
  #
  # domains - Array of domains
  #
  # Example:
  #
  #   form_request_body(['domain.com', 'domain2.com'])
  #   # => "2\nhttp://domain.com\nhttp://domain2.com"
  #
  # Returns String
  def form_request_body(domains)
    urls = domains.map { |domain| "http://#{domain}" }
    [domains.size, *urls].join("\n")
  end
end
# Example usage:
#
# safe = SafeBrowsing.new(
#   :client => "google_dev_client",
#   :key => "google_dev_key"
# )
# domains = IO.read("/etc/localdomains").split
# puts safe.check(domains)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How to run this script ?