Simple command line tool for Google web searches.
Inspired by jarun/google-cli.
This script scrapes the search results.
$ ./goocl foobar
$ echo 'hello, world' | ./goocl -0 | fzf --read0
Simple command line tool for Google web searches.
Inspired by jarun/google-cli.
This script scrapes the search results.
$ ./goocl foobar
$ echo 'hello, world' | ./goocl -0 | fzf --read0
source 'https://rubygems.org' | |
gem 'nokogiri' | |
gem 'slop' |
#!/usr/bin/env ruby | |
require 'nokogiri' | |
require 'open-uri' | |
require 'slop' | |
require 'uri' | |
# Browser-like User-Agent header sent with the search request.
UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 ' \
     '(KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'.freeze

# Pieces the request URL is assembled from: URL_PREFIX + domain + query.
URL_PREFIX = 'https://www.'.freeze
TLD = 'com'.freeze
# "{}" is the placeholder that receives the encoded query string.
QUERY_TEMPL = '/search?{}'.freeze

# Query parameters sent with every search (input/output encoding, UI language).
# NOTE(review): deliberately left unfrozen; freeze it only once no caller
# merges into it in place.
DEFAULT_PARAMS = {
  ie: 'UTF-8',
  oe: 'UTF-8',
  hl: 'en',
}

# XPath selecting one node per search result, and the title link inside it.
XPATH_ITEM = '//*[@id="rso"]/div/div[*]/div'.freeze
# XPATH_ITEM = '//*[@id="rso"]/div/div/div[*]/div'
XPATH_TITLE = 'h3/a'.freeze

# Scraped from:
# https://en.wikipedia.org/wiki/List_of_Google_domains
# on Jan 04, 2016

# Country codes served from google.<cc>.
GOOGLE_TLD = %w[
  ac ad ae al am as at az ba be bf bg bi bj bs bt by ca cat cc cd cf cg ch
  ci cl cm cn cv cz de dj dk dm dz ee es fi fm fr ga ge gf gg gl gm gp gr gy
  hn hr ht hu ie im io iq is it je jo kg ki kz la li lk lt lu lv md me mg mk
  ml mn ms mu mv mw ne nl no nr nu pl pn ps pt ro rs ru rw sc se sh si sk sm
  sn so sr st td tg tk tl tm tn to tt vg vu ws
].freeze

# Country codes served from google.co.<cc>.
GOOGLE_CO_TLD = %w[
  ao bw ck cr id il in jp ke kr ls ma mz nz pn th tz ug uk uz ve vi za zm zw
].freeze

# Country codes served from google.com.<cc>.
GOOGLE_COM_TLD = %w[
  af ag ai ar au bd bh bn bo br bz co cu cy do ec eg et fj gh gi gt hk jm kh
  kw lb lc ly mm mt mx my na nf ng ni np om pa pe pg ph pk pr py qa sa sb sg
  sl sv tj tr tw ua uy vc vn
].freeze
# A single search hit: the title shown for the result and the URL it links to.
class GooclResult
  attr_accessor :title, :url

  # Renders the hit as two lines: the title, then the URL.
  #
  # @param print0 [Boolean] when true, a NUL byte is appended right after the
  #   URL (before the final newline) so records can be split on "\0".
  #
  # @return [String] "<title>\n<url>\n", or "<title>\n<url>\0\n" when
  #   +print0+ is true. Unset attributes render as empty strings.
  def to_s(print0 = false)
    terminator = print0 ? "\0" : ''
    "#{@title}\n#{@url}#{terminator}\n"
  end
end
# Maps a country code to the Google domain used for that country.
#
# The code is looked up in GOOGLE_TLD, GOOGLE_CO_TLD and GOOGLE_COM_TLD, in
# that order. Matching ignores letter case and surrounding whitespace, so
# "DE", " de " and "de" are equivalent.
#
# @param country [String, nil] country TLD such as "de", "jp" or "au".
#
# @return [String] "google.<cc>", "google.co.<cc>" or "google.com.<cc>".
#   Falls back to "google.com" when the code is in none of the lists; a
#   warning is written to standard error in that case unless +country+ is nil.
def construct_domain(country = nil)
  code = country.to_s.strip.downcase

  if GOOGLE_TLD.include?(code)
    "google.#{code}"
  elsif GOOGLE_CO_TLD.include?(code)
    "google.co.#{code}"
  elsif GOOGLE_COM_TLD.include?(code)
    "google.com.#{code}"
  else
    warn("No such country: #{country}") unless country.nil?
    'google.com'
  end
end
# Builds the path-and-query part of the search URL.
#
# @param keywords [Array<String>] keywords that will be ANDed together.
# @param params [Hash] extra query parameters; on key collision they take
#   precedence over DEFAULT_PARAMS.
#
# @return [String] a query string: QUERY_TEMPL with its "{}" placeholder
#   replaced by the form-encoded parameters followed by the "q" parameter.
def construct_query(keywords, params = {})
  # Non-destructive merge: DEFAULT_PARAMS must not change between calls.
  search_params = DEFAULT_PARAMS.merge(params)
  # Keywords are joined with a space, which form encoding renders as "+".
  # Reserved characters inside a keyword ("&", "=", "+", "#") are
  # percent-encoded so they cannot break out of the q parameter.
  search_params[:q] = keywords.join(' ')
  # Block form of sub so the replacement text is inserted literally.
  QUERY_TEMPL.sub('{}') { URI.encode_www_form(search_params) }
end
# ---- Command-line entry point ----

# Parse the flags; whatever is left on the command line is the keyword list.
opts = Slop.parse do |o|
  o.string '-c', '--country', 'The TLD of the country'
  o.string '-l', '--lang', 'Language', default: 'en'
  o.bool '-0', '--print0', 'Print null-delimited items'
end

# When STDIN is a pipe, every input line is one keyword; otherwise the
# keywords come from the command-line arguments.
raw_keywords = File.pipe?(STDIN) ? STDIN.readlines : opts.arguments
# Drop line terminators and blank entries so they do not end up in the query.
keywords = raw_keywords.map(&:strip).reject(&:empty?)

# Writes the message to standard error and exits with status 1.
abort('Keyword not given') if keywords.empty?

domain = construct_domain(opts[:country])
query = construct_query(keywords, { hl: opts[:lang] })
url = URL_PREFIX + domain + query

# URI#open (provided by open-uri) replaces Kernel#open, which stopped
# accepting URLs in Ruby 3.0. The block form closes the response when done.
doc = URI.parse(url).open('User-Agent' => UA) { |io| Nokogiri::HTML.parse(io) }

# URI.unescape was removed in Ruby 3.0; the default parser still offers it.
unescape = ->(text) { URI::DEFAULT_PARSER.unescape(text) }

# TODO: if pipe
doc.xpath(XPATH_ITEM).each do |item|
  titles = item.xpath(XPATH_TITLE)
  # Nodes without a title link are not search hits; printing them would
  # only emit blank records.
  next if titles.empty?

  result = GooclResult.new
  result.title = unescape.call(titles.text).chomp
  result.url = unescape.call(titles.xpath('@href').text).chomp
  puts result.to_s(opts.print0?)
end