Skip to content

Instantly share code, notes, and snippets.

@Phyks
Last active May 7, 2019 21:02
Show Gist options
  • Save Phyks/a9458568ef3b66590a860f0afcc4a59c to your computer and use it in GitHub Desktop.
Save Phyks/a9458568ef3b66590a860f0afcc4a59c to your computer and use it in GitHub Desktop.
Fetch Google "Now" answers from the CLI. Usage: `python3 google_now.py QUERY`.
#!/usr/bin/env python3
import sys
import urllib.parse
import html2text
import scrapy
from scrapy.crawler import CrawlerProcess
# Module-level collector: every item that goes through MyPipeline is appended
# here as a plain dict so the script can read it once the crawl is over.
results = []


class MyPipeline():
    """Item pipeline that records each scraped item in the global ``results``."""

    def process_item(self, item, spider):
        """Store a dict copy of *item* in ``results`` and hand the item back.

        Args:
            item: The scraped item; anything accepted by ``dict()``.
            spider: The spider that produced the item (unused).

        Returns:
            The unmodified *item*. Scrapy's pipeline contract expects
            ``process_item`` to return the item (or raise ``DropItem``) so
            that later pipeline stages and signal handlers still receive it.
        """
        results.append(dict(item))
        return item
class GoogleNowSpider(scrapy.Spider):
    """Spider that loads one Google search page and extracts a single answer.

    The answer is looked up in three places, from most to least specific:
    the ``h2.r`` heading (labelled "Google calc" by the original author),
    the ``td#rhs_block`` cell, and finally the ``div#ires`` result list.
    """

    name = "googlenow"

    def __init__(self, query="", *args, **kwargs):
        """Build the single start URL from *query*.

        Args:
            query: Free-text search query; it is URL-quoted before being
                interpolated into the google.fr search URL.
            *args: Forwarded to ``scrapy.Spider.__init__``.
            **kwargs: Forwarded to ``scrapy.Spider.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.start_urls = [
            "https://www.google.fr/search?q=%s" % urllib.parse.quote(query)
        ]

    def parse(self, response):
        """Return the first answer any extractor can find in *response*.

        The extractors are tried in order of specificity. One that finds
        nothing returns ``None`` and the next one is tried, so a page that
        only partially matches an expected layout no longer aborts the
        callback with an ``IndexError``.

        Returns:
            A dict with ``"type"`` and ``"result"`` keys, or ``None`` (after
            logging an error) when no extractor matched.
        """
        for extractor in (self.parse_calc, self.parse_col, self.parse_rest):
            item = extractor(response)
            if item is not None:
                return item
        self.logger.error("No answer found in response for %s", response.url)
        return None

    def parse_calc(self, response):
        """Extract the text of the first ``h2.r`` text node.

        Returns:
            ``{"type": "calc", "result": <text>}``, or ``None`` when the page
            has no such text node.
        """
        text = response.css("h2.r::text").extract_first()
        if text is None:
            return None
        return {
            "type": "calc",
            "result": text,
        }

    def parse_col(self, response):
        """Extract the first child of the ``<ol>`` inside ``td#rhs_block``.

        The matched HTML fragment is converted with ``html2text``.

        Returns:
            ``{"type": "col", "result": <converted text>}``, or ``None`` when
            the page has no such element.
        """
        fragment = response.xpath(
            "//td[@id='rhs_block']/ol/*[1]"
        ).extract_first()
        if fragment is None:
            return None
        return {
            "type": "col",
            "result": html2text.html2text(fragment),
        }

    def parse_rest(self, response):
        """Extract the first child of the ``<ol>`` inside ``div#ires``.

        The matched HTML fragment is converted with ``html2text``.

        Returns:
            ``{"type": "other", "result": <converted text>}``, or ``None``
            when the page has no such element.
        """
        fragment = response.xpath(
            "//div[@id='ires']/ol/*[1]"
        ).extract_first()
        if fragment is None:
            return None
        return {
            "type": "other",
            "result": html2text.html2text(fragment),
        }
if __name__ == "__main__":
    # A query is mandatory; all remaining CLI arguments are joined into it.
    if len(sys.argv) < 2:
        sys.exit("Usage: %s QUERY" % sys.argv[0])

    process = CrawlerProcess({
        'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
        # The pipeline class lives in this script, hence the __main__ path.
        'ITEM_PIPELINES': {'__main__.MyPipeline': 1},
        'LOG_LEVEL': 'ERROR'
    })
    process.crawl(GoogleNowSpider, query=(" ".join(sys.argv[1:])))
    # Per the Scrapy documentation, start() blocks until crawling is finished,
    # so ``results`` is fully populated once it returns.
    process.start()

    # The crawl may legitimately produce no item (request failure, page
    # layout not recognised); report that instead of raising IndexError.
    if not results:
        sys.exit("No answer found.")
    print(results[0]["result"])
@EvanDotPro
Copy link

Hey @Phyks, I was wondering if you'd be willing to update this gist with a header comment specifying which license you intend this code to be released under, if any. Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment