Skip to content

Instantly share code, notes, and snippets.

View hamletbatista's full-sized avatar

Hamlet Batista hamletbatista

View GitHub Profile
// Two DOM lookups, each written as `primary || fallback` over near-identical selector paths.
// In each pair the primary path passes through
// `span > div > div.y3IDJd.rFZTte.Fx3kmc > content` between `div.OoO4Vb` and `div.shSP`;
// the fallback path omits that wrapper and is evaluated only if the primary lookup finds nothing.
// The results are not assigned to anything here.
// Lookup 1: targets `div:nth-child(3) > div:nth-child(4) > div:nth-child(2) > div` under `div.I1NQYe`.
document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > span > div > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(3) > div:nth-child(4) > div:nth-child(2) > div') ||
document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(3) > div:nth-child(4) > div:nth-child(2) > div');
// Lookup 2: same pattern, but targets `div:nth-child(3) > div:nth-child(3)` under `div.I1NQYe`.
// NOTE(review): the fallback path below ends in an extra `> div` compared with its primary path — confirm this is intended.
document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > span > div > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(3) > div:nth-child(3) > div:nth-child(2) > div') ||
document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(3) > div:nth-child(3) > div:nth-child(2) > div > div');
() => {
var data = {};
//referring page
data["referring_page"] = document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > span > div > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(1) > div:nth-child(3) > div:nth-child(2) > div > div') ||
document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(1) > div:nth-child(3) > div:nth-child(2) > div');
//last crawl
data["last_crawl"] = document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > span > div > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(2) > div:
import asyncio
from pyppeteer import connect
from time import sleep
async def inspect_urls(urls):
#example URI. Copy and paste from Chrome output
#run Chrome using (Mac):
#mkdir ~/chrome-dev
#/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 --no-first-run --user-data-dir=~/chrome-dev
await page.goto('https://search.google.com/search-console?resource_id=https%3A%2F%2Fwww.ranksense.com%2F',
{"waitUntil": "networkidle0"})
print(url)
#example input box selector, changes often
sel= "#gb > div.gb_gd.gb_Md.gb_Zb > div.gb_lc.gb_wd.gb_Fd.gb_rd.gb_vd.gb_Cd > div.gb_td > form > div > div > div > div > div > div.d1dlne > input.Ax4B8.ZAGvjd"
await page.waitForSelector(sel)
await page.click(sel)
#await page.querySelectorEval(sel, "el => el.value = 'Testing 123'") #doesn't work
await page.keyboard.type(url) #input url
await page.keyboard.press("Enter")
#wait for retrieval
sleep(5)
coverage_selector = "#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > span > div > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div > div:nth-child(3) > content > div"
try:
await page.waitForSelector(coverage_selector)
await page.click(coverage_selector)
except:
pass #skip click on timeout exception
js_extractor="""
() => {
var data = {};
//referring page
data["referring_page"] = document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > span > div > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(1) > div:nth-child(3) > div:nth-child(2) > div > div') ||
document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(1) > div:nth-child(3) > div:nth-child(2) > div');
//.textContent;
https://www.ranksense.com/
{'referring_page': None, 'last_crawl': 'Apr 2, 2019, 4:03:13 PM', 'crawled_as': 'Googlebot smartphone', 'crawl_allowed': 'Yes', 'indexing_allowed': 'Yes', 'user_declared_canonical': 'https://www.ranksense.com/', 'google_selected_canonical': 'Inspected URL'}
https://www.ranksense.com/how-it-works/
{'referring_page': None, 'last_crawl': 'Apr 1, 2019, 4:18:35 AM', 'crawled_as': 'Googlebot smartphone', 'crawl_allowed': 'Yes', 'indexing_allowed': 'Yes', 'user_declared_canonical': 'https://www.ranksense.com/how-it-works/', 'google_selected_canonical': 'Inspected URL'}
https://www.ranksense.com/pricing/
{'referring_page': None, 'last_crawl': 'Apr 1, 2019, 4:18:15 AM', 'crawled_as': 'Googlebot smartphone', 'crawl_allowed': 'Yes', 'indexing_allowed': 'Yes', 'user_declared_canonical': 'https://www.ranksense.com/pricing/', 'google_selected_canonical': 'Inspected URL'}
https://www.ranksense.com/blog/
{'referring_page': None, 'last_crawl': 'Mar 23, 2019, 11:53:31 AM', 'crawled_as': 'Googlebot sm
# Selector for the "VIEW CRAWLED PAGE" control.
# Clicking this element opens the crawled page drawer.
view_crawled_page = (
    "#yDmH0d > c-wiz:nth-child(21) > div > div.OoO4Vb > span > div"
    " > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div"
    " > div.LgQiCc.vOSR6b.RVNZdd.qtMyGd > content > div.DxKHSb > div"
    " > c-wiz:nth-child(1) > div > span > div > content > span > div > span"
)
# Selector for the "TEST LIVE" control.
# Clicking this element checks the live version of the page.
test_live = (
    "#yDmH0d > c-wiz:nth-child(21) > div > div.OoO4Vb > span > div"
    " > div.y3IDJd.rFZTte.Fx3kmc > content > div.sVgexf.VpESuc > div.too21"
    " > c-wiz > div > div > content > span"
)