Skip to content

Instantly share code, notes, and snippets.

@hamletbatista
Last active April 4, 2019 00:23
Show Gist options
  • Select an option

  • Save hamletbatista/4afecd15ccb87d367b016be87970e44f to your computer and use it in GitHub Desktop.

Select an option

Save hamletbatista/4afecd15ccb87d367b016be87970e44f to your computer and use it in GitHub Desktop.
js_extractor="""
() => {
var data = {};
//referring page
data["referring_page"] = document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > span > div > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(1) > div:nth-child(3) > div:nth-child(2) > div > div') ||
document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(1) > div:nth-child(3) > div:nth-child(2) > div');
//.textContent;
//last crawl
data["last_crawl"] = document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > span > div > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(2) > div:nth-child(2) > div:nth-child(2) > div') ||
document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(2) > div:nth-child(2) > div:nth-child(2) > div');
//.textContent;
//user declared canonical
data["user_declared_canonical"] = document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > span > div > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(3) > div:nth-child(3) > div:nth-child(2) > div') ||
document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(3) > div:nth-child(3) > div:nth-child(2) > div > div');
//.textContent;
//Google-selected canonincal
data["google_selected_canonical"] = document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > span > div > div.y3IDJd.rFZTte.Fx3kmc > content > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(3) > div:nth-child(4) > div:nth-child(2) > div') ||
document.querySelector('#yDmH0d > c-wiz:nth-child(20) > div > div.OoO4Vb > div.shSP > div > div > div:nth-child(3) > content > div > div > div > div.V8vvZb > div > div > content > div.I1NQYe > div:nth-child(3) > div:nth-child(4) > div:nth-child(2) > div');
//.textContent;
data["referring_page"] = data["referring_page"] && data["referring_page"].textContent;
data["last_crawl"] = data["last_crawl"] && data["last_crawl"].textContent;
data["user_declared_canonical"] = data["user_declared_canonical"] && data["user_declared_canonical"].textContent;
data["google_selected_canonical"] = data["google_selected_canonical"] && data["google_selected_canonical"].textContent;
return data;
}
"""
results = await page.evaluate(js_extractor)
print(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment