Skip to content

Instantly share code, notes, and snippets.

@eleco
Created May 13, 2022 08:09
Show Gist options
  • Save eleco/294774e8dc075904ff5b61a2b08f4162 to your computer and use it in GitHub Desktop.
Save eleco/294774e8dc075904ff5b61a2b08f4162 to your computer and use it in GitHub Desktop.
import asyncio
from pyppeteer import launch
async def get_article_titles(keywords):
    """Print the title and link of each Google search result on one page.

    Launches a visible (non-headless) browser through pyppeteer, loads a
    fixed Google search URL, clicks the "I agree" consent button when one
    is found, and then prints the text content and href of every result
    anchor matched by the XPath query below.

    Args:
        keywords: Currently unused; the search query is hard-coded in the
            URL.  NOTE(review): presumably intended to build the query --
            confirm before wiring it in, since that changes the output.

    Returns:
        None.  All results are reported through ``print``.
    """
    # headless=False opens a real browser window, started maximized.
    browser = await launch({"headless": False, "args": ["--start-maximized"]})
    try:
        page = await browser.newPage()
        # Fixed 1600x900 viewport.
        await page.setViewport({"width": 1600, "height": 900})
        await page.goto("https://www.google.fr/search?q=strategy journey")
        # Fixed pause to give the search results time to load.
        await page.waitFor(1000)

        pages = await browser.pages()
        print(pages)
        # The original code always used the second open page; fall back to
        # the page we created if the browser reports fewer than two.
        popup = pages[1] if len(pages) > 1 else page

        for button in await popup.querySelectorAll("button"):
            label_handle = await button.getProperty("textContent")
            if await label_handle.jsonValue() == 'I agree':
                await button.click()
                # Stop at the first match: the click may change the page,
                # which would leave the remaining button handles stale.
                break
        await page.waitFor(100)

        url = await page.evaluate("() => window.location.href")
        print(url)

        # XPath: anchors that wrap an <h3>, inside result containers
        # whose class attribute is exactly 'g'.
        results = await page.Jx("//div[@class = 'g']//a[h3]")
        for anchor in results:
            title = await anchor.getProperty("textContent")
            print(await title.jsonValue())
            link = await anchor.getProperty("href")
            prize_href = await page.evaluate('(g) => g.href', anchor)
            # The href is printed twice (via evaluate and via the property
            # handle) to keep the output identical to the original script.
            print(prize_href)
            print(await link.jsonValue())
    finally:
        # Always shut the browser down so no Chromium process is left
        # behind, even when a step above raises.
        await browser.close()
def main():
    """Run the scraper once on a fresh event loop and report progress."""
    print("Starting...")
    # Create the loop explicitly: calling asyncio.get_event_loop() with no
    # current loop is deprecated and fails on recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    # The loop is deliberately left open afterwards.
    # NOTE(review): pyppeteer may still use it for exit-time browser
    # cleanup -- confirm before adding loop.close().
    loop.run_until_complete(
        get_article_titles(["python", "opensource", "opencv"])
    )
    print("Finished extracting articles titles")


# Guard the entry point so importing this module does not launch a browser.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment