Skip to content

Instantly share code, notes, and snippets.

@jmrobles
Created August 13, 2020 15:30
Show Gist options
  • Save jmrobles/01c0341c9ccd6f7bdcf9b16c45b8223e to your computer and use it in GitHub Desktop.
Save jmrobles/01c0341c9ccd6f7bdcf9b16c45b8223e to your computer and use it in GitHub Desktop.
Selenium with Chrome webdriver scraper class
class ScraperWebJS:
    """
    Scraper that renders pages with a headless Chrome driven by Selenium.

    The browser is started lazily on the first call to ``scrape`` (or by an
    explicit ``prepare``) and is shut down by ``finish``. The instance can
    also be used as a context manager so the browser is always released:

        with ScraperWebJS() as scraper:
            html = scraper.scrape(url)
    """

    # True only while ``self.drv`` holds a live webdriver instance.
    prepared = False

    def __enter__(self):
        """Return the scraper itself; the browser is still started lazily."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Shut the browser down when leaving the ``with`` block."""
        self.finish()
        # Returning None lets any exception from the block propagate.

    def prepare(self):
        """
        Prepare the headless browser for a scraping session.

        Does nothing if a session is already active.
        """
        if self.prepared:
            return
        opts = Options()
        opts.add_argument('--headless')
        # Chrome switches need the leading dashes and the exact name
        # ``window-size``; a misspelled switch is not applied.
        opts.add_argument('--window-size=1920,1080')
        # NOTE(review): ``executable_path`` was deprecated in Selenium 4 in
        # favor of a Service object -- confirm against the installed version.
        self.drv = webdriver.Chrome(executable_path=CHROME_DRIVER_PATH, options=opts)
        # Mark the session active only once the driver actually exists, so a
        # failed start-up can be retried and ``finish`` never touches a
        # missing ``drv`` attribute.
        self.prepared = True
        self.drv.implicitly_wait(10)

    def scrape(self, url: str) -> Union[str, None]:
        """
        Scrape ``url`` using Selenium with the Chrome backend.

        Starts the browser first if no session is active.

        Returns the page source as a string, or ``None`` if loading the page
        raised. On failure the session is closed, so the next call starts a
        fresh browser.
        """
        if not self.prepared:
            self.prepare()
        try:
            self.drv.get(url)
        except Exception:
            # Best-effort API: report the failure instead of hiding it, then
            # drop the (possibly broken) browser session.
            import logging
            logging.getLogger(__name__).warning(
                "Failed to load %s", url, exc_info=True
            )
            self.finish()
            return None
        return self.drv.page_source

    def finish(self):
        """
        Finish the scraping session and quit the browser.

        Safe to call when no session is active. The session is marked as
        closed even if quitting the driver raises.
        """
        if not self.prepared:
            return
        try:
            self.drv.quit()
        finally:
            # Never keep pointing at a driver we already tried to quit.
            self.prepared = False
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment