Created
July 8, 2021 14:04
-
-
Save thatbudakguy/e35e72835f1766450998a9612404bb57 to your computer and use it in GitHub Desktop.
percy scrapy crawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from percy import percy_snapshot | |
from scrapy.spiders import SitemapSpider | |
from selenium import webdriver | |
class PercySpider(SitemapSpider):
    """Sitemap crawler that uploads DOM snapshots to Percy.

    Each crawled page is loaded in a headless Chrome browser and passed to
    ``percy_snapshot`` together with the widths in ``device_widths``.
    """

    name = "cdhweb"
    sitemap_urls = ["http://localhost:8000/sitemap.xml"]
    allowed_domains = ["localhost"]  # don't follow external links
    device_widths = [375, 768, 1280]  # breakpoints for this project

    def __init__(self, *args, **kwargs):
        """Initialize the spider and start the browser used for snapshots."""
        super().__init__(*args, **kwargs)
        self.get_browser()

    def get_browser(self):
        """Create a browser driver to use for taking snapshots.

        The driver is stored on ``self.browser``; nothing is returned.
        """
        # NOTE using Chrome here is arbitrary; we're just sending the DOM
        # snapshot so Percy can render it in the cloud (in parallel).
        options = webdriver.ChromeOptions()
        for flag in (
            "--no-sandbox",
            "--disable-extensions",
            "--disable-dev-shm-usage",
            "--disable-setuid-sandbox",
            "--headless",
        ):
            options.add_argument(flag)
        # NOTE(review): passing the driver path positionally is the older
        # Selenium calling convention; confirm it matches the pinned
        # Selenium version before upgrading.
        self.browser = webdriver.Chrome(
            "node_modules/chromedriver/bin/chromedriver", options=options
        )

    def parse(self, response):
        """Take a snapshot of a single URL and upload to Percy.

        Args:
            response: the crawled response; only its ``url`` is used, the
                page itself is re-loaded in the browser.
        """
        self.browser.get(response.url)
        # Use the page <title> as the snapshot name, falling back to the URL
        # so a page without a title never produces an empty snapshot name.
        snapshot_name = self.browser.title or response.url
        percy_snapshot(
            self.browser,  # use configured browser for snapshots
            snapshot_name,
            # The Percy option is ``widths`` (plural); a misspelled option
            # is not honoured, so the project breakpoints were never applied.
            widths=self.device_widths,
        )

    def closed(self, reason):
        """Shut down the browser used for taking snapshots.

        Args:
            reason: why the spider was closed; not used here.
        """
        self.browser.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment