-
-
Save admariner/f72e5dbf5e7ff82aa731d55bac73a599 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scrapy | |
import pickle | |
class ShopifyThemeLinkSpider(scrapy.Spider):
    """Crawl the pickled theme pages and emit the demo URL of each theme.

    ``theme_links.pkl`` is read once, when the class body is executed. It is
    used as a mapping whose keys are the page URLs to crawl and whose values
    are mappings containing at least a ``"theme"`` entry.
    """

    name = 'shopifyspider'

    # NOTE(security): pickle.load executes arbitrary code embedded in the
    # file. Only run this spider against a theme_links.pkl you produced
    # yourself.
    with open('theme_links.pkl', 'rb') as _pickle_file:
        theme_links = pickle.load(_pickle_file)
    # Do not leave the (closed) file handle behind as a class attribute.
    del _pickle_file

    # Scrapy documents start_urls as a list; materialize the keys instead of
    # exposing a live dict view.
    start_urls = list(theme_links)

    def parse(self, response):
        """Yield one item describing the first theme preview link on a page.

        Args:
            response: The Scrapy response for one of ``start_urls``.

        Yields:
            dict: ``"demo-url"`` (the link's ``data-demo-url`` value prefixed
            with ``https://``), ``"link"`` (the URL of the crawled page) and
            ``"theme"`` (the theme recorded for that page in the pickle).

        Raises:
            KeyError: If the requested URL has no entry in ``theme_links``.
        """
        # "//a[...][1]" would match the first such anchor under *every*
        # parent element. Wrapping the path in parentheses applies the
        # positional predicate to the whole node-set, so only the first
        # match in the document is returned.
        demo_host = response.xpath(
            "(//a[contains(@class, 'theme-preview-link')])[1]/@data-demo-url"
        ).get()
        if demo_host is None:
            # Without the attribute the item would carry "https://None".
            self.logger.warning("No theme preview link found on %s", response.url)
            return

        # After a redirect response.url is no longer the start URL that keys
        # theme_links; RedirectMiddleware records the original request URL
        # as the first entry of meta["redirect_urls"].
        requested_url = response.meta.get("redirect_urls", [response.url])[0]

        yield {
            "demo-url": f"https://{demo_host}",
            "link": response.url,  # crawled page
            "theme": self.theme_links[requested_url]["theme"],  # theme from pickled file
        }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment