Created
December 27, 2023 07:36
-
-
Save cpouldev/a48fcff63ad5ab95da845cea19e580c1 to your computer and use it in GitHub Desktop.
xalkiadakis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from scrapy import Request | |
from src.scraper.shops.spiders.base import SupermarketSpider | |
def format_price(p):
    """Convert a scraped price string to a float.

    Args:
        p: Raw price text (surrounding whitespace is ignored), or ``None``
            when the selector matched nothing.

    Returns:
        The price as a ``float``, or ``None`` when *p* is missing, is not a
        string, or does not parse as a number.
    """
    try:
        return float(p.strip())
    except (AttributeError, TypeError, ValueError):
        # AttributeError: p is None / has no .strip(); ValueError/TypeError:
        # the text is not a valid float. Anything else (e.g. KeyboardInterrupt)
        # is deliberately allowed to propagate.
        return None
class XalkiadakisSpider(SupermarketSpider):
    """Spider for the Xalkiadakis e-shop (eshop.xalkiadakis.gr).

    Starts at the shop home page, follows the category links found in the
    primary mega menu, and walks every page of each product listing.
    """

    name = 'xalkiadakis'
    allowed_domains = ['xalkiadakis.gr']
    start_urls = ['https://eshop.xalkiadakis.gr/']

    def parse(self, response):
        """Yield one catalog request per category link in the mega menu."""
        cats = response.css(
            '#mega-menu-primary li.mega-proiontamenu > ul > li > a.mega-menu-link'
        ).xpath('@href').getall()
        for cat in cats:
            # urljoin leaves absolute links untouched and resolves relative
            # ones, which Request() would otherwise reject (missing scheme).
            yield Request(url=response.urljoin(cat), callback=self.parse_catalog)

    def parse_catalog(self, response):
        """Extract the products on a listing page and follow pagination.

        For every ``li.product`` element the title, image, prices and product
        URL are read. Products with no usable price are skipped. For the
        rest, the image item is yielded and the record is passed to
        ``self.insert_item`` (inherited; not defined in this class).
        """
        items = response.css('li.product')
        next_page = response.css('.page-numbers a.next').xpath('@href').get()

        for item in items:
            title = item.css('.woocommerce-loop-product__title::text').get()
            image_url = item.css('img').xpath('@src').get()
            sale_price = format_price(item.css('.sale_price bdi::text').get())
            # The struck-through (<del>) amount is taken as the regular price.
            price = format_price(item.css('.price del bdi::text').get())
            url = item.css(
                'a.woocommerce-LoopProduct-link.woocommerce-loop-product__link'
            ).xpath('@href').get()

            # NOTE(review): get_image_item is inherited and runs even for
            # products skipped below; kept in this position to preserve any
            # side effects it may have.
            image_item, image_hash = self.get_image_item(image_url)

            # No separate sale price found: fall back to the regular price.
            if price and not sale_price:
                sale_price = price

            # Neither price is usable (missing, unparsable, or zero): skip.
            if not price and not sale_price:
                continue

            yield image_item

            self.insert_item(
                item=title,
                key=url,
                sale_price=sale_price,
                price=price,
                url=url,
                image=image_hash
            )

        if next_page:
            # Resolve against the current page so a relative "next" link works.
            yield Request(url=response.urljoin(next_page), callback=self.parse_catalog)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment