Created
December 27, 2023 07:35
-
-
Save cpouldev/7592587843c36a95a93b48b719b6a784 to your computer and use it in GitHub Desktop.
Sklavenitis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from random import randint | |
from scrapy import Request | |
from w3lib import html | |
from src.scraper.helpers import format_price | |
from src.scraper.shops.spiders.base import SupermarketSpider | |
# URL template for one page of a category's product list.
# Placeholders: ``cat`` is the category path taken from the navigation links
# (appended directly after the host), ``page`` is the page number; the spider
# starts at page 1.
BASE_URL = (
    'https://www.sklavenitis.gr{cat}'
    '?$component=Atcom.Sites.Yoda.Components.ProductList.Index'
    '&sortby=ByPopularity&pg={page}&endless=true'
)
class SklavenitisSpider(SupermarketSpider):
    """Spider for the sklavenitis.gr product catalogue.

    Crawl flow: the category index page is fetched first, one product-list
    request (page 1) is issued per category link found in the navigation,
    and every list page that contains products schedules the following page.
    A list page without any ``.product`` element ends that category.
    """

    name = 'sklavenitis'
    allowed_domains = ['sklavenitis.gr']
    start_urls = ['https://www.sklavenitis.gr/katigories/']

    @staticmethod
    def _forwarded_headers():
        """Return headers with a randomised ``X-Forwarded-For`` address.

        The address is drawn from 46.246.128.0 - 46.246.255.255; a new one is
        generated on every call.
        """
        return {'X-Forwarded-For': f'46.246.{randint(128, 255)}.{randint(0, 255)}'}

    @staticmethod
    def _extract_cost_per_unit(item):
        """Return the tag-free per-unit price text of a product, or ``None``.

        ``None`` is returned when the product has no ``.priceKil`` element or
        when the element expected inside it is missing, so callers never
        receive a selector object or hit ``remove_tags(None)``.
        """
        block = item.css('.priceKil')
        if not block:
            return None
        if block.css('.del'):
            # A `.del` child is present: only the `.hightlight` element is used.
            # NOTE(review): selector spelling kept exactly as in the original;
            # confirm it matches the site's markup.
            raw = block.css('.hightlight').get()
        else:
            raw = block.get()
        if raw is None:
            return None
        return html.remove_tags(raw).strip().replace('\n', '')

    def start_requests(self):
        """Yield the single request for the category index page."""
        yield Request(url='https://www.sklavenitis.gr/katigories/',
                      headers=self._forwarded_headers(),
                      callback=self.parse_init)

    def parse_init(self, response):
        """Yield the page-1 product-list request for each category link.

        The category path and its (stripped) link text travel in
        ``meta`` as ``cat_url`` / ``cat_name`` together with ``page``.
        """
        for cat in response.css('aside nav.mainNav a'):
            url = cat.xpath('@href').get()
            if not url:
                # Without an href the template would produce "...grNone?...".
                continue
            name = cat.css('::text').get()
            if name:
                name = name.strip()
            yield Request(url=BASE_URL.format(cat=url, page=1),
                          callback=self.parse_catalog,
                          headers=self._forwarded_headers(),
                          meta={'page': 1,
                                'cat_url': url,
                                'cat_name': name})

    def parse_catalog(self, response):
        """Process one product-list page and schedule the next one.

        For every product with a title, a link and at least one price, the
        image item from ``self.get_image_item`` is yielded and the product is
        handed to ``self.insert_item``. Products lacking a title or link are
        skipped instead of aborting the whole page. Finally a request for
        ``page + 1`` of the same category is yielded; the crawl of a category
        stops at the first page that contains no products.
        """
        items = response.css('.product')
        if not items:
            return

        page = response.meta['page']
        for item in items:
            title = item.css('h4.product__title a::text').get()
            href = item.css('.absLink').xpath('@href').get()
            if not title or not href:
                # Incomplete markup: nothing usable to store for this entry.
                continue
            title = title.strip()

            qty = item.css('.itemsEst::text').get()
            if qty:
                title = f'{title} ({qty})'

            price = format_price(item.css('.main-price .deleted__price::text').get())
            sale_price = format_price(item.css('.main-price .price::text').get())
            if not price and not sale_price:
                continue
            if price and not sale_price:
                sale_price = price

            offer = item.css('.gift_number::text').get()
            image_url = item.css('.product__figure img').xpath('@src').get()
            url = 'https://www.sklavenitis.gr' + href

            image_item, image_hash = self.get_image_item(image_url)
            yield image_item
            self.insert_item(
                item=title,
                key=url,
                price=price,
                sale_price=sale_price,
                offer=offer,
                url=url,
                image=image_hash,
                cost_per_unit=self._extract_cost_per_unit(item),
            )

        # Must be yielded: this method is a generator, so a `return Request(...)`
        # would be discarded and the crawl would stop after page 1.
        next_page = page + 1
        yield Request(url=BASE_URL.format(cat=response.meta['cat_url'], page=next_page),
                      callback=self.parse_catalog,
                      headers=self._forwarded_headers(),
                      meta={**response.meta, 'page': next_page})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment