Created
December 27, 2023 07:34
-
-
Save cpouldev/0c5fc954fae197d713d35def77881384 to your computer and use it in GitHub Desktop.
Masoutis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import json | |
from scrapy import Request | |
from src.scraper.shops.spiders.base import SupermarketSpider | |
# Endpoint that returns one page of products for a given category.
FORM_URL = 'https://eshop.masoutis.gr/WcfScanNShopForWeb/OrdersService.svc/GetPromoItemWithListCouponsSubCategories/'

# Headers sent with every request to the e-shop; the values resemble what a
# browser XHR from the e-shop's own pages would send (presumably expected by
# the backend -- not verified here).
HEADERS = {
    'Content-Type': 'application/json; charset=utf-8',
    'Host': 'eshop.masoutis.gr',
    'Origin': 'https://eshop.masoutis.gr/',
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': 'https://eshop.masoutis.gr/',
}
def format_price(p):
    """Convert *p* to a float, returning None when it cannot be converted.

    Args:
        p: Any value accepted by ``float()`` (number or numeric string).

    Returns:
        The value as a float, or None if *p* is None, of an unsupported
        type, not a numeric string, or too large to represent as a float.
    """
    try:
        return float(p)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "no price"; anything else (e.g.
        # KeyboardInterrupt) must propagate instead of being swallowed.
        return None
class MasoutisSpider(SupermarketSpider):
    """Spider for the Masoutis e-shop JSON endpoints.

    Crawl flow, as implemented below:
      1. ``start_requests`` fetches session credentials.
      2. ``request_categories`` stores them and requests the category menu.
      3. ``parse_categories`` requests page 1 of every category.
      4. ``parse_catalog`` records the products of a page and requests the
         next page, stopping when a page comes back empty.
    """

    name = 'masoutis'
    allowed_domains = ['eshop.masoutis.gr']

    # Session credentials; populated by request_categories() and attached as
    # headers to every request built by authed_request().
    app_key = ''
    app_uid = ''
    app_usl = ''

    # NOTE(review): presumably consumed by Scrapy as the per-spider download
    # delay in seconds -- confirm against the SupermarketSpider base class.
    download_delay = 15

    def authed_request(self, **kwargs):
        """Build a Request with the common headers plus session credentials.

        Args:
            **kwargs: Forwarded unchanged to ``Request`` (url, method,
                body, callback, meta, ...).

        Returns:
            The constructed ``Request``.
        """
        auth_headers = {
            **HEADERS,
            'Key': self.app_key,
            'Uid': self.app_uid,
            'Usl': self.app_usl,
        }
        return Request(headers=auth_headers, **kwargs)

    def make_cat_request(self, cat_id, page):
        """Build the request for one page of a category's product listing.

        Args:
            cat_id: Category identifier, sent in the ``Itemcode`` field.
            page: Page number, sent in the ``IfWeight`` field.

        Returns:
            An authenticated POST ``Request`` handled by ``parse_catalog``;
            ``cat_id`` and ``page`` are carried in ``meta`` so the callback
            can request the following page.
        """
        payload = {
            "PassKey": "Sc@NnSh0p",
            "Itemcode": cat_id,
            "ItemDescr": "0",
            "IfWeight": page,
        }
        return self.authed_request(
            url=FORM_URL,
            method='POST',
            callback=self.parse_catalog,
            body=json.dumps(payload),
            meta={'page': page, 'cat_id': cat_id},
        )

    def start_requests(self):
        """Yield the initial request that obtains the session credentials."""
        yield Request(
            url='https://eshop.masoutis.gr/Home/GetGred',
            headers=HEADERS,
            method='POST',
            body=json.dumps({'PassTemp': 'ScanNShop$c@nNSh0p'}),
            callback=self.request_categories,
        )

    def request_categories(self, response):
        """Store the credentials from *response* and request the category menu.

        The response body is expected to be a JSON object with ``Key``,
        ``Uid`` and ``Usl`` entries; a missing entry raises ``KeyError``.
        """
        credentials = json.loads(response.body)
        self.app_key = credentials['Key']
        self.app_uid = credentials['Uid']
        self.app_usl = credentials['Usl']

        yield self.authed_request(
            url='https://eshop.masoutis.gr/WcfScanNShopForWeb/OrdersService.svc/GetScanNShopMenuAllLevels/',
            method='POST',
            body=json.dumps({'PassKey': 'Sc@NnSh0p'}),
            callback=self.parse_categories,
        )

    def parse_categories(self, response):
        """Yield a page-1 listing request for every distinct category.

        Menu entries whose ``HeaderMenuItemLinkDescr`` is
        ``'oi-sunergates-mas'`` are skipped.
        """
        menu = json.loads(response.body)
        # A set removes duplicates: several menu entries can share the same
        # HeaderMenuItem value.
        category_ids = {
            entry['HeaderMenuItem']
            for entry in menu
            if entry['HeaderMenuItemLinkDescr'] != 'oi-sunergates-mas'
        }
        for cat_id in category_ids:
            yield self.make_cat_request(cat_id=cat_id, page=1)

    def parse_catalog(self, response):
        """Record every product on a listing page, then request the next page.

        Yields the image item produced for each product and, once the page
        has been processed, the request for the following page of the same
        category. An empty (or JSON ``null``) page ends the pagination.
        """
        data = json.loads(response.body)
        cat_id = response.meta['cat_id']

        if not data:
            # No products returned: this category is exhausted.
            return

        for item in data:
            img_url = item['PhotoData']
            offer = item['Discount']
            url = item['ItemDescrLink']
            if offer:
                offer = offer.strip()

            # get_image_item / insert_item come from the base class; their
            # exact behavior is not visible in this file.
            image_item, image_hash = self.get_image_item(img_url)
            yield image_item

            self.insert_item(
                item=item.get('ItemDescr'),
                key=url,
                offer=offer,
                url=url,
                sale_price=format_price(item.get('PosPrice')),
                price=format_price(item.get('StartPrice')),
                image=image_hash,
            )

        next_page = response.meta['page'] + 1
        yield self.make_cat_request(cat_id=cat_id, page=next_page)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment