Skip to content

Instantly share code, notes, and snippets.

@cpouldev
Created December 27, 2023 07:40
Show Gist options
  • Save cpouldev/76a95c7f19182eaab9923ba4ff8e6cb5 to your computer and use it in GitHub Desktop.
Save cpouldev/76a95c7f19182eaab9923ba4ff8e6cb5 to your computer and use it in GitHub Desktop.
ποκι
# -*- coding: utf-8 -*-
import json
import re
import unicodedata
from scrapy import Request
from src.scraper.shops.spiders.base import SupermarketSpider
# Maps a shop slug to its Pockee retailer_id; entries stay commented out
# until that shop is (re)enabled for crawling.
POCKEE_SHOP_MAP = {
    # 'sklavenitis': 2,
    'galaxias': 10,
    # 'kritikos': 12,
    # 'mymarket': 27,
    # 'masoutis': 4,
    # 'efresh': 21,
    # 'bazaar': 11,
}

# API tokens; only the first entry is used (see HEADERS below).
# The value is a placeholder — supply a real token before running.
ACCESS_TOKENS = [
    'ENA ACCESS TOKEN APO KAPOU'
]

# Previous v1 endpoint, kept for reference only.
# BASE_URL = 'https://api.pockee.com/v1/records?retailer={shop}&per_page=50&page={page}&filter=offers'
BASE_URL = 'https://api-do.pockee.com/v7/site/products?retailer_id={shop}&page={page}&perPage=50'
FILTERS_URL = 'https://api-do.pockee.com/v7/mobile/products/filters?retailer_id={shop}'

# Headers sent with every API request, mimicking the pockee.com web client.
HEADERS = {
    'Accept': 'application/json, text/plain, */*',
    # NOTE(review): Host names api.pockee.com while BASE_URL/FILTERS_URL hit
    # api-do.pockee.com — a mismatched Host header may be rejected by the
    # server; verify whether this header is needed at all (scrapy sets Host
    # from the request URL when absent).
    'Host': 'api.pockee.com',
    'Origin': 'https://pockee.com',
    'Connection': 'keep-alive',
    'pockee-api-token': ACCESS_TOKENS[0],
    'pockee-api-version': '1.1.4',
    'Referer': 'https://pockee.com/',
    # Fixed: the standard fetch-metadata request headers are spelled
    # 'Sec-Fetch-*', not 'Set-Fetch-*' as in the original.
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Dest': 'empty',
}
def strip_accents(s):
    """Return *s* with all combining diacritical marks removed.

    The input is decomposed to NFD so accented characters split into a
    base character followed by combining marks (Unicode category 'Mn');
    the marks are then dropped and the rest re-joined.
    """
    kept = []
    for ch in unicodedata.normalize('NFD', s):
        if unicodedata.category(ch) == 'Mn':
            continue
        kept.append(ch)
    return ''.join(kept)
class PockeeSpider(SupermarketSpider):
    """Scrape product offers from the Pockee v7 API for one retailer.

    The crawl first fetches the brand filter list (``parse_filters``),
    then pages through the product catalog (``parse_catalog``), yielding
    image items and inserting products via the ``SupermarketSpider`` base
    class.  ``shop`` / ``shop_id`` are expected to be set by a subclass or
    at spider-construction time — presumably from POCKEE_SHOP_MAP; confirm
    against the callers.
    """

    name = 'pockee'
    allowed_domains = ['pockee.com']
    # Throttle requests — presumably to stay under the API's rate limits;
    # TODO confirm the required delay.
    download_delay = 5
    # Filled by parse_filters with upper-cased, accent-stripped brand names.
    brands = {}
    # Pockee retailer_id used in BASE_URL / FILTERS_URL.
    shop_id = None
    shop = None

    def do_request(self, page, meta=None):
        """Build a Request for one catalog page of the configured shop.

        ``dont_filter=True`` bypasses scrapy's duplicate filter —
        presumably so retried/re-scheduled pages are not dropped.
        """
        return Request(url=BASE_URL.format(shop=self.shop_id, page=page),
                       callback=self.parse_catalog,
                       dont_filter=True,
                       headers=HEADERS,
                       meta=meta)

    def start_requests(self):
        # Entry point: fetch the brand filters first; catalog paging starts
        # from parse_filters once the brand set is known.
        yield Request(url=FILTERS_URL.format(shop=self.shop_id),
                      headers=HEADERS,
                      callback=self.parse_filters)

    def parse_filters(self, response):
        """Build the brand-name set from the filters endpoint, then kick
        off catalog paging at page 1."""
        data = json.loads(response.body)
        # Normalize brands the same way titles will be matched: strip
        # accents and upper-case.
        brands = [strip_accents(d['brand_name']).upper() for d in data['data']['brands']]
        self.brands = set(brands)
        yield self.do_request(1)

    def parse_catalog(self, response):
        """Parse one catalog page: yield image items, insert each priced
        product, and schedule the next page."""
        data = json.loads(response.body)
        # NOTE(review): in Laravel-style pagination meta, 'to' is the index
        # of the last item on the current page, not the number of pages —
        # 'last_page' may be what was intended here; verify against the API.
        pages = data['meta']['pagination']['to']
        curr_page = data['meta']['pagination']['current_page']
        for item in data['data']:
            item = item['product']
            title = item['title']
            offer = None
            price = None
            sale_price = None
            offers = item['offers']
            if offers and len(offers) > 0:
                # Only the first offer is considered; 'type_human' is its
                # human-readable label.
                offer = offers[0]['type_human']
                price = offers[0]['pivot']['offerproduct_start_value']
                sale_price = offers[0]['pivot']['offerproduct_value']
                if price == sale_price:
                    # No actual discount: keep only sale_price.
                    price = None
                # Greek "gift/free" wording (δώρο / δώρα / δωρού…): append
                # the offer text to the title so the freebie stays visible.
                if re.findall(r'δ[ωώ]ρ[οα]υ?', offer, flags=re.IGNORECASE):
                    title = f'{title} {offer}'
            else:
                # No offer: fall back to the plain price list.
                prices = item['prices']
                if prices and len(prices) > 0:
                    sale_price = prices[0]['pricing_value']
            if not price and not sale_price:
                # Nothing priced at all — skip this product.
                continue
            image_url = item['image_versions']['image_thumb']
            image_item, image_hash = self.get_image_item(image_url)
            yield image_item
            # Move a recognized brand token to the front of the title.
            # NOTE(review): titles are matched as-is while self.brands is
            # accent-stripped/upper-cased — only exact-token matches hit.
            title_set = set(title.split(' '))
            matches = self.brands.intersection(title_set)
            if len(matches) > 0:
                # NOTE(review): set order is arbitrary — when several brands
                # match, the chosen one is nondeterministic.
                match = list(matches)[0]
                title = title.replace(match, '')
                title = f'{match} {title}'
            self.insert_item(
                item=title,
                key=str(item['id']),
                url=None,
                offer=offer,
                price=price,
                sale_price=sale_price,
                image=image_hash
            )
        if curr_page < pages:
            yield self.do_request(curr_page + 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment