Skip to content

Instantly share code, notes, and snippets.

@mrunderline
Created February 8, 2019 19:03
Show Gist options
  • Save mrunderline/8130392d6a8abd55587b88011842b512 to your computer and use it in GitHub Desktop.
Save mrunderline/8130392d6a8abd55587b88011842b512 to your computer and use it in GitHub Desktop.
This code helps you find the products on digikala.com that have a specific word in their params!
# -*- coding: utf-8 -*-
import io

import requests
from bs4 import BeautifulSoup
# Search-results URL to crawl. Any ``pageno`` parameter it carries is replaced
# with the page currently being fetched (see ``build_page_url``).
url = 'https://www.digikala.com/search/category-tablet/?has_selling_stock=1&type[0]=211&attribute[A20378][0]=25460&attribute[A20378][1]=25462&attribute[A20421][0]=25676&attribute[A20421][1]=25677&pageno=1&last_filter=type&last_value=211&sortby=4'
# Word to look for in the text of each product's ``.c-params`` element.
# Unicode literal so it can be compared with BeautifulSoup's text directly,
# without encoding either side, on both Python 2 and Python 3.
word = u'قلم'

SITE_ROOT = 'https://www.digikala.com'
OUTPUT_PATH = 'products.txt'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block forever


def build_page_url(base_url, page):
    """Return *base_url* with its ``pageno`` query parameter set to *page*.

    Any ``pageno`` already present in the query string is dropped first, so
    the result carries exactly one ``pageno``. Simply appending a second one
    leaves it up to the server which value wins, which can make every
    request return the same page.

    The remaining parameters keep their order; ``pageno`` is appended last.
    A URL without a query string gets ``?pageno=<page>``.
    """
    head, _, query = base_url.partition('?')
    params = [
        param for param in query.split('&')
        if param and not param.startswith('pageno=')
    ]
    params.append('pageno=' + str(page))
    return head + '?' + '&'.join(params)


def fetch_soup(page_url):
    """Download *page_url* and parse the response body with the lxml parser."""
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.content, 'lxml')


def product_paths(boxes, seen):
    """Return the product hrefs in *boxes* that are not already in *seen*.

    Only hrefs containing ``/product/dkp-`` are kept. Each returned href is
    added to *seen* (mutated in place), so a product is yielded once even if
    it is linked several times or shows up again on a later page.
    """
    paths = []
    for box in boxes:
        for anchor in box.find_all('a'):
            href = anchor.get('href')
            if href is None or '/product/dkp-' not in href or href in seen:
                continue
            seen.add(href)
            paths.append(href)
    return paths


def has_word(product_page, needle):
    """Return True if *needle* occurs in the first ``.c-params`` element.

    A page without any ``.c-params`` element yields False instead of raising
    IndexError.
    """
    params = product_page.select('.c-params')
    return bool(params) and needle in params[0].text


def main():
    """Crawl the result pages of ``url`` and save the matching product URLs.

    Pages are requested with increasing ``pageno`` until one contains no
    ``.c-product-box`` element. Every product page whose ``.c-params`` text
    contains ``word`` is recorded, and the recorded URLs are written to
    ``products.txt``, one per line.
    """
    found_products = []
    seen = set()
    page = 0
    while True:
        page += 1
        print('page %d' % page)
        boxes = fetch_soup(build_page_url(url, page)).select('.c-product-box')
        if not boxes:
            # An empty result page marks the end of the listing.
            break
        for path in product_paths(boxes, seen):
            product_url = SITE_ROOT + path
            print(product_url)
            if has_word(fetch_soup(product_url), word):
                found_products.append(product_url)
                print('========== FOUND! ==========')

    # io.open takes an explicit encoding on Python 2 and 3 alike; product
    # URLs may contain non-ASCII characters.
    with io.open(OUTPUT_PATH, 'w', encoding='utf-8') as out:
        for product_url in found_products:
            out.write(product_url + u'\n')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment