mhsharifi96 · November 24, 2024 07:20
diff --git a/find-ganj.py b/find-ganj.py
 """ این کد جهت یافتن شکار گنج دیجی کالاست"""

 import os
 import requests
 from multiprocessing import Pool, cpu_count
 import random
 # Create a folder to save images
 os.makedirs("digikala_images", exist_ok=True)
 os.makedirs("digikala_images/final", exist_ok=True)

 import shutil
 from PIL import Image
 import pytesseract


 def find_text_in_image(image_path, product_id, target_text=''):
    """Run Persian OCR on an image and report whether the target text appears.

    Args:
        image_path: Path of the image file to open with PIL.
        product_id: Product id; used only in the printed product link/output.
        target_text: Text to search for. When empty (the default), the two
            built-in spellings of the phrase (with and without a space) are
            searched for instead.

    Returns:
        True if any searched text occurs in the OCR output. False when it does
        not, or when opening/reading the image raised an exception.
    """
    # Honour an explicit target; otherwise fall back to both default spellings.
    if target_text:
        targets = (target_text,)
    else:
        targets = ("شکار گنج", "شکارگنج")

    try:
        # The context manager closes the underlying image file after OCR.
        with Image.open(image_path) as img:
            extracted_text = pytesseract.image_to_string(img, lang="fas")  # 'fas' for Persian

        # Only log images where OCR produced some text.
        cleaned_text = extracted_text.strip()
        if cleaned_text:
            print('----------------------')

            print(f"link : https://www.digikala.com/product/dkp-{product_id}/")
            print(f"{product_id} : {cleaned_text}")

        matched = next((t for t in targets if t in extracted_text), None)
        if matched is None:
            return False

        print(f"Text found Text found Text found : {matched}")
        print(f"Text found Text found link : https://www.digikala.com/product/dkp-{product_id}/")

        print('----------*************************------------')
        return True
    except Exception as e:
        # Best-effort: a broken image or OCR failure must not stop the scan.
        print(f"Error processing image: {e}")
        return False


 def copy_file(source_path, destination_dir):
    """Copy ``source_path`` into ``destination_dir`` and print the outcome.

    Any exception raised while copying is caught and printed instead of
    being propagated; the function always returns None.
    """
    try:
        shutil.copy(source_path, destination_dir)
        success_message = f"File copied from {source_path} to {destination_dir}"
        print(success_message)
    except Exception as err:
        print(f"Error copying file: {err}")



 def fetch_products(page_url, timeout=10):
    """Fetch one category listing page and return its decoded JSON.

    Args:
        page_url: Full URL of the category search endpoint to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON body on HTTP 200; None on any other status code, on a
        request error, or when the body cannot be decoded as JSON.
    """
    try:
        # Without a timeout, requests can wait indefinitely on a stalled server.
        response = requests.get(page_url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to fetch products: {response.status_code}")
            return None
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error fetching products: {e}")
        return None


 def fetch_product_details(product_id, timeout=10):
    """Fetch the detail JSON for a single product.

    Args:
        product_id: Id interpolated into the product API URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON body on HTTP 200; None on any other status code, on a
        request error, or when the body cannot be decoded as JSON.
    """
    product_url = f"https://api.digikala.com/v2/product/{product_id}/"
    try:
        # Without a timeout, requests can wait indefinitely on a stalled server.
        response = requests.get(product_url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to fetch product {product_id}: {response.status_code}")
            return None
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Error fetching product {product_id}: {e}")
        return None


 def download_image(image_url, product_id, folder="digikala_images"):
    """Download one product image, OCR it, and keep a copy if the text is found.

    Args:
        image_url: URL of the image to download.
        product_id: Product id; used in the saved file name and passed to the
            OCR step.
        folder: Directory the image is saved in. Images in which the text is
            found are also copied to its "final" subdirectory.

    Request and file-system errors are printed and otherwise ignored, so one
    bad image does not abort the caller.
    """
    try:
        os.makedirs(folder, exist_ok=True)
        # Without a timeout, requests can wait indefinitely on a stalled
        # server; the context manager releases the streamed connection.
        with requests.get(image_url, stream=True, timeout=30) as response:
            if response.status_code != 200:
                print(f"Failed to download: {image_url}")
                return
            # Random suffix keeps the images of one product apart.
            random_number = random.randint(0, 10000)
            image_path = os.path.join(folder, f"{product_id}_{random_number}.jpeg")
            with open(image_path, "wb") as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)

        if find_text_in_image(image_path=image_path, product_id=product_id):
            # Derive the destination from `folder` instead of a hard-coded
            # absolute path, so the script works on any machine.
            dest_dir = os.path.join(folder, "final")
            os.makedirs(dest_dir, exist_ok=True)
            copy_file(source_path=image_path, destination_dir=dest_dir)
    except (requests.RequestException, OSError) as e:
        print(f"Error downloading {image_url}: {e}")


 def process_product(product_id):
    """Fetch a product's details and download every image it lists.

    Args:
        product_id: Id of the product to process.

    Returns None. Prints the product id when the product has no images; does
    nothing when the product details could not be fetched.
    """
    product_data = fetch_product_details(product_id)
    if not product_data:
        return

    # Guard each level: a missing or null key would otherwise make the
    # chained .get() calls raise inside the worker.
    product = (product_data.get("data") or {}).get("product") or {}
    images = (product.get("images") or {}).get("list") or []
    if not images:
        print(f"product_id : ::::: {product_id}")
        return

    for image in images:
        urls = image.get("url") or []
        # "url" is indexed as a sequence (first entry is used); a plain
        # string is accepted as-is rather than reduced to its first character.
        if isinstance(urls, str):
            image_url = urls
        else:
            image_url = urls[0] if urls else None
        if image_url:
            download_image(image_url=image_url, product_id=product_id)


 def main():
    """Walk the category listing pages and process every product found.

    Stops at the first page that cannot be fetched. Products of each page are
    processed in parallel by a single worker pool shared across all pages.
    """
    max_page = 200
    # One pool for the whole run: creating a new pool per page would spawn
    # and tear down all worker processes on every iteration.
    with Pool(cpu_count()) as pool:
        # NOTE(review): numbering starts at page 0 - confirm whether the API
        # treats page 0 and page 1 as the same page.
        for page in range(max_page):
            print(page)
            category_url = f"https://api.digikala.com/v1/categories/rural-products/search/?page={page}&sort=7&th_no_track=1"
            print(category_url)

            category_data = fetch_products(category_url)
            if not category_data:
                return

            # Tolerate a missing or null "data"/"products" level.
            products = (category_data.get("data") or {}).get("products") or []
            product_ids = [product.get("id") for product in products if "id" in product]

            pool.map(process_product, product_ids)


 if __name__ == "__main__":
    main()
diff --git a/ocr.py b/ocr.py
 from PIL import Image
 import pytesseract


 def find_text_in_image(image_path, product_id, target_text=''):
    """Run Persian OCR on an image and report whether ``target_text`` appears.

    Args:
        image_path: Path of the image file to open with PIL.
        product_id: Label printed next to the extracted text.
        target_text: Text to search for in the OCR output. An empty target is
            reported as not found.

    Returns:
        True if a non-empty ``target_text`` occurs in the OCR output. False
        otherwise, or when opening/reading the image raised an exception.
    """
    try:
        # The context manager closes the underlying image file after OCR.
        with Image.open(image_path) as img:
            extracted_text = pytesseract.image_to_string(img, lang="fas")  # 'fas' for Persian

        print('----------------------')
        print(f"{product_id} : {extracted_text}")
        print('----------------------')
        # '' is a substring of every string, so an empty target must not
        # count as a match.
        if target_text and target_text in extracted_text:
            print(f"Text found: {target_text}")
            return True
        print("Text not found.")
        return False
    except Exception as e:
        print(f"Error processing image: {e}")
        return False


 # Manual smoke test: run OCR on a sample image and look for the phrase.
 image_path = "/home/user1/Documents/project/digi/digikala_images/sample.jpeg"  # Replace with your image file path
 target_text = "شکار گنج"
 # The second positional parameter is product_id, so target_text has to be
 # passed by keyword; product_id is only a label in the printed output.
 find_text_in_image(image_path, product_id="sample", target_text=target_text)
	""" این کد جهت یافتن شکار گنج دیجی کالاست"""

	import os
	import requests
	from multiprocessing import Pool, cpu_count
	import random
	# Create a folder to save images
	os.makedirs("digikala_images", exist_ok=True)
	os.makedirs("digikala_images/final", exist_ok=True)

	import shutil
	from PIL import Image
	import pytesseract


	def find_text_in_image(image_path, product_id, target_text=''):
	    """Run Persian OCR on an image and report whether the target text appears.

	    Args:
	        image_path: Path of the image file to open with PIL.
	        product_id: Product id; used only in the printed product link/output.
	        target_text: Text to search for. When empty (the default), the two
	            built-in spellings of the phrase (with and without a space) are
	            searched for instead.

	    Returns:
	        True if any searched text occurs in the OCR output. False when it does
	        not, or when opening/reading the image raised an exception.
	    """
	    # Honour an explicit target; otherwise fall back to both default spellings.
	    if target_text:
	        targets = (target_text,)
	    else:
	        targets = ("شکار گنج", "شکارگنج")

	    try:
	        # The context manager closes the underlying image file after OCR.
	        with Image.open(image_path) as img:
	            extracted_text = pytesseract.image_to_string(img, lang="fas")  # 'fas' for Persian

	        # Only log images where OCR produced some text.
	        cleaned_text = extracted_text.strip()
	        if cleaned_text:
	            print('----------------------')

	            print(f"link : https://www.digikala.com/product/dkp-{product_id}/")
	            print(f"{product_id} : {cleaned_text}")

	        matched = next((t for t in targets if t in extracted_text), None)
	        if matched is None:
	            return False

	        print(f"Text found Text found Text found : {matched}")
	        print(f"Text found Text found link : https://www.digikala.com/product/dkp-{product_id}/")

	        print('----------*************************------------')
	        return True
	    except Exception as e:
	        # Best-effort: a broken image or OCR failure must not stop the scan.
	        print(f"Error processing image: {e}")
	        return False


	def copy_file(source_path, destination_dir):
	    """Copy ``source_path`` into ``destination_dir`` and print the outcome.

	    Any exception raised while copying is caught and printed instead of
	    being propagated; the function always returns None.
	    """
	    try:
	        shutil.copy(source_path, destination_dir)
	        print(f"File copied from {source_path} to {destination_dir}")
	    except Exception as e:
	        print(f"Error copying file: {e}")



	def fetch_products(page_url, timeout=10):
	    """Fetch one category listing page and return its decoded JSON.

	    Args:
	        page_url: Full URL of the category search endpoint to request.
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        The parsed JSON body on HTTP 200; None on any other status code, on a
	        request error, or when the body cannot be decoded as JSON.
	    """
	    try:
	        # Without a timeout, requests can wait indefinitely on a stalled server.
	        response = requests.get(page_url, timeout=timeout)
	        if response.status_code != 200:
	            print(f"Failed to fetch products: {response.status_code}")
	            return None
	        return response.json()
	    except (requests.RequestException, ValueError) as e:
	        # ValueError covers a body that is not valid JSON.
	        print(f"Error fetching products: {e}")
	        return None


	def fetch_product_details(product_id, timeout=10):
	    """Fetch the detail JSON for a single product.

	    Args:
	        product_id: Id interpolated into the product API URL.
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        The parsed JSON body on HTTP 200; None on any other status code, on a
	        request error, or when the body cannot be decoded as JSON.
	    """
	    product_url = f"https://api.digikala.com/v2/product/{product_id}/"
	    try:
	        # Without a timeout, requests can wait indefinitely on a stalled server.
	        response = requests.get(product_url, timeout=timeout)
	        if response.status_code != 200:
	            print(f"Failed to fetch product {product_id}: {response.status_code}")
	            return None
	        return response.json()
	    except (requests.RequestException, ValueError) as e:
	        # ValueError covers a body that is not valid JSON.
	        print(f"Error fetching product {product_id}: {e}")
	        return None


	def download_image(image_url, product_id, folder="digikala_images"):
	    """Download one product image, OCR it, and keep a copy if the text is found.

	    Args:
	        image_url: URL of the image to download.
	        product_id: Product id; used in the saved file name and passed to the
	            OCR step.
	        folder: Directory the image is saved in. Images in which the text is
	            found are also copied to its "final" subdirectory.

	    Request and file-system errors are printed and otherwise ignored, so one
	    bad image does not abort the caller.
	    """
	    try:
	        os.makedirs(folder, exist_ok=True)
	        # Without a timeout, requests can wait indefinitely on a stalled
	        # server; the context manager releases the streamed connection.
	        with requests.get(image_url, stream=True, timeout=30) as response:
	            if response.status_code != 200:
	                print(f"Failed to download: {image_url}")
	                return
	            # Random suffix keeps the images of one product apart.
	            random_number = random.randint(0, 10000)
	            image_path = os.path.join(folder, f"{product_id}_{random_number}.jpeg")
	            with open(image_path, "wb") as f:
	                for chunk in response.iter_content(1024):
	                    f.write(chunk)

	        if find_text_in_image(image_path=image_path, product_id=product_id):
	            # Derive the destination from `folder` instead of a hard-coded
	            # absolute path, so the script works on any machine.
	            dest_dir = os.path.join(folder, "final")
	            os.makedirs(dest_dir, exist_ok=True)
	            copy_file(source_path=image_path, destination_dir=dest_dir)
	    except (requests.RequestException, OSError) as e:
	        print(f"Error downloading {image_url}: {e}")


	def process_product(product_id):
	    """Fetch a product's details and download every image it lists.

	    Args:
	        product_id: Id of the product to process.

	    Returns None. Prints the product id when the product has no images; does
	    nothing when the product details could not be fetched.
	    """
	    product_data = fetch_product_details(product_id)
	    if not product_data:
	        return

	    # Guard each level: a missing or null key would otherwise make the
	    # chained .get() calls raise inside the worker.
	    product = (product_data.get("data") or {}).get("product") or {}
	    images = (product.get("images") or {}).get("list") or []
	    if not images:
	        print(f"product_id : ::::: {product_id}")
	        return

	    for image in images:
	        urls = image.get("url") or []
	        # "url" is indexed as a sequence (first entry is used); a plain
	        # string is accepted as-is rather than reduced to its first character.
	        if isinstance(urls, str):
	            image_url = urls
	        else:
	            image_url = urls[0] if urls else None
	        if image_url:
	            download_image(image_url=image_url, product_id=product_id)


	def main():
	    """Walk the category listing pages and process every product found.

	    Stops at the first page that cannot be fetched. Products of each page are
	    processed in parallel by a single worker pool shared across all pages.
	    """
	    max_page = 200
	    # One pool for the whole run: creating a new pool per page would spawn
	    # and tear down all worker processes on every iteration.
	    with Pool(cpu_count()) as pool:
	        # NOTE(review): numbering starts at page 0 - confirm whether the API
	        # treats page 0 and page 1 as the same page.
	        for page in range(max_page):
	            print(page)
	            category_url = f"https://api.digikala.com/v1/categories/rural-products/search/?page={page}&sort=7&th_no_track=1"
	            print(category_url)

	            category_data = fetch_products(category_url)
	            if not category_data:
	                return

	            # Tolerate a missing or null "data"/"products" level.
	            products = (category_data.get("data") or {}).get("products") or []
	            product_ids = [product.get("id") for product in products if "id" in product]

	            pool.map(process_product, product_ids)


	if __name__ == "__main__":
	    main()