Skip to content

Instantly share code, notes, and snippets.

@riteshkawadkar
Last active November 16, 2021 08:27
Show Gist options
  • Save riteshkawadkar/5b427de101f008743e600cf74ba2a7e6 to your computer and use it in GitHub Desktop.
Save riteshkawadkar/5b427de101f008743e600cf74ba2a7e6 to your computer and use it in GitHub Desktop.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver import Chrome
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import settings
import requests
import json
import html
import os
import pathlib
from selectorlib import Extractor
import urllib.request
# Chrome user-data directory passed to the browser through the
# "user-data-dir" switch below.
profilePath = "C:\\Users\\justs\\AppData\\Local\\Google\\Chrome\\User Data"

# Shared Chrome launch options; the switches are added in the order listed.
options = Options()
for _chrome_switch in (
    '--headless',
    '--no-sandbox',
    '--disable-dev-shm-usage',
    "--ignore-certificate-errors",
    "user-data-dir=" + profilePath,
):
    options.add_argument(_chrome_switch)
del _chrome_switch  # keep the loop variable out of the module namespace

# Real (symlink-resolved) path of the directory part of this file's path.
SITE_ROOT = os.path.realpath(os.path.dirname(__file__))
def _element_text(driver, xpath, default):
    """Return the text of the first element matching *xpath*, or *default*."""
    try:
        return driver.find_element(By.XPATH, xpath).text
    except Exception:
        # Best-effort scrape: any lookup failure falls back to the default.
        return default


def get_price_name_OLD(target_url, stock, disc, employee, upc=None):
    """Scrape name, price, description, category and image from a product page.

    Opens ``target_url`` in Chrome using the module-level ``options``, waits
    up to 30 seconds for the product title to appear, then reads each field
    independently so that one missing element does not prevent the others
    from being returned. The first product image is downloaded to a fixed
    local path, replacing any previous download.

    Args:
        target_url: URL of the product page to load.
        stock: Accepted for interface compatibility; not used here.
        disc: Accepted for interface compatibility; not used here.
        employee: Accepted for interface compatibility; not used here.
        upc: Value echoed back under the ``"upc"`` key. Defaults to ``None``.
            The original code read an undefined ``upc`` name at this point,
            which raised ``NameError`` on every call.

    Returns:
        A dict with the keys ``upc``, ``product_name``, ``product_price``
        (with every ``$`` removed), ``product_image``,
        ``product_description`` and ``product_category``. Fields that could
        not be read hold ``'Not Found'`` (``'Select'`` for the category,
        ``'Test_default.png'`` for the image).

    Raises:
        Exception: Errors raised while starting Chrome or loading the page,
            other than ``TimeoutException``, propagate to the caller. The
            browser is still shut down in that case.
    """
    title_xpath = "//h1[@data-test = 'product-title']"
    price_xpath = "//div[@data-test = 'product-price']"
    product_image_path = 'C:\\Users\\justs\\OneDrive\\Pictures\\ProductImages\\Test1.png'

    # Delete old file before downloading new one. Removing directly and
    # ignoring "not found" avoids the exists()/remove() race.
    try:
        os.remove(product_image_path)
    except FileNotFoundError:
        pass

    # NOTE(review): passing the driver path positionally is the older
    # Selenium calling convention; newer releases expect a Service object.
    # Confirm against the Selenium version this project pins.
    driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
    try:
        try:
            driver.get(target_url)
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.XPATH, title_xpath)))
            print("PAGE IS READY = " + target_url)
        except TimeoutException:
            # Keep going: the fields below each fall back to a default.
            print("Loading took too much time!")

        product_name = _element_text(driver, title_xpath, 'Not Found')
        product_description = _element_text(
            driver, "//h3[text()='Description']//parent::div//div", 'Not Found')
        product_category = _element_text(
            driver,
            "//div[@data-test='breadcrumb']//span[2]//span[@itemprop='name']",
            'Select')

        # The price gets its own wait before it is read.
        try:
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.XPATH, price_xpath)))
            product_price = driver.find_element(By.XPATH, price_xpath).text
        except Exception:
            product_price = 'Not Found'

        try:
            image = driver.find_element(
                By.XPATH, "//div[@class='slideDeckPicture']//picture/img[1]")
            product_image_url = image.get_attribute('src')
            print(product_image_url)
            urllib.request.urlretrieve(product_image_url, product_image_path)
            product_image_name = 'Test1.png'
        except Exception:
            # Lookup or download failed; report the placeholder image name.
            product_image_name = 'Test_default.png'
    finally:
        # Always shut the browser down, even when page loading raised.
        driver.quit()

    return {
        "upc": upc,
        "product_name": product_name,
        "product_price": product_price.replace('$', ''),
        "product_image": product_image_name,
        "product_description": product_description,
        "product_category": product_category,
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment