git-bhanu · June 4, 2021 09:31
diff --git a/scrape.py b/scrape.py
 import pandas as pd
 from tqdm import tqdm
 from selenium.webdriver import Chrome
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.common.exceptions import NoSuchElementException
 from selenium.common.exceptions import TimeoutException

 from time import sleep

 # Resumable scraper: loads the results CSV, visits the link of every row
 # whose 'content' cell is still empty, extracts name / content / price /
 # categories from the page and writes the whole table back after each row.

 # Windows paths are raw strings so the backslashes are never treated as
 # escape sequences.
 CSV_PATH = r"D:\Scrape\WooCommerce Plugin\woocommerce_resultData.csv"
 CHROMEDRIVER_PATH = r"C:\chromedriver_win32\chromedriver.exe"

 # Removes the product header node from the page. The `if(aa)` guard keeps
 # the snippet from throwing on pages that have no such header.
 REMOVE_HEADER_JS = (
     "var aa=document.getElementsByClassName('product-new-header')[0];"
     "if(aa){aa.parentNode.removeChild(aa);}"
 )

 data = pd.read_csv(CSV_PATH)

 driver = Chrome(executable_path=CHROMEDRIVER_PATH)

 try:
     driver.set_page_load_timeout(10)

     bar = tqdm(data.index)
     for ind in bar:
         bar.set_postfix({'Package': data['name'][ind]})

         # Only rows whose content has not been filled in yet are scraped.
         if not pd.isna(data['content'][ind]):
             continue

         url = data['link'][ind]
         if not isinstance(url, str):
             # An empty link cell is not a string, so there is nothing to visit.
             continue

         # Drop the query string. partition() returns the whole URL when it
         # contains no '?', whereas slicing with str.find() (-1 when absent)
         # would cut off the last character.
         link = url.partition('?')[0]

         try:
             driver.get(link)
         except Exception:
             # Page-load timeout or any other navigation failure: leave the
             # row untouched and move on. `except Exception` (not a bare
             # except) lets Ctrl-C still stop the run.
             continue

         try:
             main_content = driver.find_element_by_css_selector(".entry-content")
         except Exception:
             continue

         # Title of the product
         try:
             product_name = main_content.find_element_by_css_selector(
                 ".product-new-header h1").get_attribute('innerHTML')
         except Exception:
             product_name = False

         # Strip the header so it is not part of the content text read below.
         driver.execute_script(REMOVE_HEADER_JS)

         # Content of the product
         try:
             product_content = driver.find_element_by_css_selector(".entry-content").text
         except Exception:
             product_content = False

         # Price of the product: tier price if present, otherwise the text of
         # the buy-now button, otherwise False.
         try:
             price = driver.find_element_by_css_selector(".product-new-sidebar .tier-price").text
         except NoSuchElementException:
             try:
                 price = driver.find_element_by_css_selector("button[name=buy-now-button]").text
             except NoSuchElementException:
                 price = False

         # Categories of the product, joined into one comma-separated string
         try:
             category_links = driver.find_element_by_css_selector(
                 ".details-category").find_elements_by_tag_name("a")
             final_category = ",".join(anchor.text for anchor in category_links)
         except Exception:
             final_category = False

         data.loc[ind, 'link'] = link
         data.loc[ind, 'name'] = product_name
         data.loc[ind, 'content'] = product_content
         data.loc[ind, 'price'] = price
         data.loc[ind, 'categories'] = final_category

         # Saved after every row so progress survives an interrupted run.
         # index=False: the file is read back above without index_col, so
         # writing the index would add one more 'Unnamed: 0' column per run.
         data.to_csv(CSV_PATH, index=False)
 finally:
     # Always shut the browser down, even when the loop raises.
     driver.quit()
	import pandas as pd
	from tqdm import tqdm
	from selenium.webdriver import Chrome
	from selenium.webdriver.support.ui import WebDriverWait
	from selenium.common.exceptions import NoSuchElementException
	from selenium.common.exceptions import TimeoutException

	from time import sleep

	# Resumable scraper: loads the results CSV, visits the link of every row
	# whose 'content' cell is still empty, extracts name / content / price /
	# categories from the page and writes the whole table back after each row.

	# Windows paths are raw strings so the backslashes are never treated as
	# escape sequences.
	CSV_PATH = r"D:\Scrape\WooCommerce Plugin\woocommerce_resultData.csv"
	CHROMEDRIVER_PATH = r"C:\chromedriver_win32\chromedriver.exe"

	# Removes the product header node from the page. The `if(aa)` guard keeps
	# the snippet from throwing on pages that have no such header.
	REMOVE_HEADER_JS = (
		"var aa=document.getElementsByClassName('product-new-header')[0];"
		"if(aa){aa.parentNode.removeChild(aa);}"
	)

	data = pd.read_csv(CSV_PATH)

	driver = Chrome(executable_path=CHROMEDRIVER_PATH)

	try:
		driver.set_page_load_timeout(10)

		bar = tqdm(data.index)
		for ind in bar:
			bar.set_postfix({'Package': data['name'][ind]})

			# Only rows whose content has not been filled in yet are scraped.
			if not pd.isna(data['content'][ind]):
				continue

			url = data['link'][ind]
			if not isinstance(url, str):
				# An empty link cell is not a string, so there is nothing to visit.
				continue

			# Drop the query string. partition() returns the whole URL when it
			# contains no '?', whereas slicing with str.find() (-1 when absent)
			# would cut off the last character.
			link = url.partition('?')[0]

			try:
				driver.get(link)
			except Exception:
				# Page-load timeout or any other navigation failure: leave the
				# row untouched and move on. `except Exception` (not a bare
				# except) lets Ctrl-C still stop the run.
				continue

			try:
				main_content = driver.find_element_by_css_selector(".entry-content")
			except Exception:
				continue

			# Title of the product
			try:
				product_name = main_content.find_element_by_css_selector(
					".product-new-header h1").get_attribute('innerHTML')
			except Exception:
				product_name = False

			# Strip the header so it is not part of the content text read below.
			driver.execute_script(REMOVE_HEADER_JS)

			# Content of the product
			try:
				product_content = driver.find_element_by_css_selector(".entry-content").text
			except Exception:
				product_content = False

			# Price of the product: tier price if present, otherwise the text of
			# the buy-now button, otherwise False.
			try:
				price = driver.find_element_by_css_selector(".product-new-sidebar .tier-price").text
			except NoSuchElementException:
				try:
					price = driver.find_element_by_css_selector("button[name=buy-now-button]").text
				except NoSuchElementException:
					price = False

			# Categories of the product, joined into one comma-separated string
			try:
				category_links = driver.find_element_by_css_selector(
					".details-category").find_elements_by_tag_name("a")
				final_category = ",".join(anchor.text for anchor in category_links)
			except Exception:
				final_category = False

			data.loc[ind, 'link'] = link
			data.loc[ind, 'name'] = product_name
			data.loc[ind, 'content'] = product_content
			data.loc[ind, 'price'] = price
			data.loc[ind, 'categories'] = final_category

			# Saved after every row so progress survives an interrupted run.
			# index=False: the file is read back above without index_col, so
			# writing the index would add one more 'Unnamed: 0' column per run.
			data.to_csv(CSV_PATH, index=False)
	finally:
		# Always shut the browser down, even when the loop raises.
		driver.quit()
No results found