wkarney · July 26, 2024 01:57 · junguler · Nov 15, 2022 · wkarney · Nov 16, 2022
diff --git a/infinite_scroll_scraping_selenium.py b/infinite_scroll_scraping_selenium.py
 # Scrape an infinite-scroll page: keep scrolling to the bottom until the page
 # stops growing (or a scroll limit is hit), then parse the loaded HTML.
 from time import sleep

 from bs4 import BeautifulSoup
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service

 # Headless/incognito Chrome driver
 chrome_options = webdriver.ChromeOptions()
 chrome_options.add_argument("--incognito")
 chrome_options.add_argument('headless')
 # Current Selenium 4 releases no longer accept the `executable_path` and
 # `chrome_options` keyword arguments: the driver path is supplied through a
 # Service object and the options through `options`.
 driver = webdriver.Chrome(
     service=Service(executable_path='CHROMEDRIVER_PATH'),
     options=chrome_options,
 )

 # Set sleep time for the page to load on scroll
 SCROLL_PAUSE_TIME = 2

 # If you want to limit the number of scroll loads, add a limit here
 scroll_limit = 5

 try:
     driver.get('http://www.exampleurl.com')

     # Get scroll height
     last_height = driver.execute_script("return document.body.scrollHeight")

     count = 0
     while count < scroll_limit:
         # Scroll down to bottom
         driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

         # Wait to load page
         sleep(SCROLL_PAUSE_TIME)

         # Calculate new scroll height and compare with last scroll height;
         # an unchanged height means the last scroll loaded nothing new.
         new_height = driver.execute_script("return document.body.scrollHeight")
         if new_height == last_height:
             break
         last_height = new_height
         count += 1

     # Final pause before capturing the page source
     sleep(2)

     html = driver.page_source
 finally:
     # Always shut the browser down, even if scrolling fails, so no headless
     # Chrome process is left running.
     driver.quit()

 soup = BeautifulSoup(html, 'lxml')
	# Scrape an infinite-scroll page: keep scrolling to the bottom until the page
	# stops growing (or a scroll limit is hit), then parse the loaded HTML.
	from time import sleep

	from bs4 import BeautifulSoup
	from selenium import webdriver
	from selenium.webdriver.chrome.service import Service

	# Headless/incognito Chrome driver
	chrome_options = webdriver.ChromeOptions()
	chrome_options.add_argument("--incognito")
	chrome_options.add_argument('headless')
	# Current Selenium 4 releases no longer accept the `executable_path` and
	# `chrome_options` keyword arguments: the driver path is supplied through a
	# Service object and the options through `options`.
	driver = webdriver.Chrome(
	    service=Service(executable_path='CHROMEDRIVER_PATH'),
	    options=chrome_options,
	)

	# Set sleep time for the page to load on scroll
	SCROLL_PAUSE_TIME = 2

	# If you want to limit the number of scroll loads, add a limit here
	scroll_limit = 5

	try:
	    driver.get('http://www.exampleurl.com')

	    # Get scroll height
	    last_height = driver.execute_script("return document.body.scrollHeight")

	    count = 0
	    while count < scroll_limit:
	        # Scroll down to bottom
	        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

	        # Wait to load page
	        sleep(SCROLL_PAUSE_TIME)

	        # Calculate new scroll height and compare with last scroll height;
	        # an unchanged height means the last scroll loaded nothing new.
	        new_height = driver.execute_script("return document.body.scrollHeight")
	        if new_height == last_height:
	            break
	        last_height = new_height
	        count += 1

	    # Final pause before capturing the page source
	    sleep(2)

	    html = driver.page_source
	finally:
	    # Always shut the browser down, even if scrolling fails, so no headless
	    # Chrome process is left running.
	    driver.quit()

	soup = BeautifulSoup(html, 'lxml')
No results found