jeanmidevacc · November 4, 2019 00:27
diff --git a/offers_collecter.py b/offers_collecter.py
 # Load the dependencies
 from selenium import webdriver
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
 from bs4 import BeautifulSoup as bs
 from time import sleep

 # Collect the relative urls of the offers listed by Turo for a given city.
 # The script opens the main page in Firefox, submits the location in the
 # search form, then scrolls the result page step by step while harvesting
 # the href of every <a class="vehicleCard"> element found in the page source.
 # The result is available in `ads_collected` once the script has run.

 # Define the main url (where to log the location)
 url_main_page = "https://turo.com/en-us?locale=en_US"

 # Define the city where to search the offers
 location = "Montreal, Quebec"

 # Number of pixels added to the vertical scroll position at each step
 scroll_step = 400

 # Hard cap on the number of scrolls, so the loop always terminates
 max_scrolls = 10

 # Define the driver for the execution
 driver = webdriver.Firefox()

 # Whatever happens below, the browser session must be released
 try:
     driver.maximize_window()

     # Connect to the main page
     driver.get(url_main_page)

     # Fixed pause for the page animation
     sleep(5)

     # Input the location
     inputElement = driver.find_element_by_id("js-searchFormExpandedLocationInput")
     inputElement.send_keys(location)

     # Submit the search
     python_button = driver.find_elements_by_xpath("//button[@class='button button--green searchFormExpanded-button searchFormExpanded-button--round u-hideTinyScreen']")[0]
     python_button.click()

     # Fixed pause for the animation of the result page
     sleep(10)

     # Scroll all along the page of the search
     # Based on https://michaeljsanders.com/2017/05/12/scrapin-and-scrollin.html

     # Vertical position reached by the last scroll that exposed offers
     lowbound = 0

     # Define the number of scrolls
     count_scroll = 0

     # Define the list that will hold the part of the url for the ads
     ads_collected = []
     count_adscollected = 0

     # Loop until no new offer shows up or the cap of scrolls is reached
     while True:
         print("Scroll" , count_scroll)

         # Define the next vertical position to reach
         highbound = lowbound + scroll_step

         # window.scrollTo takes (x, y): keep the horizontal position at 0
         # and only move the vertical position
         driver.execute_script(f"window.scrollTo(0, {highbound});")

         # Collect all the data on the current page area
         source_data = driver.page_source
         soup = bs(source_data,features="html.parser")

         # Collect all the ads on the page (anchors that carry an href)
         ads = soup.find_all('a',{'class':'vehicleCard'}, href= True)

         if ads:
             new_ads = [ad["href"] for ad in ads]
             lowbound = highbound
             # The set drops the offers already seen in a previous scroll
             ads_collected = list(set(ads_collected + new_ads))

         # If the past and current counts are the same, this scroll brought
         # nothing new: stop the scraping
         if count_adscollected == len(ads_collected):
             break

         # Store the new count of offers collected (this must be an
         # assignment, a comparison here would leave the count at 0)
         count_adscollected = len(ads_collected)

         # Fixed pause for the animation
         sleep(2)

         # Iterate on the scroll
         count_scroll += 1

         # Safety net: never scroll more than max_scrolls times
         if count_scroll == max_scrolls:
             break
 finally:
     # Stop the driver: quit() ends the whole session (browser and driver
     # process), where close() would only close the current window
     driver.quit()
	# Load the dependencies
	from selenium import webdriver
	from selenium.webdriver.common.keys import Keys
	from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
	from bs4 import BeautifulSoup as bs
	from time import sleep

	# Collect the relative urls of the offers listed by Turo for a given city.
	# The script opens the main page in Firefox, submits the location in the
	# search form, then scrolls the result page step by step while harvesting
	# the href of every <a class="vehicleCard"> element found in the page source.
	# The result is available in `ads_collected` once the script has run.

	# Define the main url (where to log the location)
	url_main_page = "https://turo.com/en-us?locale=en_US"

	# Define the city where to search the offers
	location = "Montreal, Quebec"

	# Number of pixels added to the vertical scroll position at each step
	scroll_step = 400

	# Hard cap on the number of scrolls, so the loop always terminates
	max_scrolls = 10

	# Define the driver for the execution
	driver = webdriver.Firefox()

	# Whatever happens below, the browser session must be released
	try:
		driver.maximize_window()

		# Connect to the main page
		driver.get(url_main_page)

		# Fixed pause for the page animation
		sleep(5)

		# Input the location
		inputElement = driver.find_element_by_id("js-searchFormExpandedLocationInput")
		inputElement.send_keys(location)

		# Submit the search
		python_button = driver.find_elements_by_xpath("//button[@class='button button--green searchFormExpanded-button searchFormExpanded-button--round u-hideTinyScreen']")[0]
		python_button.click()

		# Fixed pause for the animation of the result page
		sleep(10)

		# Scroll all along the page of the search
		# Based on https://michaeljsanders.com/2017/05/12/scrapin-and-scrollin.html

		# Vertical position reached by the last scroll that exposed offers
		lowbound = 0

		# Define the number of scrolls
		count_scroll = 0

		# Define the list that will hold the part of the url for the ads
		ads_collected = []
		count_adscollected = 0

		# Loop until no new offer shows up or the cap of scrolls is reached
		while True:
			print("Scroll" , count_scroll)

			# Define the next vertical position to reach
			highbound = lowbound + scroll_step

			# window.scrollTo takes (x, y): keep the horizontal position at 0
			# and only move the vertical position
			driver.execute_script(f"window.scrollTo(0, {highbound});")

			# Collect all the data on the current page area
			source_data = driver.page_source
			soup = bs(source_data,features="html.parser")

			# Collect all the ads on the page (anchors that carry an href)
			ads = soup.find_all('a',{'class':'vehicleCard'}, href= True)

			if ads:
				new_ads = [ad["href"] for ad in ads]
				lowbound = highbound
				# The set drops the offers already seen in a previous scroll
				ads_collected = list(set(ads_collected + new_ads))

			# If the past and current counts are the same, this scroll brought
			# nothing new: stop the scraping
			if count_adscollected == len(ads_collected):
				break

			# Store the new count of offers collected (this must be an
			# assignment, a comparison here would leave the count at 0)
			count_adscollected = len(ads_collected)

			# Fixed pause for the animation
			sleep(2)

			# Iterate on the scroll
			count_scroll += 1

			# Safety net: never scroll more than max_scrolls times
			if count_scroll == max_scrolls:
				break
	finally:
		# Stop the driver: quit() ends the whole session (browser and driver
		# process), where close() would only close the current window
		driver.quit()