Skip to content

Instantly share code, notes, and snippets.

@jeanmidevacc
Last active November 4, 2019 00:27
Show Gist options
  • Save jeanmidevacc/3046974d6b97e98e763c6df87abd6b08 to your computer and use it in GitHub Desktop.
Save jeanmidevacc/3046974d6b97e98e763c6df87abd6b08 to your computer and use it in GitHub Desktop.
Script to collect offers from a Turo search for a specific city
# Script: collect vehicle-ad links from a Turo search for a given city.
# Drives a live Firefox browser with Selenium, scrolls the results page in
# 400px steps, and parses each rendered snapshot with BeautifulSoup.

# Load the dependencies
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from bs4 import BeautifulSoup as bs
from time import sleep

# Main page URL (where the search location is entered)
url_main_page = "https://turo.com/en-us?locale=en_US"
# City to search the offers in
location = "Montreal, Quebec"

# Start the Firefox driver for the session
driver = webdriver.Firefox()
driver.maximize_window()

# Connect to the main page
driver.get(url_main_page)
# Wait for the page animation to finish
sleep(5)

# Type the location into the search input
inputElement = driver.find_element_by_id("js-searchFormExpandedLocationInput")
inputElement.send_keys(location)

# Submit the search (first matching button; raises IndexError if the page
# layout changed and the button class no longer matches)
python_button = driver.find_elements_by_xpath("//button[@class='button button--green searchFormExpanded-button searchFormExpanded-button--round u-hideTinyScreen']")[0]
python_button.click()
# Wait for the results page to load
sleep(10)

# Scroll all along the results page, harvesting ad links as they lazy-load.
# Based on https://michaeljsanders.com/2017/05/12/scrapin-and-scrollin.html
lowbound = 0            # low bound of the scrolling window (pixels)
count_scroll = 0        # number of scrolls performed so far
ads_collected = []      # unique hrefs of the vehicle ads collected
count_adscollected = 0  # ad count as of the previous iteration

while True:
    print("Scroll", count_scroll)
    # High bound of the scrolling window: one 400px step below the low bound
    highbound = lowbound + 400
    # Scroll between the low and the high bounds
    driver.execute_script(f"window.scrollTo({lowbound}, {highbound});")

    # Parse the currently rendered page
    source_data = driver.page_source
    soup = bs(source_data, features="html.parser")

    # Collect all the vehicle-ad links visible on the page
    ads = soup.find_all('a', {'class': 'vehicleCard'}, href=True)
    if len(ads) > 0:
        new_ads = [ad["href"] for ad in ads]
        # Only advance the window once this area has yielded ads
        lowbound = highbound
        # Deduplicate while merging in the newly found links
        ads_collected = list(set(ads_collected + new_ads))

    # Stop once a full pass adds no new ads (the page stopped growing)
    if count_adscollected == len(ads_collected):
        break
    # Store the new count of offers collected.
    # BUGFIX: was `count_adscollected == len(ads_collected)` — a no-op
    # comparison instead of an assignment, so the counter never updated and
    # the early-stop condition above could never trigger once ads existed.
    count_adscollected = len(ads_collected)

    # Let lazy-loaded content render before the next scroll
    sleep(2)
    count_scroll += 1
    # Safety cap: never perform more than 10 scrolls
    if count_scroll == 10:
        break

# Stop the driver
driver.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment