Scrape Restaurant data with Selenium
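The script below drives Chrome to search Google Maps for restaurants around a fixed coordinate, scrolls the results panel until enough entries have loaded, and writes each place's name and link to a CSV file. It depends on three third-party packages (inferred from the imports): selenium, beautifulsoup4, and webdriver-manager, installable with pip install selenium beautifulsoup4 webdriver-manager.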
import csv
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

class RestaurantScrape:
    def __init__(self, driver):
        self.driver = driver

    def wait_for_element(self, locator, timeout=5):
        """Return the element once it is present in the DOM, or None on timeout."""
        try:
            element_present = WebDriverWait(self.driver, timeout).until(
                EC.presence_of_element_located(locator)
            )
        except TimeoutException:
            element_present = None
        return element_present
    def search_location(self, url, searchdata):
        """Open the given Maps URL and submit a search query."""
        self.driver.get(url)
        searchbox_identifier = (By.ID, "searchboxinput")
        searchbox_element = self.wait_for_element(searchbox_identifier)
        if searchbox_element is None:
            raise RuntimeError("Search box did not appear within the timeout")
        searchbox_element.send_keys(searchdata)
        time.sleep(2)
        searchbox_element.send_keys(Keys.RETURN)
        time.sleep(5)  # give the results panel time to render
    def collect_data(self, required_records, output_file):
        """Collect up to the specified number of records and write them to CSV."""
        records = []
        scroll_attempts = 0
        max_scrolls = 10
        while len(records) < required_records and scroll_attempts < max_scrolls:
            # Result cards in the left-hand panel. These class names are
            # generated by Google Maps and can change without notice.
            records = self.driver.find_elements(
                By.XPATH, '//*[@class="Nv2PK THOPZb CpccDe "]//a'
            )
            if len(records) >= required_records:
                break
            # The scrollable container gains an extra class ("QjC7t")
            # after the first scroll, so the locator differs.
            if scroll_attempts == 0:
                scroll_identifier = (
                    By.XPATH,
                    '//*[@class="m6QErb DxyBCb kA9KIf dS8AEf XiKgde ecceSd"]',
                )
            else:
                scroll_identifier = (
                    By.XPATH,
                    '//*[@class="m6QErb DxyBCb kA9KIf dS8AEf XiKgde ecceSd QjC7t"]',
                )
            results_container = self.driver.find_element(*scroll_identifier)
            results_container.send_keys(Keys.PAGE_DOWN)
            time.sleep(2)  # wait for lazily loaded results
            scroll_attempts += 1
        with open(output_file, mode="w", newline="", encoding="utf-8") as file:
            writer = csv.writer(file)
            writer.writerow(["Name", "Link"])
            for tag in records:
                # Parse each result's HTML to pull the place name
                # (aria-label) and its Maps link.
                html_content = tag.get_attribute("outerHTML")
                soup = BeautifulSoup(html_content, "html.parser")
                a_tag = soup.find("a", class_="hfpxzc")
                if a_tag:
                    name = a_tag.get("aria-label")
                    link = a_tag.get("href")
                    writer.writerow([name, link])

service = ChromeService(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

# Search for restaurants around a fixed coordinate (lat,lng) at zoom level 12.
location = "35.851880,-80.521580"
endpoint = f"https://www.google.com/maps/@{location},12z?hl=en"
output_file = "restaurants.csv"

restaurant_scrape = RestaurantScrape(driver)
restaurant_scrape.search_location(endpoint, "restaurant")
restaurant_scrape.collect_data(5, output_file)
driver.quit()  # release the browser when done
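To run the scrape without opening a visible browser window, Chrome can be started headless via Selenium's standard Options API. This variant is a sketch, not part of the original gist:

# Optional headless variant (sketch; swap for the plain webdriver.Chrome call above)
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("--headless=new")  # headless mode in recent Chrome builds
driver = webdriver.Chrome(service=service, options=options)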