Scrape restaurant data from Google Maps with Selenium
import csv
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
class ResturantScrape:
    """Scrape restaurant names and links from Google Maps search results."""

    def __init__(self, driver):
        self.driver = driver

    def wait_for_element(self, locator, timeout=5):
        """Return the element once it is present in the DOM, or None on timeout."""
        try:
            element_present = WebDriverWait(self.driver, timeout).until(
                EC.presence_of_element_located(locator)
            )
        except TimeoutException:
            element_present = None
        return element_present
    def search_location(self, url, searchdata):
        """Open the Maps URL, type the search term, and submit the search."""
        self.driver.get(url)
        searchbox_identifier = (By.ID, "searchboxinput")
        searchbox_element = self.wait_for_element(searchbox_identifier)
        searchbox_element.send_keys(searchdata)
        time.sleep(2)
        searchbox_element.send_keys(Keys.RETURN)
        time.sleep(5)  # give the results panel time to load
    def collect_data(self, required_records, output_file):
        """
        Collect up to `required_records` results and write them to `output_file`.

        Note: the class-name XPaths below are tied to the current Google Maps
        markup and may need updating when Google changes its class names.
        """
        records = []
        scroll_attempts = 0
        max_scrolls = 10
        while len(records) < required_records and scroll_attempts < max_scrolls:
            records = self.driver.find_elements(
                By.XPATH, '//*[@class="Nv2PK THOPZb CpccDe "]//a'
            )
            if len(records) < required_records:
                # After the first scroll the results panel carries an extra
                # class, so target the matching container for each attempt.
                if scroll_attempts == 0:
                    scroll_identifier = (
                        By.XPATH,
                        '//*[@class="m6QErb DxyBCb kA9KIf dS8AEf XiKgde ecceSd"]',
                    )
                else:
                    scroll_identifier = (
                        By.XPATH,
                        '//*[@class="m6QErb DxyBCb kA9KIf dS8AEf XiKgde ecceSd QjC7t"]',
                    )
                results_container = self.driver.find_element(*scroll_identifier)
                results_container.send_keys(Keys.PAGE_DOWN)
                time.sleep(2)
                scroll_attempts += 1
            else:
                break

        with open(output_file, mode="w", newline="", encoding="utf-8") as file:
            writer = csv.writer(file)
            writer.writerow(["Name", "Link"])
            for tag in records:
                # Parse each result card's HTML and pull out the name and link.
                html_content = tag.get_attribute("outerHTML")
                soup = BeautifulSoup(html_content, "html.parser")
                a_tag = soup.find("a", class_="hfpxzc")
                if a_tag:
                    name = a_tag.get("aria-label")
                    link = a_tag.get("href")
                    writer.writerow([name, link])
# Set up ChromeDriver via webdriver-manager and launch the browser.
service = ChromeService(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

# Centre the map on a latitude,longitude pair and search for restaurants there.
location = "35.851880,-80.521580"
endpoint = f"https://www.google.com/maps/@{location},12z?hl=en"
output_file = "restaurants.csv"

resturantscrape = ResturantScrape(driver)
resturantscrape.search_location(endpoint, "restaurant")
resturantscrape.collect_data(5, output_file)
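
The script as written leaves the Chrome session open when it finishes. A small optional follow-up, sketched here under the assumption that restaurants.csv has the Name/Link layout written by collect_data above, closes the browser and reads the rows back as a quick sanity check:

driver.quit()  # release the ChromeDriver session once scraping is done

# Optional sanity check: read back the CSV written by collect_data.
with open(output_file, newline="", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        print(row["Name"], "->", row["Link"])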
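
For unattended runs (for example on a server or in CI), the same driver setup can be started headless. This is a minimal sketch using standard Selenium ChromeOptions, not part of the original gist; the resulting driver can replace the one constructed above:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager

options = webdriver.ChromeOptions()
options.add_argument("--headless=new")          # run Chrome without a visible window
options.add_argument("--window-size=1280,900")  # give the page a realistic viewport

headless_driver = webdriver.Chrome(
    service=ChromeService(ChromeDriverManager().install()),
    options=options,
)
# headless_driver can then be passed to ResturantScrape in place of `driver`.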