Skip to content

Instantly share code, notes, and snippets.

@adrianhorning08
Created August 4, 2023 23:17
Show Gist options
  • Save adrianhorning08/dd72c19670b488ac5b42ec292a6d158a to your computer and use it in GitHub Desktop.
Save adrianhorning08/dd72c19670b488ac5b42ec292a6d158a to your computer and use it in GitHub Desktop.
Scrape Google Maps
import * as cheerio from "cheerio";
import puppeteerExtra from "puppeteer-extra";
import stealthPlugin from "puppeteer-extra-plugin-stealth";
import chromium from "@sparticuz/chromium";
/**
 * Repeatedly scrolls the page's `div[role="feed"]` element until its
 * `scrollHeight` stops growing.
 *
 * The scroll runs as one sequential loop inside the page. The loop never
 * overlaps its own 3 s "did more content load?" wait, and it finishes
 * exactly once.
 *
 * @param {object} page - Puppeteer page that has already navigated to the search.
 * @returns {Promise<void>} Resolves once a settle wait produces no extra height.
 * @throws {Error} If the page has no `div[role="feed"]` element.
 */
async function autoScroll(page) {
  await page.evaluate(async () => {
    // This callback is executed inside the browser page, so it must not
    // reference anything from the surrounding Node.js scope.
    const feed = document.querySelector('div[role="feed"]');
    if (!feed) {
      throw new Error('results feed (div[role="feed"]) not found on page');
    }

    const distance = 1000; // pixels per scroll step
    const stepDelay = 200; // ms between scroll steps
    const settleDelay = 3000; // ms to wait for more content to load
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    let scrolled = 0;
    while (true) {
      await sleep(stepDelay);
      const heightBefore = feed.scrollHeight;
      feed.scrollBy(0, distance);
      scrolled += distance;
      if (scrolled < heightBefore) {
        continue;
      }

      // Scrolled at least one full feed height: give the page time to append
      // more results, then stop if the feed did not grow.
      scrolled = 0;
      await sleep(settleDelay);
      if (feed.scrollHeight <= heightBefore) {
        return;
      }
    }
  });
}

/**
 * Extracts the `ChI…` identifier from a place URL.
 *
 * @param {string|undefined} url - Place link; anything after `?` is ignored.
 * @returns {string|null} The text from the first `ChI` up to the query
 *   string, or null when the URL is missing or contains no `ChI` segment.
 */
function extractPlaceId(url) {
  const idTail = url?.split("?")[0].split("ChI")[1];
  return idTail ? `ChI${idTail}` : null;
}

/**
 * Parses a rating label of the form `"<stars> stars <count> Reviews"`.
 *
 * @param {string|undefined} ratingText - The aria-label read from the listing.
 * @returns {{stars: number|null, numberOfReviews: number|null}} Parsed values;
 *   a part that is absent or not numeric is reported as null.
 */
function parseRating(ratingText) {
  if (!ratingText) {
    return { stars: null, numberOfReviews: null };
  }
  const [starsPart = "", reviewsPart = ""] = ratingText.split("stars");

  const starsText = starsPart.trim();
  const starsValue = starsText ? Number(starsText) : NaN;

  // Keep digits only so "1,234 Reviews" and "1 Review" both parse.
  const reviewDigits = reviewsPart.replace(/\D/g, "");

  return {
    stars: Number.isFinite(starsValue) ? starsValue : null,
    numberOfReviews: reviewDigits ? Number(reviewDigits) : null,
  };
}

/**
 * Builds one record per `<a>` whose href contains `/maps/place/`, reading the
 * listing details from that anchor's parent element.
 *
 * @param {string} html - Full page HTML captured after scrolling.
 * @returns {Array<object>} Records with placeId, address, category, phone,
 *   googleUrl, bizWebsite, storeName, ratingText, stars and numberOfReviews.
 */
function extractBusinesses(html) {
  const $ = cheerio.load(html);
  const businesses = [];

  $("a").each((i, el) => {
    const href = $(el).attr("href");
    if (!href || !href.includes("/maps/place/")) {
      return;
    }
    const parent = $(el).parent();

    const website = parent.find('a[data-value="Website"]').attr("href");
    const storeName = parent.find("div.fontHeadlineSmall").text();
    const ratingText = parent
      .find("span.fontBodyMedium > span")
      .attr("aria-label");

    // The last child of the first div.fontBodyMedium holds "·"-separated
    // text: category · address in its first child, and the phone after the
    // "·" in its last child.
    const detailRow = parent.find("div.fontBodyMedium").first().children().last();
    const firstParts = detailRow.children().first().text().split("·");
    const lastParts = detailRow.children().last().text().split("·");

    const { stars, numberOfReviews } = parseRating(ratingText);

    businesses.push({
      placeId: extractPlaceId(href),
      address: firstParts[1]?.trim(),
      category: firstParts[0]?.trim(),
      phone: lastParts[1]?.trim(),
      googleUrl: href,
      bizWebsite: website,
      storeName,
      ratingText,
      stars,
      numberOfReviews,
    });
  });

  console.log("parents", businesses.length);
  return businesses;
}

/**
 * Opens a Google Maps search in a Puppeteer browser, scrolls the results
 * feed to the end, and parses the captured HTML into business records.
 *
 * @param {string} [query="Auto repair shops austin"] - Search text. Each
 *   whitespace-separated term is URL-encoded and the terms are joined with "+".
 * @returns {Promise<Array<object>|undefined>} The parsed records, or
 *   undefined when any step fails (the error message is logged, not thrown).
 */
async function searchGoogleMaps(query = "Auto repair shops austin") {
  try {
    const start = Date.now();
    puppeteerExtra.use(stealthPlugin());
    const browser = await puppeteerExtra.launch({
      headless: false,
      // headless: "new",
      // devtools: true,
      executablePath: "", // your path here
    });
    // Alternative launch using the bundled @sparticuz/chromium build:
    // const browser = await puppeteerExtra.launch({
    //   args: chromium.args,
    //   defaultViewport: chromium.defaultViewport,
    //   executablePath: await chromium.executablePath(),
    //   headless: "new",
    //   ignoreHTTPSErrors: true,
    // });

    let html;
    try {
      const page = await browser.newPage();
      const searchTerms = query
        .trim()
        .split(/\s+/)
        .map((term) => encodeURIComponent(term))
        .join("+");
      try {
        await page.goto(`https://www.google.com/maps/search/${searchTerms}`);
      } catch (error) {
        // Without a loaded page there is nothing to scroll or parse, so stop
        // here instead of continuing on a blank page.
        console.log("error going to page");
        throw error;
      }
      await autoScroll(page);
      html = await page.content();
    } finally {
      // Always release the browser, including when navigation or scrolling fails.
      await browser.close();
      console.log("browser closed");
    }

    const businesses = extractBusinesses(html);
    const end = Date.now();
    console.log(`time in seconds ${Math.floor((end - start) / 1000)}`);
    return businesses;
  } catch (error) {
    console.log("error at googleMaps", error.message);
  }
}
@titoih
Copy link

titoih commented Nov 30, 2023

hi there!
Thanks for sharing.
What about this:

  • After scraping, save the phone numbers to Google Contacts and the emails to a bulk email provider such as Mailchimp, in order to send commercial emails.

Could you contact me to talk about this?
Cheers

@muhamed-didovic
Copy link

@titoih I can help you with that, hit me up

@ChathurangaCPM
Copy link

@adrianhorning08 thanks for sharing,

Can I get images and prices for places (in a hotel search) like that? Is it possible?

@arham-sayyed
Copy link

As of November 2024, this is no longer capable of scraping the website links.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment