Skip to content

Instantly share code, notes, and snippets.

@rubeniskov
Last active June 3, 2020 16:31
Show Gist options
  • Save rubeniskov/b8656b301adffe28bbb08698c283c804 to your computer and use it in GitHub Desktop.
Save rubeniskov/b8656b301adffe28bbb08698c283c804 to your computer and use it in GitHub Desktop.
email_list_from_location
#!/usr/bin/env node
const util = require("util");
const yargs = require("yargs");
const request = util.promisify(require("request"));
const puppeteer = require("puppeteer");
const fs = require("fs");
/**
 * Returns a promise that settles after roughly `ms` milliseconds.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
const delayMs = (ms) =>
  new Promise((done) => {
    setTimeout(done, ms);
  });
/**
 * Navigates `page` to `url` and returns the resulting page markup.
 *
 * This is deliberately best-effort: a site that fails to load must not abort
 * the whole run, so any failure yields an empty string. The failure is
 * reported on stderr so that unreachable sites are visible to the operator
 * instead of silently looking like "no email found".
 *
 * @param {object} page - Object exposing `goto(url)` and `content()`.
 * @param {string} url - Address to load.
 * @returns {Promise<string>} The page content, or `""` when loading failed.
 */
const getContent = async (page, url) => {
  try {
    await page.goto(url);
    return await page.content();
  } catch (error) {
    const reason = error && error.message ? error.message : error;
    process.stderr.write(`Unable to load ${url}: ${reason}\n`);
    return "";
  }
};
/**
 * Returns a new array with duplicate values removed, keeping the first
 * occurrence of each value in its original position.
 *
 * A `Set` is used so the work is linear in the input size rather than
 * scanning the array once per element. Values are compared with
 * SameValueZero, so a repeated `NaN` collapses to a single entry.
 *
 * @param {Array} arr - Values to de-duplicate.
 * @returns {Array} The distinct values in first-seen order.
 */
const unique = (arr) => [...new Set(arr)];
/**
 * Collects every distinct substring of `content` that matches the email
 * pattern below.
 *
 * @param {string} content - Text to scan.
 * @returns {string[]} Distinct matches, as produced by `unique`; an empty
 *   list is passed to `unique` when nothing matches.
 */
const extractEmails = (content) => {
  const emailPattern = /(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))/gim;
  // With the global flag, `match` yields either null or a non-empty array.
  const found = content.match(emailPattern);
  return unique(found === null ? [] : found);
};
/**
 * Loads `url` through `getContent` and returns the result of running
 * `extractEmails` over the loaded content.
 *
 * @param {object} page - Page object handed through to `getContent`.
 * @param {string} url - Address to load.
 * @returns {Promise<string[]>} Whatever `extractEmails` returns for the content.
 */
const getEmailFromUrl = async (page, url) =>
  extractEmails(await getContent(page, url));
/**
 * Flattens `arr` by one level: array elements are spliced into the result,
 * every other element is appended unchanged.
 *
 * @param {Iterable} arr - Values, some of which may be arrays.
 * @returns {Array} A new, one-level-flattened array.
 */
const flatten = (arr) => Array.prototype.concat.call([], ...arr);
/**
 * Runs a Google Places "nearby search" and follows `next_page_token` until
 * the API stops returning one, collecting the results of every page.
 *
 * @param {object} params - Search options.
 * @param {string} params.key - API key, sent as the `key` query parameter.
 * @param {number} [params.delay=2000] - Milliseconds to wait before asking
 *   for each further page. It is never sent to the API.
 * @param {...*} params.opts - Every other property is forwarded verbatim as
 *   a query-string parameter.
 * @returns {Promise<object[]>} All `results` entries across pages; an empty
 *   array when the response carries no `results` list.
 */
const searchPlaces = async (params) => {
  const { key, delay = 2000, ...opts } = params;
  const places = [];
  let query = opts;

  for (;;) {
    const { body } = await request({
      method: "GET",
      url: `https://maps.googleapis.com/maps/api/place/nearbysearch/json`,
      qs: {
        ...query,
        key,
      },
      json: true,
    });
    // A failed or non-JSON response may lack these fields; treat that as an
    // empty page rather than crashing the caller on `undefined`.
    const { results, next_page_token, status } = body || {};
    process.stderr.write(
      `Response status ${status} for ${JSON.stringify(query, null, 4)}\n`
    );
    if (Array.isArray(results)) {
      places.push(...results);
    }
    if (!next_page_token) {
      return places;
    }
    process.stderr.write(`Found more results ${next_page_token}\n`);
    // The Places API documents a short delay before a freshly issued page
    // token becomes valid. The caller-supplied delay is honoured for every
    // page, not just the first follow-up request.
    await delayMs(delay);
    query = { ...opts, pagetoken: next_page_token };
  }
};
/**
 * Finds places with `searchPlaces` and then fetches the detail record of
 * each one from the Google Places "details" endpoint, one request at a time.
 *
 * @param {object} params - Options handed to `searchPlaces`. Apart from
 *   `delay`, they are also forwarded as query parameters of every details
 *   request, where they can override the default `fields` list.
 * @returns {Promise<object[]>} The `result` object of every details response
 *   that contained one, in search order.
 */
const obtainPlaces = async (params) => {
  const results = await searchPlaces(params);
  process.stderr.write(`Found ${results.length} results\n`);

  // `delay` only paces the paginated search; it is not a query parameter.
  const { delay: _searchDelay, ...detailParams } = params;
  const places = [];
  for (const { name, place_id } of results) {
    process.stderr.write(`Requesting detailed information ${name}\n`);
    const { body } = await request({
      method: "GET",
      url: `https://maps.googleapis.com/maps/api/place/details/json`,
      qs: {
        place_id,
        fields: "website,name,price_level,rating,formatted_phone_number",
        ...detailParams,
      },
      json: true,
    });
    const { result, status } = body || {};
    if (!result) {
      // A response without a result would otherwise surface as `undefined`
      // in the returned list and break consumers that destructure entries.
      process.stderr.write(
        `No details returned for ${name} (status ${status})\n`
      );
      continue;
    }
    places.push(result);
  }
  return places;
};
/**
 * Formats one value as a CSV field. Null and undefined become an empty
 * field; values containing the delimiter, a double quote or a line break
 * are wrapped in double quotes with embedded quotes doubled.
 */
const toCsvField = (value, delimiter) => {
  const text = value == null ? "" : String(value);
  const hasDelimiter = delimiter !== "" && text.includes(String(delimiter));
  if (hasDelimiter || /["\r\n]/.test(text)) {
    return `"${text.replace(/"/g, '""')}"`;
  }
  return text;
};

/**
 * Looks up places, visits the website of every place rated above 2 and
 * writes one CSV row per place for which an email-like string was found.
 *
 * @param {object} options - Parsed command-line options.
 * @param {string} [options.output="email.csv"] - File to (over)write.
 * @param {string[]|string} [options.columns=["name","email"]] - Properties
 *   to emit per row; a single comma-separated string is also accepted.
 * @param {string} [options.csv_delimiter=","] - Field separator.
 * @param {...*} options.restOpts - Remaining options, handed to
 *   `obtainPlaces` as the place query.
 * @returns {Promise<void>} Settles once the file is written and the browser
 *   has been closed.
 */
const main = async ({
  output = "email.csv",
  columns = ["name", "email"],
  csv_delimiter = ",",
  ...restOpts
}) => {
  // `yargs.argv` carries the positional list (`_`) and the script name
  // (`$0`); neither is a place query parameter, so keep them out of the
  // requests.
  const placeQuery = { ...restOpts };
  delete placeQuery._;
  delete placeQuery.$0;

  // A flag given once arrives as a plain string rather than an array.
  const columnNames = Array.isArray(columns)
    ? columns
    : String(columns).split(",");
  const toCsvLine = (values) =>
    `${values
      .map((value) => toCsvField(value, csv_delimiter))
      .join(csv_delimiter)}\n`;

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    const places = await obtainPlaces(placeQuery);
    // Filtering
    const candidates = places.filter(
      (place) => Boolean(place) && Boolean(place.website) && place.rating > 2
    );
    // The header needs its own line terminator, otherwise the first data
    // row is appended onto the header line.
    fs.writeFileSync(output, toCsvLine(columnNames));
    for (const place of candidates) {
      const { website, name } = place;
      process.stderr.write(`Requesting email of ${name} from ${website}\n`);
      const placeEmails = await getEmailFromUrl(page, website);
      const email = flatten(placeEmails)[0];
      if (email) {
        const info = { email, ...place };
        process.stderr.write(
          `Main email ${email} selected to save into ${output}\n`
        );
        fs.appendFileSync(
          output,
          toCsvLine(columnNames.map((column) => info[column]))
        );
      } else {
        process.stderr.write(`Email not found for ${name}\n`);
      }
    }
  } finally {
    // Without this the launched browser outlives the run, even after a
    // failure part-way through.
    await browser.close();
  }
};
// Entry point: run with the parsed command-line arguments. A failure is
// reported on stderr and reflected in the exit status instead of being left
// as an unhandled promise rejection.
main(yargs.argv).catch((error) => {
  process.stderr.write(`${(error && error.stack) || error}\n`);
  process.exitCode = 1;
});
{
  "name": "email_list_from_location",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "start": "nodemon index.js",
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "",
  "license": "ISC",
  "dependencies": {
    "nodemon": "^2.0.4",
    "puppeteer": "^3.3.0",
    "request": "^2.88.2",
    "yargs": "^15.3.1"
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment