Last active
June 3, 2020 16:31
-
-
Save rubeniskov/b8656b301adffe28bbb08698c283c804 to your computer and use it in GitHub Desktop.
email_list_from_location
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
const util = require("util"); | |
const yargs = require("yargs"); | |
const request = util.promisify(require("request")); | |
const puppeteer = require("puppeteer"); | |
const fs = require("fs"); | |
/**
 * Creates a promise that settles (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Time to wait, forwarded to `setTimeout`.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
const delayMs = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
/**
 * Navigates `page` to `url` and returns the resulting page markup.
 *
 * This is deliberately best-effort: when navigation or content retrieval
 * fails, the failure is reported on stderr and an empty string is returned so
 * the caller can carry on with the next site.
 *
 * @param {object} page - Object exposing `goto(url)` and `content()`.
 * @param {string} url - Address to load.
 * @returns {Promise<string>} The page content, or `""` when loading failed.
 */
const getContent = async (page, url) => {
  try {
    await page.goto(url);
    return await page.content();
  } catch (error) {
    // Keep the "empty content" fallback, but make the failure visible so it
    // is distinguishable from a page that simply contains no emails.
    const reason = error && error.message ? error.message : error;
    process.stderr.write(`Unable to load ${url}: ${reason}\n`);
    return "";
  }
};
/**
 * Returns the distinct values of `arr`, preserving first-occurrence order.
 *
 * A Set is used so the work grows linearly with the input instead of
 * re-scanning the array for every element. Values are compared the way a Set
 * compares them, so a repeated `NaN` is kept once rather than dropped.
 *
 * @param {Array} arr - Values to de-duplicate.
 * @returns {Array} New array containing each distinct value once.
 */
const unique = (arr) => Array.from(new Set(arr));
/**
 * Collects the email-looking substrings found in `content`.
 *
 * @param {string} content - Text to scan (for example a page's markup).
 * @returns {string[]} De-duplicated matches in order of first appearance;
 *   an empty array when nothing matches.
 */
const extractEmails = (content) => {
  const emailPattern = /(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))/gim;
  const found = content.match(emailPattern);
  // `match` yields null (not an empty array) when there are no hits.
  if (!found) {
    return unique([]);
  }
  return unique(found);
};
/**
 * Loads `url` through `page` and returns the emails found in its content.
 *
 * @param {object} page - Page handle forwarded to `getContent`.
 * @param {string} url - Address to load.
 * @returns {Promise<string[]>} Whatever `extractEmails` returns for the
 *   fetched content.
 */
const getEmailFromUrl = async (page, url) =>
  extractEmails(await getContent(page, url));
/**
 * Flattens `arr` by one level: array entries are merged into the result and
 * every other entry is appended unchanged.
 *
 * @param {Array} arr - Values and/or arrays of values.
 * @returns {Array} New, one-level-flattened array.
 */
const flatten = (arr) => {
  let merged = [];
  for (const entry of arr) {
    merged = merged.concat(entry);
  }
  return merged;
};
/**
 * Runs a Google Places "Nearby Search" and follows `next_page_token` until no
 * further page is announced.
 *
 * @param {object} params - Search options.
 * @param {string} params.key - API key sent with every request.
 * @param {number} [params.delay=2000] - Milliseconds to wait before asking
 *   for a follow-up page; applied to every page, not only the first.
 *   All remaining properties are forwarded as query-string parameters.
 * @returns {Promise<object[]>} The `results` entries of all pages, in order.
 *   A response without `results` contributes nothing.
 */
const searchPlaces = async (params) => {
  const { key, delay = 2000, ...opts } = params;
  const {
    // Default to an empty list so a response lacking `results` does not
    // surface as `undefined` to callers that read `.length` or spread it.
    body: { results = [], next_page_token, status },
  } = await request({
    method: "GET",
    url: `https://maps.googleapis.com/maps/api/place/nearbysearch/json`,
    qs: {
      ...opts,
      key,
    },
    json: true,
  });
  process.stderr.write(
    `Response status ${status} for ${JSON.stringify(opts, null, 4)}\n`
  );
  if (!next_page_token) {
    return results;
  }
  process.stderr.write(`Found more results ${next_page_token}\n`);
  await delayMs(delay);
  const moreResults = await searchPlaces({
    ...opts,
    key,
    // Carry the caller's delay along; otherwise later pages silently fall
    // back to the default.
    delay,
    pagetoken: next_page_token,
  });
  return [...results, ...moreResults];
};
/**
 * Searches for places and then requests the detail record of each one.
 *
 * @param {object} params - Options handed to `searchPlaces`. Apart from
 *   `delay`, which only controls local pacing, they are also sent with every
 *   details request and may override the default `fields` list.
 * @returns {Promise<object[]>} The `result` object of each details response,
 *   in search order. Places whose details response carries no `result` are
 *   skipped.
 */
const obtainPlaces = async (params) => {
  const results = await searchPlaces(params);
  process.stderr.write(`Found ${results.length} results\n`);
  // `delay` is a client-side pacing option, not a query parameter.
  const { delay, ...query } = params;
  const places = [];
  for (const { name, place_id } of results) {
    process.stderr.write(`Requesting detailed information ${name}\n`);
    const { body } = await request({
      method: "GET",
      url: `https://maps.googleapis.com/maps/api/place/details/json`,
      qs: {
        place_id,
        fields: "website,name,price_level,rating,formatted_phone_number",
        ...query,
      },
      json: true,
    });
    if (body && body.result) {
      places.push(body.result);
    } else {
      // Without this guard an `undefined` entry would reach callers that
      // destructure every place.
      process.stderr.write(
        `No details returned for ${name} (status ${body && body.status})\n`
      );
    }
  }
  return places;
};
/**
 * Renders one value as a CSV field: nullish values become empty, and values
 * containing the delimiter, a double quote or a line break are quoted with
 * embedded quotes doubled.
 */
const toCsvField = (value, delimiter) => {
  const text = value == null ? "" : String(value);
  const separator = String(delimiter);
  const needsQuoting =
    (separator !== "" && text.includes(separator)) || /["\r\n]/.test(text);
  return needsQuoting ? `"${text.replace(/"/g, '""')}"` : text;
};

/**
 * Looks up places, visits the website of each suitable one and writes the
 * first email found per place to a CSV file.
 *
 * @param {object} options
 * @param {string} [options.output="email.csv"] - File to (over)write.
 * @param {string|string[]} [options.columns=["name", "email"]] - Place
 *   properties (plus `email`) to emit, in order; a single name is accepted.
 * @param {string} [options.csv_delimiter=","] - Field separator.
 *   Every other option is forwarded to `obtainPlaces`.
 * @returns {Promise<void>} Settles once all candidates have been processed
 *   and the browser has been closed.
 */
const main = async ({
  output = "email.csv",
  columns = ["name", "email"],
  csv_delimiter = ",",
  ...restOpts
}) => {
  // A single column arrives as a plain string; normalize to an array.
  const columnNames = [].concat(columns);
  const toRow = (values) =>
    `${values
      .map((value) => toCsvField(value, csv_delimiter))
      .join(csv_delimiter)}\n`;

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    const places = await obtainPlaces(restOpts);
    // Filtering
    const candidates = places.filter(
      ({ website, rating }) => !!website && rating > 2
    );
    // The header needs its own line break, otherwise the first data row is
    // appended onto the header line.
    fs.writeFileSync(output, toRow(columnNames));
    for (const place of candidates) {
      const { website, name } = place;
      process.stderr.write(`Requesting email of ${name} from ${website}\n`);
      const [email] = await getEmailFromUrl(page, website);
      if (email) {
        const info = { email, ...place };
        process.stderr.write(
          `Main email ${email} selected to save into ${output}\n`
        );
        fs.appendFileSync(
          output,
          toRow(columnNames.map((column) => info[column]))
        );
      } else {
        process.stderr.write(`Email not found email for ${name}\n`);
      }
    }
  } finally {
    // Always shut the browser down; a browser left running keeps the
    // process alive.
    await browser.close();
  }
};
// Entry point: run with the parsed command-line options. Failures are
// reported on stderr and reflected in the exit code instead of being left as
// an unhandled promise rejection.
main(yargs.argv).catch((error) => {
  process.stderr.write(`${error && error.stack ? error.stack : error}\n`);
  process.exitCode = 1;
});
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "email_list_from_location", | |
"version": "1.0.0", | |
"description": "", | |
"main": "index.js", | |
"scripts": { | |
"start": "nodemon index.js", | |
"test": "echo \"Error: no test specified\" && exit 1" | |
}, | |
"author": "", | |
"license": "ISC", | |
"dependencies": { | |
"nodemon": "^2.0.4", | |
"puppeteer": "^3.3.0", | |
"yargs": "^15.3.1" | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment