Last active
April 13, 2021 13:11
-
-
Save alexjyong/726fd0af5b6ec35e218ab1151a84277a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
const puppeteer = require('puppeteer'); | |
const createCsvWriter = require('csv-writer').createObjectCsvWriter; | |
/**
 * Collects the `href` of every anchor that is a direct child of a table cell
 * on the page currently loaded in `page`.
 *
 * @param {object} page - Puppeteer page already navigated to the listing page.
 * @returns {Promise<string[]>} The link URLs in document order.
 */
async function collectPantryLinks(page) {
  return page.evaluate(() =>
    Array.from(document.querySelectorAll('td > a'), (anchor) => anchor.href)
  );
}

/**
 * Scrapes a single entry: reads the JSON-LD data on `link`, follows its
 * "View Website and Full Address" link, and picks up the optional
 * "Website" / "Facebook" links from that second page.
 *
 * @param {object} page - Puppeteer page used for both navigations.
 * @param {string} link - URL of the entry page to scrape.
 * @returns {Promise<{streetAddress: *, name: *, telephone: *, facebook: (string|undefined), website: (string|undefined)}>}
 *   One CSV record; `facebook` / `website` are `undefined` when the page has no such link.
 * @throws {Error} When navigation fails, the expected JSON-LD script or detail
 *   link is missing, or the JSON-LD payload has no `address`.
 */
async function scrapePantry(page, link) {
  // The entry data is read from a fixed position among the page's JSON-LD
  // <script> tags. NOTE(review): the position is site-layout dependent.
  const ldJsonIndex = 3;

  await page.goto(link);
  const listing = await page.evaluate((index) => {
    const scripts = document.querySelectorAll('script[type*="application/ld+json"]');
    const script = scripts[index];
    if (!script) {
      throw new Error(`Expected a JSON-LD script at index ${index}, found ${scripts.length}`);
    }
    // Remove raw newlines so they cannot break JSON.parse.
    const data = JSON.parse(script.innerText.replace(/(\r\n|\n|\r)/gm, ''));

    const matches = document.evaluate(
      "//a[contains(., 'View Website and Full Address')]",
      document,
      null,
      XPathResult.ANY_TYPE,
      null
    );
    const detailAnchor = matches.iterateNext();
    if (!detailAnchor) {
      throw new Error("No 'View Website and Full Address' link found");
    }
    data.innerPage = detailAnchor.href;
    return data;
  }, ldJsonIndex);

  await page.goto(listing.innerPage);
  const social = await page.evaluate(() => {
    const found = {};
    // An entry may have neither link; the loop then simply does nothing.
    const anchors = document.querySelectorAll('div[class*="widget widget_tags"] > li > a');
    for (const anchor of anchors) {
      if (anchor.innerText === 'Website') {
        found.website = anchor.href;
      }
      if (anchor.innerText === 'Facebook') {
        found.facebook = anchor.href;
      }
    }
    return found;
  });

  return {
    streetAddress: listing.address.streetAddress,
    name: listing.name,
    telephone: listing.telephone,
    facebook: social.facebook,
    website: social.website,
  };
}

/**
 * Scrapes https://www.foodpantries.org/st/indiana and writes one row per
 * successfully scraped entry to `out.csv` (semicolon-delimited).
 *
 * Entries that fail to scrape are logged and skipped. The browser is always
 * closed, even when the run aborts with an error.
 *
 * @returns {Promise<void>} Resolves once the CSV file has been written.
 * @throws {Error} When the browser cannot be launched, the index page cannot
 *   be loaded, or the CSV cannot be written.
 */
async function main() {
  const entries = []; // one record per scraped entry

  // Runs without a visible browser window; set `headless: false` to watch it.
  const browser = await puppeteer.launch({
    headless: true,
  });

  try {
    const [page] = await browser.pages();
    // Forward console messages from the page to this process's console.
    page.on('console', (consoleObj) => console.log(consoleObj.text()));

    await page.goto('https://www.foodpantries.org/st/indiana');
    const foodPantryLinks = await collectPantryLinks(page);

    for (const link of foodPantryLinks) {
      try {
        entries.push(await scrapePantry(page, link));
      } catch (err) {
        // Best effort: one broken page must not abort the whole export.
        console.log(err);
        console.log(`Failed to get records for ${link}`);
      }
    }
  } finally {
    // Awaited so the browser process is gone before we continue or rethrow.
    await browser.close();
  }

  console.log(entries);

  const csvWriter = createCsvWriter({
    path: 'out.csv',
    fieldDelimiter: ';',
    header: [
      { id: 'name', title: 'Name' },
      { id: 'telephone', title: 'Telephone' },
      { id: 'streetAddress', title: 'Street Address' },
      { id: 'facebook', title: 'Facebook' },
      { id: 'website', title: 'Website' },
    ],
  });
  await csvWriter.writeRecords(entries);
  console.log('The CSV file was written successfully');

  // process.exit() only accepts an integer code, so the completion message is
  // logged instead; with the browser closed the process ends on its own.
  console.log('Export complete');
}
// Script entry point: run the export and make sure a failure is reported
// with a non-zero exit status instead of an unhandled promise rejection.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment