Created
February 21, 2018 16:35
-
-
Save robbiejaeger/1b7bb0f3d51ea4a366c862738de087d2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const Nightmare = require('nightmare'); | |
const fs = require('fs'); | |
const peakUrls = require('../data/peak-urls.js'); | |
/**
 * Load a single peak page in a hidden Nightmare browser and scrape its trip reports.
 *
 * @param {string} pageLink - URL of the peak page to visit.
 * @returns {Promise<{peakName: string, tripReports: Array<{date: string, report: string}>}|undefined>}
 *   The scraped peak name and its trip reports. Resolves to `undefined` when
 *   navigation or scraping fails; the error is logged rather than rethrown, so
 *   callers must check for a missing result.
 */
const goToPageAndGetData = async (pageLink) => {
  try {
    const nightmare = new Nightmare({ show: false });
    const result = await nightmare
      .goto(pageLink)
      .wait('#main_list')
      // Nightmare runs this callback inside the loaded page, so it must be
      // self-contained: it may only use page globals such as `document`,
      // never variables from this Node.js module.
      .evaluate(() => {
        const peakName = document.querySelector('.BldHdr2').innerText.trim();
        // The first two <tr> elements are skipped (presumably header rows — confirm against the page markup).
        const tripReportRows = [...document.querySelectorAll('tr')].slice(2);
        // Declared with `const`: the bare assignment used previously created an
        // implicit global (and is a ReferenceError in strict-mode code).
        const tripReports = tripReportRows.map((row) => {
          const date = row.querySelector('div.buttonf a').innerText;
          // Keep only the text after the last "Info:" label, drop the single
          // character that follows the label, then trim surrounding whitespace.
          const report = row
            .querySelector('td div:nth-child(2)')
            .innerText.split('Info:')
            .pop()
            .slice(1)
            .trim();
          return { date, report };
        });
        return { peakName, tripReports };
      })
      .end();
    return result;
  } catch (e) {
    // Best-effort scraping: report which page failed and fall through so the
    // function resolves to `undefined` instead of rejecting.
    console.error(`Failed to scrape ${pageLink}:`, e);
  }
};
console.log(`Scraping trip reports for ${peakUrls.length} peaks. This might take a couple minutes...`);

/**
 * Scrape every peak page one after another, collecting the successful results in order.
 *
 * Pages are visited sequentially (each scrape is awaited before the next
 * starts). A page whose scrape yields no result is reported and skipped so a
 * single failure does not discard the data already collected.
 *
 * @param {string[]} pageLinks - URLs of the peak pages to scrape.
 * @returns {Promise<Array<object>>} Scraped results for the pages that succeeded.
 */
const scrapeAllPeaks = async (pageLinks) => {
  const scraped = [];
  for (const [idx, pageLink] of pageLinks.entries()) {
    const peakReports = await goToPageAndGetData(pageLink);
    if (!peakReports) {
      // goToPageAndGetData resolves to undefined when it fails.
      console.error(`${idx + 1}. No data scraped for ${pageLink}; skipping`);
      continue;
    }
    console.log(`${idx + 1}. ${peakReports.peakName} scraped`);
    scraped.push(peakReports);
  }
  return scraped;
};

/**
 * Serialize the scraped reports as pretty-printed JSON and write them to ./data/trip-reports.json.
 *
 * @param {Array<object>} reports - Scraped results to persist.
 * @returns {Promise<void>} Settles once the write finishes; rejects if serialization or the write fails.
 */
const saveReports = (reports) =>
  // Adapts the callback-style fs.writeFile so write errors flow into the
  // promise chain instead of being thrown from inside the callback.
  new Promise((resolve, reject) => {
    const output = JSON.stringify(reports, null, 2);
    const fileNameToSave = 'trip-reports.json';
    fs.writeFile(`./data/${fileNameToSave}`, output, 'utf8', (err) => {
      if (err) {
        reject(err);
        return;
      }
      console.log(`Scraped trip report data saved to file: data/${fileNameToSave}`);
      resolve();
    });
  });

const data = scrapeAllPeaks(peakUrls);

data.then(saveReports).catch((err) => {
  // Single failure handler for the whole run: log and signal a non-zero exit.
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment