Created
February 4, 2020 19:33
-
-
Save agstover/758908fa459fa2e994275ecbeef72fc6 to your computer and use it in GitHub Desktop.
For Zeit Error
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const $ = require('cheerio')
const got = require('got')
const set = require('set-value')
// Pages to scrape. Each entry is either {slug} (nationwide figures) or
// {slug, place: {city, state}} (figures for one city).
const urlList = [
  {slug: 'hvac-technician'},
  // Example of a city-specific entry:
  // {slug: 'hvac-technician', place: {city: 'Houston', state: 'TX'}}
]
// Maps each output field name to the CSS class name (without the leading dot)
// that is looked up in the fetched HTML to obtain that field's text.
const tags = {
  avgSalary: 'sal-agg-nonbase__average-salary-value'
}
/**
 * Builds one URL per entry, preserving order.
 *
 * @param {Array<{slug: string, place?: {city: string, state: string}}>} entries
 * @returns {string[]} URLs in the same order as `entries`
 * @throws {Error} whatever makeUrl throws for an invalid entry
 */
// The explicit callback keeps map's extra (index, array) arguments away from makeUrl.
const makeUrls = entries => entries.map(entry => makeUrl(entry))
/**
 * Builds the Indeed career URL for one entry.
 *
 * @param {{slug?: string, place?: {city: string, state: string}}} [entry]
 *   `slug` is required; `place` is optional and selects the city-specific salaries page.
 * @returns {string} `https://www.indeed.com/career/<slug>` without a place, or
 *   `https://www.indeed.com/career/<slug>/salaries/<city>--<state>` with one
 * @throws {Error} when `slug` is missing, or when `place` lacks `city` or `state`
 */
// The `= {}` default makes a call with no argument hit the slug check below
// instead of failing inside the parameter destructuring.
const makeUrl = ({slug = null, place = null} = {}) => {
  if (!slug) throw new Error("Request to Indeed must include at least a trade type slug")
  if (place && (!place.city || !place.state)) {
    // Without this check a partial place would silently produce "undefined--TX".
    throw new Error("Request to Indeed with a place must include both city and state")
  }
  // Encode each segment so spaces, '?', '#' or '/' in the data cannot alter the URL structure.
  const base = `https://www.indeed.com/career/${encodeURIComponent(slug)}`
  if (!place) return base
  return `${base}/salaries/${encodeURIComponent(place.city)}--${encodeURIComponent(place.state)}`
}
/**
 * Requests every URL concurrently with `got` and returns the response bodies.
 *
 * @param {string[]} urls
 * @returns {Promise<Array>} the `body` of each response, in the same order as `urls`
 * @throws rejects as soon as any single request rejects (Promise.all semantics)
 */
const getHtmlData = async urls => {
  const responses = await Promise.all(urls.map(url => got(url)))
  return responses.map(response => response.body)
}
/**
 * Returns the text cheerio finds for the class selector `.${tag}` within `html`.
 *
 * @param {string} html - markup to search (passed to cheerio as the selector context)
 * @param {string} tag - CSS class name without the leading dot
 * @returns {string} result of cheerio's `.text()` for the selection
 */
const scrapeHtml = (html, tag) => $(`.${tag}`, html).text()
/**
 * Scrapes every field listed in `tags` out of a single HTML document.
 *
 * @param {string} html - markup of one fetched page
 * @returns {Object} one key per key of `tags`, each holding the scraped text
 */
const extractDataFromHtml = html => Object.keys(tags).reduce((fields, field) => {
  fields[field] = scrapeHtml(html, tags[field])
  return fields
}, {})
/**
 * Returns a copy of each datum with `place` taken from the `urlList` entry at
 * the same index.
 *
 * @param {Object[]} data - entries assumed to be index-aligned with `urlList`
 * @returns {Object[]} new objects; the inputs are not mutated
 */
const addPlaceData = data => data.map((datum, index) => {
  const source = urlList[index]
  // If `data` is longer than `urlList`, keep the datum's own place rather than
  // throwing on an undefined entry.
  return {...datum, place: source ? source.place : datum.place}
})
/**
 * Groups scraped entries into a nested object keyed by slug, then by location.
 *
 * Entries without a place are stored under `<slug>.USA`; entries with a place
 * are stored under `<slug>.<city><state>` (city and state concatenated).
 *
 * @param {Array<{slug: string, place?: {city: string, state: string}}>} data
 * @returns {Object} the nested result object
 */
const shapeReturnData = data => addPlaceData(data).reduce((acc, entry) => {
  const locationKey = entry.place ? `${entry.place.city}${entry.place.state}` : 'USA'
  // NOTE(review): set-value is given a dotted path, so a '.' inside the slug
  // or city would presumably be read as an extra nesting level - confirm.
  // NOTE(review): the whole entry is stored, as the original `set` call did;
  // the target-shape comment in this file suggests only the scraped fields
  // (`entry.data`) may be wanted - confirm.
  set(acc, `${entry.slug}.${locationKey}`, entry)
  // The accumulator must be returned on every iteration, and the reduce
  // result is the function's return value.
  return acc
}, {})
/*
We want the result to look like:
{
  hvac: {
    USA: {
      avgSalary: $1111,
      overtime: $1111
    },
    HoustonTX: {}
  }
}
*/
module.exports = async (req, res) => { | |
const urls = makeUrls(urlList) | |
let htmls | |
try { | |
// These are all html blobs for the requested URL pages | |
htmlsData = await getHtmlData(urls) | |
// We then need to map over them and return an object that has the | |
// scraped values from the blobs matched to the trade/location data | |
let newUrlList = [...urlList] | |
newUrlList = newUrlList.map((url, index) => ({...url, data: extractDataFromHtml(htmlsData[index])})) | |
// const result = urlsToTradeData(htmls) | |
const returnData = shapeReturnData(newUrlList) | |
console.log("RETURN DATA", returnData) | |
res.json(returnData) | |
} catch(err) { | |
console.log("There was an error fetching urls", err) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment