Skip to content

Instantly share code, notes, and snippets.

@agstover
Created February 4, 2020 19:33
Show Gist options
  • Save agstover/758908fa459fa2e994275ecbeef72fc6 to your computer and use it in GitHub Desktop.
Save agstover/758908fa459fa2e994275ecbeef72fc6 to your computer and use it in GitHub Desktop.
For Zeit Error
const $ = require('cheerio')
const got = require('got')
const set = require('set-value')
// Pages to scrape. Each entry is either {slug} on its own or
// {slug, place: {city, state}} to target one city.
const urlList = [
  { slug: 'hvac-technician' },
  // { slug: 'hvac-technician', place: { city: 'Houston', state: 'TX' } },
];

// Output field name -> CSS class name of the element that holds its value.
const tags = {
  avgSalary: 'sal-agg-nonbase__average-salary-value',
};
/**
 * Converts a list of request descriptors into URLs by running each one
 * through makeUrl, keeping the input order.
 */
const makeUrls = (urlList) => {
  // Forward the same (element, index, array) arguments that map supplies.
  const toUrl = (entry, index, entries) => makeUrl(entry, index, entries);
  return urlList.map(toUrl);
};
/**
 * Builds the Indeed URL for one trade, optionally narrowed to a city/state.
 *
 * @param {{slug?: string, place?: {city: string, state: string}}} [request]
 *   `slug` is the trade segment of the URL path. When `place` is given the
 *   URL points at the `/salaries/<city>--<state>` page for that trade;
 *   otherwise it points at `/career/<slug>`.
 * @returns {string} Absolute URL to fetch.
 * @throws {Error} If `slug` is missing, or `place` lacks `city` or `state`.
 */
const makeUrl = ({slug = null, place = null} = {}) => {
  if (!slug) throw new Error("Request to Indeed must include at least a trade type slug");

  // Escape every interpolated segment so spaces or reserved characters in a
  // slug/city cannot change the structure of the path.
  const careerUrl = `https://www.indeed.com/career/${encodeURIComponent(slug)}`;
  if (!place) return careerUrl;

  const {city, state} = place;
  if (!city || !state) {
    // Without this check the URL would silently contain the text "undefined".
    throw new Error("Request to Indeed with a place must include both city and state");
  }
  return `${careerUrl}/salaries/${encodeURIComponent(city)}--${encodeURIComponent(state)}`;
};
/**
 * Requests every URL concurrently with `got` and resolves to the `body` of
 * each response, in the same order as `urls`. Rejects as soon as any single
 * request rejects (Promise.all semantics).
 */
const getHtmlData = async (urls) => {
  const pendingRequests = urls.map((url) => got(url));
  const responses = await Promise.all(pendingRequests);
  return responses.map((response) => response['body']);
};
/**
 * Returns the text of the elements in `html` that carry the CSS class `tag`,
 * as reported by cheerio's `.text()`.
 */
const scrapeHtml = (html, tag) => {
  const classSelector = `.${tag}`;
  const matches = $(classSelector, html);
  return matches.text();
};
/**
 * Scrapes one value per entry in `tags` out of `html` and returns them in an
 * object keyed by the same field names as `tags`. Logs each field name as it
 * is processed.
 */
const extractDataFromHtml = (html) => {
  const scraped = {};
  for (const field of Object.keys(tags)) {
    console.log("TAG", field);
    scraped[field] = scrapeHtml(html, tags[field]);
  }
  return scraped;
};
/**
 * Returns a copy of each entry in `data` with a `place` property taken from
 * the `urlList` entry at the same index (undefined when that entry has none).
 */
const addPlaceData = (data) =>
  data.map((datum, index) => {
    const withPlace = { ...datum };
    withPlace.place = urlList[index].place;
    return withPlace;
  });
/**
 * Groups scraped results by trade slug and then by location.
 *
 * Each entry's scraped fields (`entry.data`) are stored under
 * `result[slug][location]`, where `location` is "USA" for entries without a
 * place and `<city><state>` (e.g. "HoustonTX") otherwise.
 *
 * @param {Array<{slug: string, data: object}>} data - One entry per requested
 *   page, index-aligned with `urlList` (addPlaceData relies on that order).
 * @returns {object} Nested object of the form `{ [slug]: { [location]: fields } }`.
 */
const shapeReturnData = data => {
  const combinedData = addPlaceData(data);
  return combinedData.reduce((acc, entry) => {
    const location = entry.place
      ? `${entry.place.city}${entry.place.state}`
      : 'USA';
    // set-value creates the intermediate `slug` object on demand and mutates acc.
    set(acc, `${entry.slug}.${location}`, entry.data);
    // The reducer must hand the accumulator on, or the next step receives undefined.
    return acc;
  }, {});
};
/*
Desired result shape:
{
  hvac: {
    USA: {
      avgSalary: $1111,
      overtime: $1111
    },
    HoustonTX: {}
  }
}
*/
module.exports = async (req, res) => {
const urls = makeUrls(urlList)
let htmls
try {
// These are all html blobs for the requested URL pages
htmlsData = await getHtmlData(urls)
// We then need to map over them and return an object that has the
// scraped values from the blobs matched to the trade/location data
let newUrlList = [...urlList]
newUrlList = newUrlList.map((url, index) => ({...url, data: extractDataFromHtml(htmlsData[index])}))
// const result = urlsToTradeData(htmls)
const returnData = shapeReturnData(newUrlList)
console.log("RETURN DATA", returnData)
res.json(returnData)
} catch(err) {
console.log("There was an error fetching urls", err)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment