Last active
June 28, 2021 11:46
-
-
Save yevhenorlov/b7401fce7500f8ba53314559c62edc34 to your computer and use it in GitHub Desktop.
A simple Node.js scraper that automatically checks item availability on perfectcircuit.com.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require("puppeteer"); | |
// Product pages on perfectcircuit.com whose availability is checked on each run.
// Frozen so the shared module-level list cannot be mutated by accident.
const URLS = Object.freeze([
  "https://www.perfectcircuit.com/intellijel-zeroscope-1u.html", // Zeroscope Oscilloscope 1U
  "https://www.perfectcircuit.com/expert-sleepers-es-8.html", // ES-8 USB Audio Interface
  "https://www.perfectcircuit.com/4ms-pingable-envelope-generator.html", // Pingable Envelope Generator
  "https://www.perfectcircuit.com/make-noise-erbe-verb-dsp-reverb-module.html", // Erbe-Verb DSP Reverb
  "https://www.perfectcircuit.com/xaoc-devices-batumi-model-1974.html", // Batumi Quad LFO
  "https://www.perfectcircuit.com/intellijel-shifty.html", // Intellijel Shifty
  "https://www.perfectcircuit.com/eowave-domino.html", // Eowave Domino Full Synth Voice
  "https://www.perfectcircuit.com/addac-603.html", // ADDAC603 VC Triple Bandpass Filter
  "https://www.perfectcircuit.com/wmd-performance-mixer.html", // WMD Performance Mixer
  "https://www.perfectcircuit.com/expert-sleepers-super-disting-ex-plus-alpha.html", // Super Disting EX Plus Alpha
]);
/**
 * Launches a Puppeteer browser with default options.
 *
 * Despite the name, this hands back the browser itself (not a page) together
 * with a convenience function that shuts it down.
 *
 * @returns {Promise<{browser: object, close: function(): Promise}>}
 *   The launched browser and a zero-argument function that calls `browser.close()`.
 */
const getPage = async () => {
  const launched = await puppeteer.launch();
  const shutdown = () => launched.close();
  return {
    browser: launched,
    close: shutdown,
  };
};
/**
 * Builds a scraper bound to a browser instance.
 *
 * The returned function opens a new page, navigates to the given product URL,
 * reads the availability text and the product title from the DOM, and always
 * closes the page afterwards.
 *
 * @param {object} browser - Object exposing an async `newPage()`; elsewhere in
 *   this file it is the browser returned by `puppeteer.launch()`.
 * @returns {function(string): Promise<{name: (string|null), status: string, url: string}>}
 *   Async function taking a product URL. `name` is null when no
 *   `h1.page-title` element exists.
 * @throws {Error} (from the returned function) when the availability element
 *   is missing or has empty text, or when navigation/evaluation fails.
 */
const parseAvailabilityFromPage = (browser) => async (url) => {
  const page = await browser.newPage();
  try {
    await page.goto(url);
    // Execute code in the DOM. The callback is evaluated in the page, so the
    // URL is handed over as an explicit argument instead of being closed over.
    return await page.evaluate((pageUrl) => {
      const availabilityNode = document.querySelector(
        ".product-info-main .amstockstatus em"
      );
      const status = availabilityNode && availabilityNode.innerText;
      if (!status) {
        throw new Error(`No matching availability selector for ${pageUrl}`);
      }
      const nameNode = document.querySelector("h1.page-title");
      // A string pattern replaces only the first match; the global regex
      // flattens titles that span more than two lines.
      const name = nameNode && nameNode.innerText.replace(/\n/g, " ");
      return { name, status, url: pageUrl };
    }, url);
  } finally {
    // Release the page whether the scrape succeeded or threw, so pages do not
    // pile up while the other URLs are still being processed.
    await page.close();
  }
};
/**
 * Scrapes every entry in URLS concurrently.
 *
 * @param {object} page - Passed straight to parseAvailabilityFromPage; despite
 *   the name, the caller in this file supplies the browser instance.
 * @returns {Promise<Array>} Resolves with one result per URL, in URLS order;
 *   rejects as soon as any single scrape rejects (Promise.all semantics).
 */
const scrapeUrls = async (page) => {
  const parseAvailability = parseAvailabilityFromPage(page);
  const pending = URLS.map((url) => parseAvailability(url));
  return Promise.all(pending);
};
/**
 * Entry point: launches the browser, scrapes all product pages, and prints the
 * products grouped by availability status as `{ [status]: { [name]: url } }`.
 *
 * Scrape errors are logged with console.error rather than rethrown. The
 * browser is closed in a `finally` so it is released on every path.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  const { browser, close } = await getPage();
  try {
    const data = await scrapeUrls(browser);
    const results = data.reduce((acc, { name, status, url }) => {
      if (!acc[status]) {
        acc[status] = {};
      }
      // The scraper yields a null name when the title element is missing;
      // keying by URL in that case keeps nameless products from overwriting
      // each other under a single "null" key.
      acc[status][name || url] = url;
      return acc;
    }, {});
    console.log(results);
  } catch (err) {
    console.error(err);
  } finally {
    await close();
  }
};
// main() logs scrape failures itself, but a failure while launching or closing
// the browser still rejects the returned promise. Handle it here so the
// promise is not left floating and the process reports a failing exit code.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment