Skip to content

Instantly share code, notes, and snippets.

@generalmimon
Created May 29, 2020 23:22
Show Gist options
  • Save generalmimon/37fa71ccd864318e505f16340c12fa3f to your computer and use it in GitHub Desktop.
Save generalmimon/37fa71ccd864318e505f16340c12fa3f to your computer and use it in GitHub Desktop.
Dig URL paths from Just Solve The File Format Problem wiki in parallel with Node.js
const http = require('http');
// Request template shared by every launch: a HEAD request to the wiki's
// random-page endpoint.
const options = {
  host: 'fileformats.archiveteam.org',
  port: 80,
  method: 'HEAD',
  path: '/wiki/Special:Random',
};

// Upper bound on the number of requests allowed to run in parallel.
const MAX_ACTIVE = 100;

// Scheduler counters: requests currently in flight, and requests that are
// queued but have not been launched yet.
let numActiveReqs = 0;
let numWaitingReqs = 0;

// Distinct page paths (the part after `/wiki/`) collected so far.
const paths = new Set();
/**
 * Starts as many queued requests as the concurrency limit currently allows.
 *
 * The number of launches is fixed up front as the smaller of the free slots
 * (`MAX_ACTIVE - numActiveReqs`) and the queue length (`numWaitingReqs`);
 * each launch moves one request from the waiting count to the active count.
 */
const resolveWaitingReqs = () => {
  const freeSlots = MAX_ACTIVE - numActiveReqs;
  const launchCount = Math.min(freeSlots, numWaitingReqs);
  for (let launched = 0; launched < launchCount; launched += 1) {
    runReq();
    numActiveReqs += 1;
    numWaitingReqs -= 1;
  }
};
/**
 * Handles the response to one random-page HEAD request.
 *
 * Releases the request's concurrency slot, lets queued requests start, and
 * then records the page path taken from the redirect's `Location` header in
 * `paths`. Any response that is not a 302 redirect to a URL under
 * `http://<options.host>/wiki/` is reported on stderr and otherwise ignored.
 *
 * @param {http.IncomingMessage} res - response received for the request
 * @returns {void}
 */
const respHandler = (res) => {
  // Free the slot before validating, so a bad response never stalls the queue.
  --numActiveReqs;
  resolveWaitingReqs();

  // Node requires the response to be consumed once a 'response' listener is
  // attached; only the headers are of interest here, so discard the rest.
  res.resume();

  if (res.statusCode !== 302) {
    console.error(`Expected STATUS 302, but got ${res.statusCode} ${http.STATUS_CODES[res.statusCode]}`);
    return;
  }

  const loc = res.headers.location;
  // A redirect without a Location header would otherwise throw on `startsWith`.
  if (typeof loc !== 'string') {
    console.error('Got STATUS 302 without a Location header');
    return;
  }

  const prefix = `http://${options.host}/wiki/`;
  if (!loc.startsWith(prefix)) {
    console.error(`${loc} doesn't start with ${prefix}`);
    return;
  }

  // Set#add ignores values that are already present, so no membership check
  // is needed to keep the collected paths unique.
  paths.add(loc.substring(prefix.length));
};
/**
 * Launches one request described by `options` and routes its response to
 * `respHandler`.
 *
 * A request that fails before any response arrives (connection refused,
 * reset, DNS failure, ...) is logged, its concurrency slot is released and
 * queued requests get a chance to start. Without an 'error' listener such a
 * failure would be an unhandled 'error' event and terminate the process.
 *
 * NOTE(review): releasing the slot here assumes every launch is counted in
 * `numActiveReqs` by the caller, as `resolveWaitingReqs` does.
 *
 * @returns {void}
 */
const runReq = () => {
  const req = http.request(options);
  let gotResponse = false;

  req.on('response', (res) => {
    gotResponse = true;
    respHandler(res);
  });

  req.on('error', (err) => {
    console.error(`Request failed: ${err.message}`);
    // respHandler has already released the slot if a response was delivered;
    // releasing it again would corrupt the active-request count.
    if (gotResponse) return;
    --numActiveReqs;
    resolveWaitingReqs();
  });

  req.end();
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment