Skip to content

Instantly share code, notes, and snippets.

@kuroski
Last active May 31, 2022 14:51
Show Gist options
  • Save kuroski/b3129f6319c27e83f1a602362d11ba9f to your computer and use it in GitHub Desktop.
Save kuroski/b3129f6319c27e83f1a602362d11ba9f to your computer and use it in GitHub Desktop.
Check whether the URLs listed in a sitemap.xml file are active
/* eslint-disable */
const fs = require('fs')
const urlExists = require('url-exists')
const parser = require('xml2js')
const { PromisePool } = require('@supercharge/promise-pool')
/**
 * Checks every URL listed in `./public/sitemap.xml` and records which ones respond.
 *
 * Reads the sitemap, extracts each `<loc>` value, probes the URLs with
 * `urlExists` (at most 10 at a time), logs the totals and writes
 * `{ exists: [...], notExists: [...] }` to `./result.json`.
 *
 * @returns {Promise<void>} Settles once `./result.json` has been written.
 * @throws {Error} If the sitemap cannot be read or parsed, or has no `<urlset>` root.
 */
async function init() {
  const sitemapXml = fs.readFileSync('./public/sitemap.xml', 'utf8')

  // Adapt the callback API to a promise so that parse errors reject `init`
  // and the caller can actually wait for the whole run to finish.
  const sitemap = await new Promise((resolve, reject) => {
    parser.parseString(sitemapXml, (err, result) => {
      if (err) {
        reject(err)
        return
      }
      resolve(result)
    })
  })

  const urlset = sitemap && sitemap.urlset
  if (urlset == null) {
    throw new Error('./public/sitemap.xml does not contain a <urlset> root element')
  }

  // An empty <urlset> has no `url` property; treat it as "nothing to check".
  // `loc` is flattened one level, as the original `.map(...).flat()` did;
  // entries without a `loc` are skipped.
  const urls = (urlset.url || []).flatMap(({ loc }) => (loc == null ? [] : loc))

  const { results, errors } = await PromisePool.withConcurrency(10)
    .for(urls)
    .onTaskStarted((item) => {
      console.log(`Processing ${item}`)
    })
    .process(
      (url) =>
        new Promise((resolve) => {
          urlExists(url, (_, exists) => resolve({ url, exists }))
        })
    )

  // Tasks that threw are absent from `results`; surface them instead of
  // silently under-reporting.
  if (errors.length > 0) {
    console.error(`${errors.length} URL check(s) failed unexpectedly:`)
    for (const error of errors) {
      console.error(error)
    }
  }

  // Single pass with in-place pushes: avoids re-copying both arrays per URL.
  const p = { exists: [], notExists: [] }
  for (const { url, exists } of results) {
    if (exists) {
      p.exists.push(url)
    } else {
      p.notExists.push(url)
    }
  }

  console.log({
    exists: p.exists.length,
    notExists: p.notExists.length,
  })
  fs.writeFileSync('./result.json', JSON.stringify(p))
}
// Script entry point: never leave the promise floating — report any failure
// and signal it through the process exit code.
init().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment