Created
February 14, 2025 17:24
-
-
Save AshikNesin/45dfb3cc0f0bcc84a6ca7e947327bea8 to your computer and use it in GitHub Desktop.
Sitemap Broken or Redirect Link Checker
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "sitemap-checker",
  "version": "1.0.0",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "description": "",
  "dependencies": {
    "axios": "^1.7.9",
    "fast-xml-parser": "^4.5.1"
  },
  "type": "module"
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Generated using Claude Sonnet 3.5
import axios from "axios"; | |
import { XMLParser } from "fast-xml-parser"; | |
import { writeFile } from "fs/promises"; | |
import { parse as parseURL } from "url"; | |
/**
 * Fetches an XML sitemap, requests every URL it lists (without following
 * redirects) and collects the ones that answer with a redirect or an error.
 *
 * Typical use:
 *   const checker = new SitemapChecker("https://example.com/sitemap.xml");
 *   await checker.check();
 *   await checker.saveToCSV();
 */
class SitemapChecker {
  /**
   * @param {string} sitemapUrl - Address of the sitemap to download.
   * @param {object} [options]
   * @param {number} [options.batchSize=5] - How many URLs are requested
   *   concurrently. Must be a positive integer.
   * @param {number} [options.timeout=0] - Per-request timeout in
   *   milliseconds, passed to axios. 0 keeps the axios default (no timeout).
   * @throws {RangeError} If `batchSize` is not a positive integer.
   */
  constructor(sitemapUrl, { batchSize = 5, timeout = 0 } = {}) {
    // A batch size of zero or less would make the batching loop in check()
    // spin forever, so reject it up front.
    if (!Number.isInteger(batchSize) || batchSize < 1) {
      throw new RangeError("batchSize must be a positive integer");
    }
    this.sitemapUrl = sitemapUrl;
    this.parser = new XMLParser();
    this.results = [];
    this.batchSize = batchSize;
    this.timeout = timeout;
  }

  /**
   * Quotes one CSV field, doubling embedded double quotes so the value
   * cannot break out of its column.
   *
   * @param {unknown} value
   * @returns {string} The quoted field.
   */
  static #csvField(value) {
    return `"${String(value).replaceAll('"', '""')}"`;
  }

  /**
   * Downloads the sitemap and extracts the `<loc>` value of every `<url>`.
   *
   * @returns {Promise<string[]>} The listed URLs (possibly empty).
   * @throws {Error} If the download fails or the document has no `<urlset>`.
   */
  async #parseSitemap() {
    try {
      const { data } = await axios.get(this.sitemapUrl, {
        timeout: this.timeout,
      });
      const parsed = this.parser.parse(data);
      const urlset = parsed?.urlset;
      if (urlset === undefined || urlset === null) {
        throw new Error("Sitemap has no <urlset> root element");
      }

      // The parser yields a single object (not an array) when the sitemap
      // holds exactly one <url>, and nothing at all when it holds none.
      const rawEntries = urlset.url ?? [];
      const entries = Array.isArray(rawEntries) ? rawEntries : [rawEntries];

      return entries
        .map((entry) => entry?.loc)
        .filter((loc) => typeof loc === "string" && loc.trim() !== "")
        .map((loc) => loc.trim());
    } catch (error) {
      console.error("Error parsing sitemap:", error.message);
      throw error;
    }
  }

  /**
   * Requests a single URL without following redirects.
   *
   * @param {string} url
   * @returns {Promise<{url: string, status: number|string,
   *   redirectUrl: string, timestamp: string}|null>} A report entry when the
   *   response is a redirect or an error (status >= 300) or the request
   *   itself failed (status "Error"); `null` when the URL looks healthy.
   */
  async #checkUrl(url) {
    try {
      const response = await axios.get(url, {
        maxRedirects: 0,
        timeout: this.timeout,
        validateStatus: () => true, // Don't throw on any status
      });
      // Everything outside 1xx/2xx is worth reporting: all redirect flavours
      // (301, 302, 303, 307, 308) as well as 4xx/5xx failures.
      if (response.status >= 300) {
        return {
          url,
          status: response.status,
          redirectUrl: response.headers?.location ?? "",
          timestamp: new Date().toISOString(),
        };
      }
    } catch (error) {
      // Reached for transport-level failures (DNS, refused connection,
      // timeout), where there is usually no response to inspect.
      return {
        url,
        status: error.response?.status ?? "Error",
        redirectUrl: error.response?.headers?.location ?? "",
        timestamp: new Date().toISOString(),
      };
    }
    return null;
  }

  /**
   * Runs the full check: loads the sitemap, probes its URLs in batches of
   * `batchSize` and stores the problematic ones in `this.results`.
   *
   * @returns {Promise<object[]>} The report entries, in sitemap order.
   * @throws {Error} If the sitemap cannot be fetched or parsed.
   */
  async check() {
    try {
      console.log("Fetching sitemap...");
      const urls = await this.#parseSitemap();
      console.log(`Found ${urls.length} URLs to check`);

      const results = [];
      for (let i = 0; i < urls.length; i += this.batchSize) {
        const batch = urls.slice(i, i + this.batchSize);
        const batchResults = await Promise.all(
          batch.map((url) => this.#checkUrl(url))
        );
        results.push(...batchResults.filter((result) => result !== null));
        console.log(`Processed ${i + batch.length}/${urls.length} URLs`);
      }

      this.results = results;
      return results;
    } catch (error) {
      console.error("Error checking URLs:", error.message);
      throw error;
    }
  }

  /**
   * Writes the entries collected by `check()` to a CSV file. Nothing is
   * written when there are no entries.
   *
   * @param {string} [filename="problematic-urls.csv"] - Output path.
   * @returns {Promise<void>}
   */
  async saveToCSV(filename = "problematic-urls.csv") {
    if (this.results.length === 0) {
      console.log("No problematic URLs found");
      return;
    }

    const rows = this.results.map(({ url, status, redirectUrl, timestamp }) =>
      [url, status, redirectUrl, timestamp]
        .map((field) => SitemapChecker.#csvField(field))
        .join(",")
    );
    const csvContent = ["URL,Status Code,Redirect URL,Timestamp", ...rows].join(
      "\n"
    );

    await writeFile(filename, csvContent);
    console.log(`Results saved to ${filename}`);
  }
}
// Command-line entry point

/**
 * Reads the sitemap URL from the first command-line argument, runs the
 * checker against it and writes the findings to the default CSV file.
 * Exits the process with code 1 when the argument is missing or when any
 * step throws.
 */
const main = async () => {
  // Print the given message parts to stderr and terminate with a failure code.
  const abort = (...messageParts) => {
    console.error(...messageParts);
    process.exit(1);
  };

  try {
    const [, , sitemapUrl] = process.argv;
    if (!sitemapUrl) {
      abort("Please provide a sitemap URL as an argument");
    }

    const checker = new SitemapChecker(sitemapUrl);
    await checker.check();
    await checker.saveToCSV();
  } catch (error) {
    abort("Error:", error.message);
  }
};

// Kick off the run
main();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment