Created
April 13, 2025 14:27
-
-
Save yin1999/7c17d3053588d5123e43154bb9842219 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
import fs from "node:fs/promises"; | |
import * as path from "node:path"; | |
import { fdir } from "fdir"; | |
import ora from "ora"; | |
import { fromMarkdown } from "mdast-util-from-markdown"; | |
import { visit } from "unist-util-visit"; | |
// Progress spinner shared by the whole script; it is started right away so
// its text can be updated while files are crawled and processed.
const spinner = ora().start();

// Locale whose documents are scanned and whose links are rewritten.
const locale = "zh-CN";

// Slug prefixes (relative to `/<locale>/docs/`) that select which redirect
// rules are loaded: a rule is kept when its source slug or its target URL
// starts with one of these prefixes, so listing the final (post-move) paths
// is enough. While this list is empty no rule is selected and nothing is
// rewritten.
const startPaths = [];
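/**
 * Shared JSDoc types.
 *
 * `Urls` lists the links found in a document; `line` and `column` are 1-based
 * and point at (or shortly before) the first character of `url`.
 * `UrlErrors` lists the links that must be rewritten; `correct` is the
 * replacement for `url`.
 *
 * @typedef {Array<{url: string, line: number, column: number, correct: string}>} UrlErrors
 * @typedef {Array<{url: string, line: number, column: number}>} Urls
 */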
/**
 * Collect the value of every `href="…"` / `href='…'` attribute in a piece of
 * raw text, together with the 1-based line and column of the first character
 * of that value.
 *
 * Newlines are counted in one forward pass shared by all matches instead of
 * re-scanning the text before each match.
 *
 * @param {string} rawContent text to scan (for example the value of an HTML node)
 * @returns {Urls} one entry per `href` attribute, in document order
 */
function findUrlInText(rawContent) {
  // Distance from the start of a match to the URL itself: `href`, `=` and the
  // opening quote.
  const urlOffset = "href".length + 2;
  const urls = [];

  let line = 1;
  // Index of the first character of the line currently being tracked.
  let lineStart = 0;
  // Index of the next newline at or after `lineStart`, or -1 when none is left.
  let nextNewline = rawContent.indexOf("\n");

  for (const match of rawContent.matchAll(/href=['"]([^'"]+)['"]/g)) {
    // Step over every newline that precedes this match.
    while (nextNewline !== -1 && nextNewline < match.index) {
      line += 1;
      lineStart = nextNewline + 1;
      nextNewline = rawContent.indexOf("\n", lineStart);
    }
    const column = match.index - lineStart + 1 + urlOffset;
    urls.push({ url: match[1], line, column });
  }
  return urls;
}
/**
 * Parse a Markdown document and list every URL found in `link` nodes and in
 * `href` attributes of raw `html` nodes, with a position usable by `fixUrl`.
 *
 * @param {string} content Markdown source
 * @returns {Urls} URLs in the order the tree is visited
 */
function findUrlInMarkdown(content) {
  const found = [];

  visit(fromMarkdown(content), ["link", "html"], (node) => {
    if (node.type !== "link") {
      // Raw HTML: positions from findUrlInText are relative to the node, so
      // shift them to document coordinates. Only hrefs on the node's first
      // line are affected by the node's starting column.
      const { start } = node.position;
      for (const hit of findUrlInText(node.value)) {
        found.push({
          url: hit.url,
          line: hit.line + start.line - 1,
          column: hit.line === 1 ? hit.column + start.column - 1 : hit.column,
        });
      }
      return;
    }

    if (node.children.length === 1) {
      // Single label child: the destination is taken to begin two columns
      // after the label ends, i.e. past the `](` of `[label](url)`.
      const labelEnd = node.children[0].position.end;
      found.push({
        url: node.url,
        line: labelEnd.line,
        column: labelEnd.column + 2,
      });
      return;
    }

    // Zero or several label children: report a position close to the start
    // of the link. NOTE(review): this looks exact only for `[](url)`;
    // otherwise it lands inside the label, before the URL.
    const linkStart = node.position.start;
    found.push({
      url: node.url,
      line: linkStart.line,
      column: linkStart.column + 3,
    });
  });

  return found;
}
/**
 * Find the links in a Markdown document that point to a redirected page of
 * the document's own locale.
 *
 * Only URLs shaped like `/<docLocale>/docs/<slug>[#hash]` are considered; the
 * slug (without hash) is looked up in `redirects`, and the original hash is
 * appended to the redirect target.
 *
 * @param {string} content Markdown source
 * @param {string} docLocale locale of the document, e.g. "zh-CN"
 * @param {Map<string, string>} redirects map from old slug to new URL
 * @returns {UrlErrors} links to rewrite, with their replacement in `correct`
 */
function checkUrl(content, docLocale, redirects) {
  const reportUrls = [];

  for (const { url, line, column } of findUrlInMarkdown(content)) {
    // Drop whatever precedes the first "/" and split the rest into
    // locale / "docs" / slug segments.
    const [urlLocale, section, ...slugParts] = url.split("/").slice(1);
    if (section !== "docs") {
      // ignore non-docs links (also covers URLs with fewer than two segments)
      continue;
    }
    if (urlLocale !== docLocale) {
      continue;
    }

    const [slug, fragment] = slugParts.join("/").split("#");
    if (!redirects.has(slug)) {
      continue;
    }

    const hash = fragment ? `#${fragment}` : "";
    reportUrls.push({
      url,
      line,
      column,
      correct: redirects.get(slug) + hash,
    });
  }

  return reportUrls;
}
/**
 * Apply URL corrections to a document and return the rewritten text.
 *
 * For each error the first occurrence of `url` at or after `column` on
 * `line` is replaced with `correct`. Corrections on the same line are applied
 * from right to left, so an earlier replacement never shifts the column of
 * one that is still pending.
 *
 * @param {string} content document text, lines separated by "\n"
 * @param {UrlErrors} errors corrections to apply; the array is not modified
 * @returns {string} the document with the corrections applied
 */
function fixUrl(content, errors) {
  // Sort a copy: the caller's array must keep its order.
  const ordered = [...errors].sort((a, b) => {
    if (a.line === b.line) {
      // sort by column, descending
      return b.column - a.column;
    }
    return a.line - b.line;
  });

  const lines = content.split("\n");
  for (const { url, line, column, correct } of ordered) {
    const lineContent = lines[line - 1];
    const head = lineContent.slice(0, column - 1);
    // A replacer function inserts `correct` literally; a plain string would
    // have `$&`, `$1`, `$$`… interpreted as replacement patterns.
    const tail = lineContent.slice(column - 1).replace(url, () => correct);
    lines[line - 1] = `${head}${tail}`;
  }
  return lines.join("\n");
}
/**
 * Build the slug → target map from the `_redirects.txt` files of the given
 * locale and of en-US.
 *
 * The locale's file is read from `./files/<locale>/`, the en-US file from
 * `../content/files/en-us/`. Each non-comment line is expected to hold a
 * source and a target separated by a tab. Only rules matching one of
 * `startPaths` are kept. Targets of en-US rules are re-pointed at
 * `originLocale`; because en-US is read last, its rule replaces a rule with
 * the same slug from the locale's own file.
 *
 * @param {string} originLocale locale whose links are being fixed, e.g. "zh-CN"
 * @returns {Promise<Map<string, string>>} map from old slug (without the
 *   `/<locale>/docs/` prefix) to the new URL
 */
async function readRedirects(originLocale) {
  const redirects = new Map();

  for (const sourceLocale of [originLocale, "en-US"]) {
    const folder = sourceLocale.toLowerCase();
    const redirectsFile =
      sourceLocale === "en-US"
        ? `../content/files/${folder}/_redirects.txt`
        : `./files/${folder}/_redirects.txt`;
    const docsPrefix = `/${sourceLocale}/docs/`;

    const text = await fs.readFile(redirectsFile, "utf8");
    for (const row of text.split("\n")) {
      if (row.startsWith("#")) {
        continue;
      }
      const [rawFrom, rawTo] = row.split("\t");
      if (!rawFrom || !rawTo) {
        continue;
      }

      const slug = rawFrom.replace(docsPrefix, "");
      const isSelected = startPaths.some(
        (startPath) =>
          slug.startsWith(startPath) ||
          rawTo.startsWith(`${docsPrefix}${startPath}`),
      );
      if (!isSelected) {
        continue;
      }

      // Targets inside the source locale's docs tree are re-pointed at the
      // locale being fixed; any other target is kept verbatim.
      const target = rawTo.startsWith(docsPrefix)
        ? rawTo.replace(docsPrefix, `/${originLocale}/docs/`)
        : rawTo;
      redirects.set(slug, target);
    }
  }

  return redirects;
}
/**
 * Entry point: crawl every Markdown file of `locale`, rewrite links that
 * point to redirected pages, and report progress through the spinner.
 *
 * The process exit code is set to 1 when a file with detected errors could
 * not be changed, or when reading, checking or writing a file throws; the
 * remaining files are still processed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  spinner.text = "Crawling files...";
  // Use the crawler's result directly; spreading a very large list into
  // `push(...)` can exceed the engine's argument limit.
  const files = new fdir()
    .withBasePath()
    .filter((filePath) => filePath.endsWith(".md"))
    .crawl(`./files/${locale.toLowerCase()}/`)
    .sync();

  let exitCode = 0;
  const redirects = await readRedirects(locale);

  for (const [index, file] of files.entries()) {
    spinner.text = `${index}/${files.length}: ${file}...`;
    const relativePath = path.relative(process.cwd(), file);
    try {
      const originContent = await fs.readFile(relativePath, "utf8");
      const urlErrors = checkUrl(originContent, locale, redirects);
      if (urlErrors.length > 0) {
        spinner.info(
          `${file}: Found ${urlErrors.length} URL errors! Fixing...`,
        );
        const newContent = fixUrl(originContent, urlErrors);
        if (newContent === originContent) {
          // Errors were detected but no replacement took effect.
          spinner.fail(`${file}: Fixing URL errors failed!`);
          exitCode = 1;
        } else {
          await fs.writeFile(relativePath, newContent);
        }
        // info()/fail() stop the spinner; resume it for the next file.
        spinner.start();
      }
    } catch (e) {
      spinner.fail(`${file}: ${e}`);
      // A file that could not be processed is a failure of the run, not a
      // success: make it visible in the exit code.
      exitCode = 1;
      spinner.start();
    }
  }

  spinner.stop();
  if (exitCode === 0) {
    console.log("Checked all files successfully!");
  } else {
    process.exitCode = exitCode;
  }
}

// Run the script (top-level await: this file is an ES module).
await main();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment