Skip to content

Instantly share code, notes, and snippets.

@yin1999
Last active December 21, 2024 12:59
Show Gist options
  • Save yin1999/87ed8e4fa08c92af54ed888011ba9529 to your computer and use it in GitHub Desktop.
Save yin1999/87ed8e4fa08c92af54ed888011ba9529 to your computer and use it in GitHub Desktop.
"use strict";
import fs from "node:fs/promises";
import * as path from "node:path";
import { fdir } from "fdir";
import ora from "ora";
import { fromMarkdown } from "mdast-util-from-markdown";
import { visit } from "unist-util-visit";
// Progress spinner shared by the whole script; it is started as soon as this module is evaluated.
const spinner = ora().start();
// Locale whose documents are scanned and whose links are rewritten
// (the script targets the old "Learn area" URLs). Change it to process another locale.
const locale = "zh-CN";
/**
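* Shared JSDoc types: `Urls` lists link targets with their 1-based line/column,
* and `UrlErrors` additionally carries the replacement URL in `correct`.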
*
* @typedef {Array<{url: string, line: number, column: number, correct: string}>} UrlErrors
* @typedef {Array<{url: string, line: number, column: number}>} Urls
*/
/**
 * Collects the value of every `href="..."` / `href='...'` attribute in a piece of raw text.
 *
 * `line` and `column` are 1-based; `column` is the position of the first character
 * of the attribute value, i.e. just past `href=` and the opening quote.
 *
 * @param {string} rawContent
 * @returns {Urls}
 */
function findUrlInText(rawContent) {
  const hrefPattern = /href=['"]([^'"]+)['"]/g;
  // Distance from the `h` of `href` to the first character of the value: `href`, `=`, quote.
  const valueOffset = "href".length + 2;
  return Array.from(rawContent.matchAll(hrefPattern), (match) => {
    const preceding = rawContent.slice(0, match.index);
    // Index of the first character of the line that contains the match.
    const lineStart = preceding.lastIndexOf("\n") + 1;
    return {
      url: match[1],
      line: preceding.split("\n").length,
      column: match.index - lineStart + 1 + valueOffset,
    };
  });
}
/**
 * Parses Markdown and returns every URL found in `link` nodes and in `href`
 * attributes of raw `html` nodes, with a position inside `content`.
 *
 * The reported column is where a later search for the URL should start on that
 * line; it is not guaranteed to be the exact first character of the URL.
 *
 * @param {string} content
 * @returns {Urls}
 */
function findUrlInMarkdown(content) {
  const found = [];

  // Positions of a raw HTML node's hrefs are relative to the node; shift them into the document.
  const collectFromHtml = (node) => {
    const { line: startLine, column: startColumn } = node.position.start;
    for (const hit of findUrlInText(node.value)) {
      found.push({
        url: hit.url,
        line: hit.line + startLine - 1,
        // Only the node's first line begins mid-line, so only it needs the column shift.
        column: hit.line === 1 ? hit.column + startColumn - 1 : hit.column,
      });
    }
  };

  const collectFromLink = (node) => {
    const anchor =
      node.children.length === 1
        ? {
            // Presumably the +2 steps over the `](` between the link text and its destination.
            line: node.children[0].position.end.line,
            column: node.children[0].position.end.column + 2,
          }
        : {
            // No single text child to anchor on: start near the beginning of the link instead.
            line: node.position.start.line,
            column: node.position.start.column + 3,
          };
    found.push({ url: node.url, ...anchor });
  };

  visit(fromMarkdown(content), ["link", "html"], (node) => {
    // Block body on purpose: the visitor must keep returning `undefined`.
    if (node.type === "link") {
      collectFromLink(node);
    } else {
      collectFromHtml(node);
    }
  });

  return found;
}
/**
 * Finds links in a Markdown document that point at a redirected docs page.
 *
 * Only URLs shaped like `/<docLocale>/docs/<slug>[#hash]` are considered. The slug
 * (without the hash) is looked up in `redirects`; on a hit the entry's `correct`
 * field is the redirect target with the original `#hash` re-appended.
 *
 * @param {string} content
 * @param {string} docLocale
 * @param {Map<string, string>} redirects
 * @returns {UrlErrors}
 */
function checkUrl(content, docLocale, redirects) {
  return findUrlInMarkdown(content).flatMap(({ url, line, column }) => {
    // Drop the segment before the first "/" (empty for root-relative URLs).
    const [urlLocale, section, ...slugSegments] = url.split("/").slice(1);
    if (section !== "docs" || urlLocale !== docLocale) {
      // Not a docs link, or a docs link for another locale.
      return [];
    }
    const [slug, fragment] = slugSegments.join("/").split("#");
    if (!redirects.has(slug)) {
      return [];
    }
    const hash = fragment ? `#${fragment}` : "";
    return [{ url, line, column, correct: redirects.get(slug) + hash }];
  });
}
/**
 * Applies the reported URL corrections to `content` and returns the new text.
 *
 * For each error, the first occurrence of `url` at or after the 1-based `column`
 * on the 1-based `line` is replaced with `correct`. Errors on the same line are
 * applied right-to-left so that an earlier replacement cannot shift the columns
 * of the ones still to be applied.
 *
 * @param {string} content
 * @param {UrlErrors} errors Left untouched; ordering happens on a copy.
 * @returns {string} The content with every locatable URL replaced.
 */
function fixUrl(content, errors) {
  // Sort a copy so the caller's array keeps its original order.
  const ordered = [...errors].sort((a, b) =>
    a.line === b.line ? b.column - a.column : a.line - b.line,
  );
  const lines = content.split("\n");
  for (const { url, line, column, correct } of ordered) {
    const lineContent = lines[line - 1];
    const prefix = lineContent.slice(0, column - 1);
    // A replacer function inserts `correct` verbatim; a plain replacement string
    // would expand `$&`, `$1`, `$$`, ... if the target URL happened to contain them.
    const suffix = lineContent.slice(column - 1).replace(url, () => correct);
    lines[line - 1] = `${prefix}${suffix}`;
  }
  return lines.join("\n");
}
/**
 * Builds the lookup table of redirected "Learn/" slugs.
 *
 * Two tab-separated `_redirects.txt` files are read, in this order:
 *   1. `./files/<originLocale lower-cased>/_redirects.txt`
 *   2. `../content/files/en-us/_redirects.txt`
 * Both paths are relative to the current working directory. Because en-US is read
 * last, its entries overwrite entries with the same key from the origin locale.
 *
 * Keys are the redirect source with the `/<locale>/docs/` prefix removed; only keys
 * starting with `Learn/` are kept. Values are the redirect target, with a leading
 * `/<locale>/docs/` rewritten to `/<originLocale>/docs/`.
 *
 * @param {string} originLocale
 * @returns {Promise<Map<string, string>>}
 */
async function readRedirects(originLocale) {
  const redirects = new Map();
  for (const sourceLocale of [originLocale, "en-US"]) {
    const docsPrefix = `/${sourceLocale}/docs/`;
    const redirectsFile =
      sourceLocale === "en-US"
        ? `../content/files/${sourceLocale.toLowerCase()}/_redirects.txt`
        : `./files/${sourceLocale.toLowerCase()}/_redirects.txt`;
    const content = await fs.readFile(redirectsFile, "utf8");
    // Accept CRLF as well as LF so a trailing "\r" never ends up in a redirect target.
    for (const line of content.split(/\r?\n/)) {
      if (line.startsWith("#")) {
        continue;
      }
      const [rawFrom, rawTo] = line.split("\t");
      if (!rawFrom || !rawTo) {
        // Blank or malformed line.
        continue;
      }
      const from = rawFrom.replace(docsPrefix, "");
      if (!from.startsWith("Learn/")) {
        continue;
      }
      const to = rawTo.startsWith(docsPrefix)
        ? rawTo.replace(docsPrefix, `/${originLocale}/docs/`)
        : rawTo;
      redirects.set(from, to);
    }
  }
  return redirects;
}
/**
 * Script entry point.
 *
 * Crawls every `.md` file under `./files/<locale lower-cased>/`, looks for links
 * that hit an entry in the redirects table, rewrites them in place and reports
 * progress through the shared spinner. `process.exitCode` is set to 1 when a file
 * could not be processed or when a detected error could not be fixed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  spinner.text = "Crawling files...";
  // Use the crawler's result directly; spreading it into `push` can exceed the
  // engine's argument limit on very large trees.
  const files = new fdir()
    .withBasePath()
    .filter((filePath) => filePath.endsWith(".md"))
    .crawl(`./files/${locale.toLowerCase()}/`)
    .sync();
  let exitCode = 0;
  const redirects = await readRedirects(locale);
  for (const [index, file] of files.entries()) {
    spinner.text = `${index}/${files.length}: ${file}...`;
    const relativePath = path.relative(process.cwd(), file);
    try {
      const originContent = await fs.readFile(relativePath, "utf8");
      const urlErrors = checkUrl(originContent, locale, redirects);
      if (urlErrors.length > 0) {
        spinner.info(
          `${file}: Found ${urlErrors.length} URL errors! Fixing...`,
        );
        const newContent = fixUrl(originContent, urlErrors);
        if (newContent === originContent) {
          // Errors were detected but none of the URLs could be located for replacement.
          spinner.fail(`${file}: Fixing URL errors failed!`);
          exitCode = 1;
        } else {
          await fs.writeFile(relativePath, newContent);
        }
        // `info`/`fail` stop the spinner; restart it for the next file.
        spinner.start();
      }
    } catch (e) {
      // Keep going with the remaining files, but make sure the run is not reported as a success.
      spinner.fail(`${file}: ${e}`);
      exitCode = 1;
      spinner.start();
    }
  }
  spinner.stop();
  if (exitCode === 0) {
    console.log("Checked all files successfully!");
  } else {
    process.exitCode = exitCode;
  }
}
// Run the script. Top-level `await` is used, so this file has to be executed as an ES module.
await main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment