Skip to content

Instantly share code, notes, and snippets.

@BYK
Last active July 2, 2025 11:56
Show Gist options
  • Save BYK/d8b9bdba5d1ea9bc12fdfb2157d93854 to your computer and use it in GitHub Desktop.
Save BYK/d8b9bdba5d1ea9bc12fdfb2157d93854 to your computer and use it in GitHub Desktop.
import type { Root, Heading } from "mdast";
import rehypeParse from "rehype-parse";
import rehypeRemark from "rehype-remark";
import remarkStringify from "remark-stringify";
import { unified } from "unified";
/**
 * Creates a unified transformer that trims an mdast tree down to a single
 * section.
 *
 * The section starts at the first top-level heading whose first child is a
 * link with a `url` matching `section`, and runs up to (but not including) the
 * next top-level heading of the same or a shallower depth, or to the end of the
 * document when there is no such heading.
 *
 * The tree is returned unchanged when `section` is omitted or when no heading
 * matches, so the caller still receives the whole document instead of a
 * runtime error or an arbitrary tail node.
 *
 * @param options.section Pattern tested against the URL of each heading's
 *   leading link.
 * @returns A transformer that mutates `tree.children` in place and returns the
 *   same tree.
 */
function extractMDSection({ section }: { section?: RegExp } = {}) {
  return (tree: Root) => {
    // Nothing to extract: keep the full document.
    if (!section) {
      return tree;
    }
    const pattern = section;

    const headingIdx = tree.children.findIndex((node) => {
      if (node.type !== "heading") {
        return false;
      }
      const first = node.children[0];
      if (!first || first.type !== "link") {
        return false;
      }
      // Global/sticky patterns carry `lastIndex` across `test` calls; reset it
      // so every URL is matched from its start.
      pattern.lastIndex = 0;
      return pattern.test(first.url);
    });

    // No matching heading: leave the tree as is rather than slicing from -1.
    if (headingIdx === -1) {
      return tree;
    }

    const heading = tree.children[headingIdx] as Heading;

    // A heading at the same level or higher up the outline closes the section;
    // deeper headings are sub-sections and stay inside it.
    const nextHeadingIdx = tree.children.findIndex(
      (node, idx) =>
        idx > headingIdx &&
        node.type === "heading" &&
        node.depth <= heading.depth
    );

    tree.children = tree.children.slice(
      headingIdx,
      nextHeadingIdx === -1 ? undefined : nextHeadingIdx
    );
    return tree;
  };
}
/**
 * Downloads a web page and converts its HTML to Markdown.
 *
 * The HTML is parsed with rehype, converted to an mdast tree with
 * rehype-remark, passed through `extractMDSection`, and serialized with
 * remark-stringify.
 *
 * @param url Address of the page to fetch.
 * @param section Optional pattern forwarded to `extractMDSection` to select a
 *   single section of the page.
 * @returns The resulting Markdown text.
 * @throws Error when the server answers with a non-2xx status, so an error
 *   page is never returned as if it were the requested content.
 */
export const getWebpageAsMarkdown = async (
  url: string,
  section?: RegExp
): Promise<string> => {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(
      `Failed to fetch ${url}: ${response.status} ${response.statusText}`
    );
  }
  const html = await response.text();

  const file = await unified()
    .use(rehypeParse)
    .use(rehypeRemark)
    .use(extractMDSection, { section })
    .use(remarkStringify)
    .process(html);

  // `process` resolves to a VFile; String() yields its serialized contents.
  return String(file);
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment