Last active
July 2, 2025 11:56
-
-
Save BYK/d8b9bdba5d1ea9bc12fdfb2157d93854 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import type { Root, Heading } from "mdast"; | |
import rehypeParse from "rehype-parse"; | |
import rehypeRemark from "rehype-remark"; | |
import remarkStringify from "remark-stringify"; | |
import { unified } from "unified"; | |
/**
 * Creates a unified transformer that trims an mdast tree down to one section.
 *
 * The section starts at the first top-level heading whose first child is a
 * link with a URL matching `section`, and runs up to (but not including) the
 * next top-level heading of the same or a shallower depth.
 *
 * The tree is left untouched when `section` is omitted or when no heading
 * matches, so the whole document is kept instead of failing.
 *
 * @param options.section - Pattern tested against the URL of the link that
 *   leads a heading. Its `lastIndex` is reset before each test.
 * @returns A transformer that mutates `tree.children` in place and returns
 *   the same tree.
 */
function extractMDSection({ section }: { section?: RegExp } = {}) {
  return (tree: Root) => {
    // No pattern means there is nothing to extract: keep the full document.
    if (!section) {
      return tree;
    }
    const pattern = section;

    const headingIdx = tree.children.findIndex((node) => {
      if (node.type !== "heading") {
        return false;
      }
      const firstChild = node.children[0];
      if (!firstChild || firstChild.type !== "link") {
        return false;
      }
      // Global/sticky regexes remember `lastIndex` between calls; reset it so
      // a previous successful match cannot make this test start mid-string.
      pattern.lastIndex = 0;
      return pattern.test(firstChild.url);
    });

    const heading = tree.children[headingIdx];
    // Covers both "no match" (index -1) and keeps the type narrowed to Heading.
    if (heading?.type !== "heading") {
      return tree;
    }

    // A heading of the same depth starts a sibling section; a shallower one
    // closes the parent section. Either way the extracted section ends there.
    const nextHeadingIdx = tree.children.findIndex(
      (node, idx) =>
        idx > headingIdx &&
        node.type === "heading" &&
        node.depth <= heading.depth
    );

    tree.children = tree.children.slice(
      headingIdx,
      nextHeadingIdx === -1 ? undefined : nextHeadingIdx
    );
    return tree;
  };
}
/**
 * Downloads a web page and converts its HTML to Markdown.
 *
 * The HTML is parsed with rehype, converted to an mdast tree with
 * rehype-remark, optionally trimmed to one section by `extractMDSection`,
 * and serialized with remark-stringify.
 *
 * @param url - Address of the page to fetch.
 * @param section - Optional pattern forwarded to `extractMDSection` to select
 *   a single section of the page.
 * @returns The page (or the selected section) as a Markdown string.
 * @throws Error when the server answers with a non-2xx status, so an error
 *   page is never returned as if it were the requested content.
 */
export const getWebpageAsMarkdown = async (
  url: string,
  section?: RegExp
): Promise<string> => {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(
      `Failed to fetch ${url}: ${response.status} ${response.statusText}`
    );
  }
  const html = await response.text();

  const file = await unified()
    .use(rehypeParse)
    .use(rehypeRemark)
    .use(extractMDSection, { section })
    .use(remarkStringify)
    .process(html);

  return String(file);
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment