Skip to content

Instantly share code, notes, and snippets.

@colinfwren
Created September 21, 2022 21:07
Show Gist options
  • Save colinfwren/cd188cdc9e06cf87ea6772bb834faf1a to your computer and use it in GitHub Desktop.
Save colinfwren/cd188cdc9e06cf87ea6772bb834faf1a to your computer and use it in GitHub Desktop.
Clean up the Markdown output
import {read} from 'to-vfile'
import {unified} from 'unified'
import rehypeParse from 'rehype-parse'
import rehypeRemark from 'rehype-remark'
import remarkStringify from 'remark-stringify'
import {visit} from "unist-util-visit";
/**
 * Tell whether a hast element lists the given class.
 *
 * Elements without `properties` or without a `class` attribute simply do not
 * match; they must not make the caller throw.
 *
 * @param {object} node - Node whose `properties.className` is inspected.
 * @param {string} className - Class to look for.
 * @returns {boolean} True when `properties.className` is an array containing `className`.
 */
function hasClassName(node, className) {
  const classes = node.properties && node.properties.className
  return Array.isArray(classes) && classes.includes(className)
}

/**
 * unified plugin that strips Medium-specific extras from a parsed HTML tree.
 *
 * The returned transformer mutates the tree in place:
 *  1. the top-level `<article>` keeps only children whose `dataField`
 *     property is `'body'`;
 *  2. every `<hr>` with class `section-divider` is removed;
 *  3. every `<h3>` with class `graf--title` is removed.
 *
 * When the tree has no top-level `<article>`, step 1 is skipped and the
 * remaining clean-up still runs.
 *
 * @returns {(tree: object) => void} Transformer for the parsed tree.
 */
function removeMediumExtras() {
  return (tree) => {
    const article = tree.children.find((child) => child.tagName === 'article')
    if (article) {
      article.children = article.children.filter(
        (node) => node.properties && node.properties.dataField === 'body'
      )
    }

    // Builds a visitor that detaches matching nodes from their parent.
    const removeIfClassed = (className) => (node, index, parent) => {
      if (!parent || !hasClassName(node, className)) {
        return undefined
      }
      parent.children.splice(index, 1)
      // The next sibling now sits at `index`; returning it makes the traversal
      // resume there instead of stepping over that sibling.
      return index
    }

    visit(tree, { tagName: 'hr' }, removeIfClassed('section-divider'))
    visit(tree, { tagName: 'h3' }, removeIfClassed('graf--title'))
  }
}
/**
 * Convert an HTML file to Markdown and print the result to stdout.
 *
 * The file is read, parsed as an HTML fragment, passed through
 * `removeMediumExtras`, converted to a Markdown tree and stringified. The
 * resulting text then has its space entities normalised before being logged.
 *
 * @param {string} filePath - Path of the HTML file to read.
 * @param {string} [outputFolder] - Currently unused; kept so existing callers
 *   keep working.
 * @returns {Promise<void>} Resolves once the Markdown has been logged.
 */
async function convertHtmlToMarkdown(filePath, outputFolder ) {
  // `.process()` resolves to the processed file, not a syntax tree.
  const file = await unified()
    .use(rehypeParse, {fragment: true})
    .use(removeMediumExtras)
    .use(rehypeRemark)
    .use(remarkStringify)
    .process(await read(filePath))

  // The previous pattern ended in an empty alternative, which matches at every
  // position and therefore injected a space between all characters. The
  // targets are now spelled out explicitly.
  // NOTE(review): presumably the intent was to turn `&#x20;` character
  // references and non-breaking spaces into plain spaces - confirm against
  // real output.
  const markdown = String(file).replace(/&#x20;|\u00A0/g, ' ')
  console.log(markdown)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment