Skip to content

Instantly share code, notes, and snippets.

@colinfwren
Created September 21, 2022 20:43
Show Gist options
  • Save colinfwren/705e0a532cb66156927f2afb1cf0b955 to your computer and use it in GitHub Desktop.
Save colinfwren/705e0a532cb66156927f2afb1cf0b955 to your computer and use it in GitHub Desktop.
Download and convert embedded gist code from Medium export
import fetch from 'node-fetch'
import {read} from 'to-vfile'
import {unified} from 'unified'
import rehypeParse from 'rehype-parse'
import rehypeRemark from 'rehype-remark'
import remarkStringify from 'remark-stringify'
import { selectAll } from "hast-util-select";
import {toHtml} from "hast-util-to-html";
/**
 * Download the raw source of a gist from its embed script URL.
 *
 * The raw URL is built by taking everything before the first `.js` in `url`
 * and appending `/raw` (presumably embed URLs look like
 * `https://gist.github.com/<user>/<id>.js` - verify against the export).
 *
 * @param {string} url - The `src` of the gist embed script tag.
 * @returns {Promise<string|false>} The gist source, or `false` when the
 *   request fails or the server answers with a non-2xx status.
 */
async function downloadGistCode(url) {
  try {
    const rawUrl = `${url.split('.js')[0]}/raw`
    const codeResp = await fetch(rawUrl)
    // fetch only rejects on network errors; without this check a 404/500
    // error page would be returned as if it were the gist's code.
    if (!codeResp.ok) {
      console.error('Failed to fetch', url, `${codeResp.status} ${codeResp.statusText}`)
      return false
    }
    return await codeResp.text()
  } catch (error) {
    console.error('Failed to fetch', url, error)
    return false
  }
}
/**
 * Unified plugin that replaces gist embed `<script>` elements with a text
 * node containing the gist's source wrapped in a markdown code fence.
 *
 * Script elements without a string `src`, scripts whose `src` does not
 * contain "gist" (other embeds such as tweets also use script tags), and
 * gists whose download fails are left untouched.
 *
 * @returns {(tree: object) => Promise<object>} Transformer that mutates the
 *   matching nodes in place and resolves with the same tree.
 */
function rehypeInlineGistScript() {
  const fence = '```'
  return async (tree) => {
    const scripts = selectAll('script', tree)
    await Promise.all(scripts.map(async (node) => {
      // Inline scripts have no src; guard so they don't throw a TypeError.
      const src = node.properties && node.properties.src
      if (typeof src !== 'string' || !src.includes('gist')) {
        return node
      }
      const code = await downloadGistCode(src)
      // downloadGistCode signals failure with `false`; keep the original
      // embed rather than emitting a code block containing "false".
      if (code === false) {
        return node
      }
      node.properties = {}
      node.type = 'text'
      // Surrounding newlines keep the fence on its own lines in the output.
      node.value = `\n${fence}\n${code}\n${fence}\n`
      return node
    }))
    return tree
  }
}
// Newline text node that the figure handler in convertHtmlToMarkdown places
// before, between and after a figure's children.
const paddingNode = { type: 'text', value: '\n' }
/**
 * Convert an HTML file to markdown and print the result to stdout.
 *
 * The file is parsed as an HTML fragment, gist embeds are inlined by
 * rehypeInlineGistScript, and the tree is converted to markdown, with
 * `<figure>` elements emitted as raw HTML instead of being converted.
 *
 * @param {string} filePath - Path of the HTML file to read.
 * @param {string} outputFolder - Currently unused; the markdown is only logged.
 * @returns {Promise<void>}
 */
async function convertHtmlToMarkdown(filePath, outputFolder ) {
  // Handler for <figure>: strip its properties, surround every child with a
  // padding newline, and hand the serialised HTML to `h` as an 'html' node.
  const figureAsHtml = (h, node) => {
    const paddedChildren = [
      paddingNode,
      ...node.children.flatMap((child) => [child, paddingNode]),
    ]
    const cleansedNode = { ...node, properties: {}, children: paddedChildren }
    const html = toHtml(cleansedNode, { closeSelfClosing: true })
    return h(cleansedNode, 'html', html)
  }

  const processor = unified()
    .use(rehypeParse, { fragment: true })
    .use(rehypeInlineGistScript)
    .use(rehypeRemark, { handlers: { figure: figureAsHtml } })
    .use(remarkStringify)

  const source = await read(filePath)
  const markdown = await processor.process(source)
  console.log(String(markdown))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment