Created
September 21, 2022 20:43
-
-
Save colinfwren/705e0a532cb66156927f2afb1cf0b955 to your computer and use it in GitHub Desktop.
Download and convert embedded gist code from Medium export
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fetch from 'node-fetch' | |
import {read} from 'to-vfile' | |
import {unified} from 'unified' | |
import rehypeParse from 'rehype-parse' | |
import rehypeRemark from 'rehype-remark' | |
import remarkStringify from 'remark-stringify' | |
import { selectAll } from "hast-util-select"; | |
import {toHtml} from "hast-util-to-html"; | |
/**
 * Downloads the raw text of a gist referenced by an embed script URL.
 *
 * The raw URL is derived by cutting the embed URL at its first `.js` and
 * appending `/raw`.
 *
 * @param {string} url - The `src` of the gist embed `<script>` tag.
 * @returns {Promise<string|false>} The response body as text, or `false`
 *   when the request fails or the server answers with a non-OK status.
 */
async function downloadGistCode(url) {
  try {
    const rawUrl = `${url.split('.js')[0]}/raw`;
    const codeResp = await fetch(rawUrl);
    // fetch only rejects on network-level failures; without this check the
    // body of an HTTP error page would be returned as if it were gist code.
    if (!codeResp.ok) {
      throw new Error(`Unexpected response ${codeResp.status} ${codeResp.statusText} for ${rawUrl}`);
    }
    return await codeResp.text();
  } catch (error) {
    console.error('Failed to fetch', url, error);
    return false;
  }
}
/**
 * rehype plugin that replaces gist embed `<script>` elements with the gist's
 * code wrapped in a Markdown code fence.
 *
 * Every `<script>` whose `src` contains `gist` is downloaded via
 * `downloadGistCode` and the node is turned, in place, into a text node
 * holding the fenced code. All downloads run in parallel.
 *
 * @returns {(tree: object) => Promise<object>} Transformer that mutates and
 *   returns the given tree.
 */
function rehypeInlineGistScript() {
  return async (tree) => {
    const nodes = selectAll('script', tree);
    await Promise.all(nodes.map(async (node) => {
      const src = node.properties && node.properties.src;
      // Inline scripts carry no src, and other embeds (e.g. tweets) also use
      // the script tag; only gist embeds are processed.
      if (typeof src !== 'string' || !src.includes('gist')) {
        return node;
      }
      const code = await downloadGistCode(src);
      // A failed download yields `false`; leave the node untouched instead of
      // emitting a code block that contains the literal text "false".
      if (code === false) {
        return node;
      }
      node.properties = {};
      node.type = 'text';
      // Code fence syntax plus surrounding newlines so the block is formatted correctly.
      node.value = '\n```\n' + code + '\n```\n';
      return node;
    }));
    return tree;
  };
}
// Newline text node inserted before, between and after the children of a
// <figure> element when the figure handler serializes it to HTML.
const paddingNode = {
  type: 'text',
  value: '\n',
};
/**
 * Converts an HTML file to Markdown, inlining embedded gists as code blocks.
 *
 * The file is parsed as an HTML fragment, gist embed scripts are replaced by
 * their code (`rehypeInlineGistScript`), the tree is converted to Markdown and
 * the result is printed to stdout.
 *
 * @param {string} filePath - Path of the HTML file to read.
 * @param {string} [outputFolder] - Currently unused; this function does not
 *   write anything to disk.
 * @returns {Promise<string>} The generated Markdown (also logged to the console).
 */
async function convertHtmlToMarkdown(filePath, outputFolder) {
  const file = await unified()
    .use(rehypeParse, { fragment: true })
    .use(rehypeInlineGistScript)
    .use(rehypeRemark, {
      handlers: { // defines how to handle specific HTML tags
        // Keep <figure> elements as raw HTML in the Markdown output.
        figure(h, node) {
          const cleansedNode = {
            ...node,
            // Drop all attributes — presumably to strip export-specific
            // classes/ids; confirm against the input HTML.
            properties: {},
            // Surround every child with a newline text node so the
            // serialized HTML puts each child on its own line.
            children: [
              paddingNode,
              ...node.children.flatMap((child) => [child, paddingNode]),
            ],
          };
          return h(cleansedNode, 'html', toHtml(cleansedNode, { closeSelfClosing: true }));
        },
      },
    })
    .use(remarkStringify)
    .process(await read(filePath));
  // `process` resolves to a virtual file; stringifying it gives the Markdown.
  const markdown = String(file);
  console.log(markdown);
  return markdown;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment